In [3]:
import numpy as np
import pandas as pd

## Series

In [4]:
# Name -> age mapping; insertion order is musa, zeyn, lila.
myDict = dict(musa=50, zeyn=40, lila=30)

In [5]:
# Building a Series from a dict: the keys become the index labels, the values the data.
pd.Series(myDict)

musa    50
zeyn    40
lila    30
dtype: int64

In [6]:
# Index labels, plus the matching values held in a NumPy array
# (a NumPy array is accepted as Series data just like a list).
names = ["musa", "zeyn", "lila"]
ages = np.array([50, 40, 30])

In [7]:
# Values come from the array, index labels from the list of names.
pd.Series(data=ages, index=names)

musa    50
zeyn    40
lila    30
dtype: int32

In [8]:
# Without an explicit index the Series gets default integer labels 0, 1, 2.
pd.Series(["a", "b", "c"])

0    a
1    b
2    c
dtype: object

In [9]:
# Two integer Series whose indexes overlap only on the labels "a" and "b".
s1 = pd.Series(data=[10, 20, 60, 40], index=list("abcf"))
s2 = pd.Series(data=[10, 5, 27, 40], index=list("abdg"))

In [10]:
# Addition aligns on index labels; a label missing from either Series yields NaN in the result.
s1 + s2

a    20.0
b    25.0
c     NaN
d     NaN
f     NaN
g     NaN
dtype: float64

## DataFrame

In [11]:
# Draw a 4x3 matrix of standard-normal samples from an explicitly seeded
# generator, so a fresh "Restart & Run All" reproduces the same numbers.
data = np.random.default_rng(seed=42).standard_normal((4, 3))

In [12]:
# With no labels supplied, both rows and columns get default integer labels.
pd.DataFrame(data)

Unnamed: 0,0,1,2
0,-0.425287,1.921662,-0.650971
1,1.70215,-0.014867,1.192356
2,-1.164646,1.397096,-1.015675
3,1.599313,-0.483346,0.240418


In [13]:
# Same values, now with explicit row labels a-d and column labels e-g.
dataFrame = pd.DataFrame(data=data, index=list("abcd"), columns=list("efg"))

In [14]:
dataFrame

Unnamed: 0,e,f,g
a,-0.425287,1.921662,-0.650971
b,1.70215,-0.014867,1.192356
c,-1.164646,1.397096,-1.015675
d,1.599313,-0.483346,0.240418


In [15]:
dataFrame["e"] # Selects column "e"; the result is a Series indexed by the row labels

a   -0.425287
b    1.702150
c   -1.164646
d    1.599313
Name: e, dtype: float64

In [16]:
dataFrame[["f", "e"]] # A list of names selects several columns, returned in the order listed ("f" then "e")

Unnamed: 0,f,e
a,1.921662,-0.425287
b,-0.014867,1.70215
c,1.397096,-1.164646
d,-0.483346,1.599313


In [17]:
dataFrame.loc["a"] # Gets the row labelled "a"

e   -0.425287
f    1.921662
g   -0.650971
Name: a, dtype: float64

In [18]:
dataFrame.iloc[0] # Gets the row at integer position 0 (here the same row as label "a")

e   -0.425287
f    1.921662
g   -0.650971
Name: a, dtype: float64

In [19]:
# Add a column of four random integers in [0, 100), drawn from an explicitly
# seeded generator so the values are reproducible across fresh runs.
dataFrame["new col"] = np.random.default_rng(seed=42).integers(0, 100, size=4)

In [20]:
dataFrame

Unnamed: 0,e,f,g,new col
a,-0.425287,1.921662,-0.650971,90
b,1.70215,-0.014867,1.192356,84
c,-1.164646,1.397096,-1.015675,56
d,1.599313,-0.483346,0.240418,33


In [21]:
dataFrame.drop("new col", axis=1) # Returns a new frame without column "new col" (axis=1 means columns); dataFrame itself is not modified

Unnamed: 0,e,f,g
a,-0.425287,1.921662,-0.650971
b,1.70215,-0.014867,1.192356
c,-1.164646,1.397096,-1.015675
d,1.599313,-0.483346,0.240418


In [22]:
dataFrame.drop(["e", "f"], axis=1) # A list of labels drops several columns at once

Unnamed: 0,g,new col
a,-0.650971,90
b,1.192356,84
c,-1.015675,56
d,0.240418,33


In [23]:
# Persist the removal by rebinding the name to the returned frame instead of
# mutating with inplace=True; the resulting dataFrame is the same.
dataFrame = dataFrame.drop(columns="new col")

In [24]:
dataFrame

Unnamed: 0,e,f,g
a,-0.425287,1.921662,-0.650971
b,1.70215,-0.014867,1.192356
c,-1.164646,1.397096,-1.015675
d,1.599313,-0.483346,0.240418


In [25]:
# Chained lookup: .loc["a"] first returns the row as a Series, then ["e"] picks one element from it.
dataFrame.loc["a"]["e"]

-0.42528725737833367

In [26]:
# Same scalar fetched with a single .loc call: (row label, column label).
dataFrame.loc["a","e"]

-0.42528725737833367

In [27]:
# Element-wise mask: True wherever a value is negative.
booleanFrame = dataFrame.lt(0)

In [28]:
booleanFrame

Unnamed: 0,e,f,g
a,True,False,True
b,False,True,False
c,True,False,True
d,False,True,False


In [29]:
# Indexing with a boolean frame keeps values where the mask is True; every other cell becomes NaN.
dataFrame[booleanFrame]

Unnamed: 0,e,f,g
a,-0.425287,,-0.650971
b,,-0.014867,
c,-1.164646,,-1.015675
d,,-0.483346,


In [30]:
# Same masking with the condition written inline: only positive values are kept, the rest become NaN.
dataFrame[dataFrame > 0]

Unnamed: 0,e,f,g
a,,1.921662,
b,1.70215,,1.192356
c,,1.397096,
d,1.599313,,0.240418


In [31]:
# A boolean Series built from one column filters whole rows: keep the rows where "f" is positive.
dataFrame[dataFrame["f"] > 0]

Unnamed: 0,e,f,g
a,-0.425287,1.921662,-0.650971
c,-1.164646,1.397096,-1.015675


In [32]:
dataFrame.reset_index() # Returns a frame with a default integer index; the old labels move into an "index" column

Unnamed: 0,index,e,f,g
0,a,-0.425287,1.921662,-0.650971
1,b,1.70215,-0.014867,1.192356
2,c,-1.164646,1.397096,-1.015675
3,d,1.599313,-0.483346,0.240418


In [33]:
 # NOTE(review): this cell starts with a stray space. IPython appears to tolerate it,
 # but it would be an IndentationError in a plain .py script -- consider removing it.
 newIndexList = ["Musa", "Ahmet", "Mehmet", "Leyla"]

In [34]:
# Store the new labels as an ordinary column, one label per existing row.
dataFrame["New Index"] = newIndexList

In [35]:
# Promote the "New Index" column to be the row index. Rebinding the name to the
# returned frame avoids inplace=True while leaving dataFrame in the same state.
dataFrame = dataFrame.set_index("New Index")

In [36]:
dataFrame

Unnamed: 0_level_0,e,f,g
New Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Musa,-0.425287,1.921662,-0.650971
Ahmet,1.70215,-0.014867,1.192356
Mehmet,-1.164646,1.397096,-1.015675
Leyla,1.599313,-0.483346,0.240418


## Multi Index

In [37]:
# Outer index level: each show name repeated three times, one entry per character.
firstInd = [show for show in ("Simpson", "South Park") for _ in range(3)]

In [38]:
# Inner index level: character names, listed in the same order as the outer labels.
innerInd = [
    "Homer", "Bart", "Marge",
    "Cartmen", "Kenny", "Kyle",
]

In [39]:
# Pair every outer label with its inner label: [(show, character), ...].
unitedInd = [(show, character) for show, character in zip(firstInd, innerInd)]

In [40]:
# Turn the list of (outer, inner) tuples into a two-level MultiIndex.
# Note: this rebinds unitedInd from a list of tuples to a MultiIndex object.
unitedInd = pd.MultiIndex.from_tuples(unitedInd)

In [41]:
unitedInd 

MultiIndex([(   'Simpson',   'Homer'),
            (   'Simpson',    'Bart'),
            (   'Simpson',   'Marge'),
            ('South Park', 'Cartmen'),
            ('South Park',   'Kenny'),
            ('South Park',    'Kyle')],
           )

In [42]:
# One [age, job] record per character, kept as a plain list of lists.
# Wrapping mixed int/str rows in np.array coerces every value to a string,
# which would leave the "Age" column non-numeric in the resulting DataFrame.
cartoonList = [[40, "A"], [10, "B"], [30, "C"], [9, "D"], [10, "E"], [11, "F"]]

In [43]:
# Frame of character records, indexed by the two-level (show, character) index.
cartoonDataFrame = pd.DataFrame(
    data=cartoonList,
    index=unitedInd,
    columns=["Age", "Job"],
)

In [44]:
cartoonDataFrame

Unnamed: 0,Unnamed: 1,Age,Job
Simpson,Homer,40,A
Simpson,Bart,10,B
Simpson,Marge,30,C
South Park,Cartmen,9,D
South Park,Kenny,10,E
South Park,Kyle,11,F


In [45]:
# Selecting an outer-level label returns the sub-frame indexed by the inner level.
cartoonDataFrame.loc["Simpson"]

Unnamed: 0,Age,Job
Homer,40,A
Bart,10,B
Marge,30,C


In [46]:
# Chained .loc: pick the outer level first, then the inner label within that sub-frame.
cartoonDataFrame.loc["South Park"].loc["Kenny"]

Age    10
Job     E
Name: Kenny, dtype: object

In [47]:
# Both levels in one .loc call; the returned row is named by the full (outer, inner) tuple.
cartoonDataFrame.loc["South Park", "Kenny"]

Age    10
Job     E
Name: (South Park, Kenny), dtype: object

In [48]:
# Name the two index levels so they appear as headers when the frame is displayed.
cartoonDataFrame.index.names = ["Cartoon", "Name"]

In [49]:
cartoonDataFrame

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Job
Cartoon,Name,Unnamed: 2_level_1,Unnamed: 3_level_1
Simpson,Homer,40,A
Simpson,Bart,10,B
Simpson,Marge,30,C
South Park,Cartmen,9,D
South Park,Kenny,10,E
South Park,Kyle,11,F


In [50]:
# Three readings per city; np.nan marks a missing reading.
weatherDict = dict(
    Istanbul=[30, 29, np.nan],
    Ankara=[20, np.nan, 25],
    Izmir=[40, 39, 38],
    Antalya=[45, np.nan, np.nan],
)
weatherDF = pd.DataFrame(data=weatherDict)

In [51]:
weatherDF

Unnamed: 0,Istanbul,Ankara,Izmir,Antalya
0,30.0,20.0,40,45.0
1,29.0,,39,
2,,25.0,38,


In [52]:
# Default dropna removes every row that contains at least one NaN.
weatherDF.dropna()

Unnamed: 0,Istanbul,Ankara,Izmir,Antalya
0,30.0,20.0,40,45.0


In [53]:
# axis=1 switches to columns: every column that contains a NaN is removed.
weatherDF.dropna(axis = 1)

Unnamed: 0,Izmir
0,40
1,39
2,38


In [54]:
# thresh=2 keeps only the columns that have at least two non-NaN values.
weatherDF.dropna(axis=1, thresh=2)

Unnamed: 0,Istanbul,Ankara,Izmir
0,30.0,20.0,40
1,29.0,,39
2,,25.0,38


In [55]:
# Returns a frame in which every NaN is replaced by the constant 20.
weatherDF.fillna(20)

Unnamed: 0,Istanbul,Ankara,Izmir,Antalya
0,30.0,20.0,40,45.0
1,29.0,20.0,39,20.0
2,20.0,25.0,38,20.0


In [56]:
# Column-oriented records: one department, name and income per person.
incomeDict = dict(
    Department=["Software", "Software", "Law", "Marketing", "Law", "Marketing"],
    Name=["Ahmet", "Mehmet", "Atil", "Burak", "Zeynep", "Fatma"],
    Income=[100, 1500, 200, 300, 400, 500],
)

In [57]:
# Each dict key becomes a column of the frame.
incomeDF = pd.DataFrame(data=incomeDict)

In [58]:
incomeDF

Unnamed: 0,Department,Name,Income
0,Software,Ahmet,100
1,Software,Mehmet,1500
2,Law,Atil,200
3,Marketing,Burak,300
4,Law,Zeynep,400
5,Marketing,Fatma,500


In [59]:
# Group the rows by department; aggregations are applied to this object afterwards.
groupObj = incomeDF.groupby(by="Department")

In [60]:
# Number of entries per remaining column within each department.
groupObj.count()

Unnamed: 0_level_0,Name,Income
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Law,2,2
Marketing,2,2
Software,2,2


In [61]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric column per department.
groupObj.describe()

Unnamed: 0_level_0,Income,Income,Income,Income,Income,Income,Income,Income
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Department,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Law,2.0,300.0,141.421356,200.0,250.0,300.0,350.0,400.0
Marketing,2.0,400.0,141.421356,300.0,350.0,400.0,450.0,500.0
Software,2.0,800.0,989.949494,100.0,450.0,800.0,1150.0,1500.0


In [62]:
# First batch of records: name, sport and calories per person.
dict1 = dict(
    Name=["Ahmet", "Mehmet", "Zeynep", "Atıl"],
    Sport=["Running", "Basketball", "Swimming", "Running"],
    Cals=[200, 300, 150, 200],
)

In [65]:
# Rows are labelled 0-3.
dF1 = pd.DataFrame(data=dict1, index=list(range(4)))

In [72]:
# Second batch of records, same columns as the first.
dict2 = dict(
    Name=["Osman", "Levent", "Atlas", "Fatma"],
    Sport=["Running", "Basketball", "Swimming", "Running"],
    Cals=[123, 500, 321, 300],
)

In [73]:
# Rows are labelled 4-7, continuing after the first frame's labels.
dF2 = pd.DataFrame(data=dict2, index=list(range(4, 8)))

In [74]:
# Third batch of records, same columns again.
dict3 = dict(
    Name=["Hasan", "Cansel", "Musa", "Alev"],
    Sport=["Running", "Basketball", "Badminton", "Tennis"],
    Cals=[123, 500, 321, 300],
)

In [75]:
# Rows are labelled 8-11.
dF3 = pd.DataFrame(data=dict3, index=list(range(8, 12)))

In [79]:
# Stack the three frames row-wise; each row keeps its original index label.
pd.concat([dF1, dF2, dF3])

Unnamed: 0,Name,Sport,Cals
0,Ahmet,Running,200
1,Mehmet,Basketball,300
2,Zeynep,Swimming,150
3,Atıl,Running,200
4,Osman,Running,123
5,Levent,Basketball,500
6,Atlas,Swimming,321
7,Fatma,Running,300
8,Hasan,Running,123
9,Cansel,Basketball,500
10,Musa,Badminton,321
11,Alev,Tennis,300


In [82]:
# Left side of the merge: the sport of each person.
dict1 = dict(
    Name=["Ahmet", "Mehmet", "Zeynep", "Atıl"],
    Sport=["Running", "Basketball", "Swimming", "Running"],
)

In [83]:
# Right side of the merge: the calories of each person, keyed by the same names.
dict2 = dict(
    Name=["Ahmet", "Mehmet", "Zeynep", "Atıl"],
    Cals=[200, 300, 150, 200],
)

In [84]:
# Frame with the Name and Sport columns.
dF1 = pd.DataFrame(data=dict1)

In [85]:
# Frame with the Name and Cals columns.
dF2 = pd.DataFrame(data=dict2)

In [87]:
# Join the two frames on their shared "Name" column.
dF1.merge(dF2, on="Name")

Unnamed: 0,Name,Sport,Cals
0,Ahmet,Running,200
1,Mehmet,Basketball,300
2,Zeynep,Swimming,150
3,Atıl,Running,200


In [88]:
# Records used for the column-level helpers below: name, sport and calories.
dict1 = dict(
    Name=["Ahmet", "Mehmet", "Zeynep", "Atıl"],
    Sport=["Running", "Basketball", "Swimming", "Running"],
    Cals=[200, 300, 150, 200],
)

In [92]:
# Each dict key becomes a column of the frame.
sampleDataFrame = pd.DataFrame(data=dict1)

In [93]:
# Distinct values of the "Sport" column, returned as an array.
sampleDataFrame["Sport"].unique()

array(['Running', 'Basketball', 'Swimming'], dtype=object)

In [95]:
# Number of distinct values in the "Sport" column.
sampleDataFrame["Sport"].nunique()

3

In [97]:
# How many times each distinct value occurs in the "Sport" column.
sampleDataFrame["Sport"].value_counts()

Sport
Running       2
Basketball    1
Swimming      1
Name: count, dtype: int64

In [101]:
def increaseCals(cal, factor=1.77):
    """Scale a calorie value by a multiplicative factor.

    Parameters
    ----------
    cal : number
        Calorie value to scale; anything that supports ``*`` with a number works.
    factor : float, optional
        Multiplier applied to ``cal``. Defaults to 1.77, the value that was
        previously hard-coded, so single-argument calls behave as before.

    Returns
    -------
    The product ``cal * factor``.
    """
    return cal * factor

In [102]:
# Calls increaseCals on every element of the "Cals" column and returns the results as a new Series.
sampleDataFrame["Cals"].apply(increaseCals)

0    354.0
1    531.0
2    265.5
3    354.0
Name: Cals, dtype: float64

In [103]:
# Element-wise missing-value check: True where a cell is null (no cell is missing in this frame).
sampleDataFrame.isnull()

Unnamed: 0,Name,Sport,Cals
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
