## Introduction to Pandas

In [1]:
import numpy as np
import pandas as pd

## Series

In [2]:
# Name -> age mapping; insertion order becomes the Series index order.
myDict = dict(Andre=50, Jessica=30, John=40)

In [3]:
pd.Series(myDict)

Andre      50
Jessica    30
John       40
dtype: int64

In [4]:
type(pd.Series(myDict))

pandas.core.series.Series

In [5]:
# Parallel lists: theirAges[i] is the age that belongs to theirNames[i].
theirNames = ["Andre", "Jessica", "John"]
theirAges = [50, 30, 40]

In [6]:
pd.Series(theirAges)

0    50
1    30
2    40
dtype: int64

In [7]:
pd.Series(theirAges, theirNames)

Andre      50
Jessica    30
John       40
dtype: int64

In [8]:
pd.Series(data = theirAges, index = theirNames)

Andre      50
Jessica    30
John       40
dtype: int64

In [9]:
# One-dimensional integer array used to show that a Series can wrap NumPy data.
numpyArray = np.asarray([50, 30, 40])

In [10]:
numpyArray

array([50, 30, 40])

In [11]:
pd.Series(numpyArray)

0    50
1    30
2    40
dtype: int32

In [12]:
pd.Series(numpyArray, theirNames)

Andre      50
Jessica    30
John       40
dtype: int32

In [13]:
pd.Series(["Art", "Amar", "Tech"], [1, 2, 3])

1     Art
2    Amar
3    Tech
dtype: object

In [14]:
# First-round scores, labelled by competitor name.
ResultofTournament = pd.Series(data=[8, 3, 2], index=["Art", "Amar", "Tech"])

In [15]:
ResultofTournament

Art     8
Amar    3
Tech    2
dtype: int64

In [16]:
# Second-round scores for the same competitors, given as a label -> value mapping.
rResultofTournament = pd.Series({"Art": 7, "Amar": 4, "Tech": 1})

In [17]:
rResultofTournament

Art     7
Amar    4
Tech    1
dtype: int64

In [18]:
rResultofTournament["Art"]

7

In [19]:
# Series arithmetic aligns on index labels, so each competitor's two scores are summed.
lastResult = ResultofTournament.add(rResultofTournament)

In [20]:
lastResult

Art     15
Amar     7
Tech     3
dtype: int64

In [21]:
# Labels a-d; "d" has no counterpart in diff2Series.
diffSeries = pd.Series(data=[1, 3, 5, 7], index=["a", "b", "c", "d"])

In [22]:
# Labels a-c plus "f"; "f" has no counterpart in diffSeries.
diff2Series = pd.Series(data=[2, 4, 6, 8], index=["a", "b", "c", "f"])

In [23]:
# Addition aligns on labels: a label present in only one operand ("d", "f")
# yields NaN, which is also why the result is float64 rather than int64.
oLastResult = diffSeries.add(diff2Series)

In [24]:
oLastResult

a     3.0
b     7.0
c    11.0
d     NaN
f     NaN
dtype: float64

## DataFrame

In [25]:
# 4x3 sample of standard-normal values used as demo data for the DataFrame
# cells. The generator is seeded so that "Restart Kernel -> Run All" produces
# the same numbers every time (the unseeded draw changed on every run).
# NOTE: outputs recorded before seeding show different values; re-execute the
# notebook to refresh them.
data = np.random.default_rng(seed=42).standard_normal((4, 3))

In [26]:
data

array([[ 0.1074913 , -1.8857336 ,  0.46713219],
       [-1.88482237,  0.92535134, -0.91338556],
       [-0.94115473, -1.85948181, -0.06105061],
       [-0.62119573,  0.36146174,  1.15744584]])

In [27]:
# No labels supplied, so rows and columns get default integer labels.
dataFrame = pd.DataFrame(data=data)

In [28]:
dataFrame

Unnamed: 0,0,1,2
0,0.107491,-1.885734,0.467132
1,-1.884822,0.925351,-0.913386
2,-0.941155,-1.859482,-0.061051
3,-0.621196,0.361462,1.157446


In [29]:
dataFrame[1]

0   -1.885734
1    0.925351
2   -1.859482
3    0.361462
Name: 1, dtype: float64

In [30]:
type(dataFrame[2])

pandas.core.series.Series

In [31]:
# Same values as dataFrame, but with explicit row and column labels.
newDataFrame = pd.DataFrame(
    data=data,
    index=["Art", "Amar", "Tech", "artamartech"],
    columns=["Price", "Age", "Profession"],
)

In [32]:
newDataFrame

Unnamed: 0,Price,Age,Profession
Art,0.107491,-1.885734,0.467132
Amar,-1.884822,0.925351,-0.913386
Tech,-0.941155,-1.859482,-0.061051
artamartech,-0.621196,0.361462,1.157446


In [33]:
newDataFrame["Age"]

Art           -1.885734
Amar           0.925351
Tech          -1.859482
artamartech    0.361462
Name: Age, dtype: float64

In [34]:
newDataFrame[["Price", "Age"]]

Unnamed: 0,Price,Age
Art,0.107491,-1.885734
Amar,-1.884822,0.925351
Tech,-0.941155,-1.859482
artamartech,-0.621196,0.361462


In [35]:
newDataFrame.loc["Tech"]

Price        -0.941155
Age          -1.859482
Profession   -0.061051
Name: Tech, dtype: float64

In [36]:
newDataFrame.iloc[0]

Price         0.107491
Age          -1.885734
Profession    0.467132
Name: Art, dtype: float64

In [37]:
newDataFrame

Unnamed: 0,Price,Age,Profession
Art,0.107491,-1.885734,0.467132
Amar,-1.884822,0.925351,-0.913386
Tech,-0.941155,-1.859482,-0.061051
artamartech,-0.621196,0.361462,1.157446


In [38]:
newDataFrame["Price"]

Art            0.107491
Amar          -1.884822
Tech          -0.941155
artamartech   -0.621196
Name: Price, dtype: float64

In [39]:
# Derived demo column: assigning to a new key adds the column to the frame.
# The value is simply the Age column doubled.
newDataFrame["Retired Age"] = newDataFrame["Age"] * 2

In [40]:
newDataFrame

Unnamed: 0,Price,Age,Profession,Retired Age
Art,0.107491,-1.885734,0.467132,-3.771467
Amar,-1.884822,0.925351,-0.913386,1.850703
Tech,-0.941155,-1.859482,-0.061051,-3.718964
artamartech,-0.621196,0.361462,1.157446,0.722923


In [41]:
# drop() returns a new frame without the column; newDataFrame itself is unchanged.
newDataFrame.drop(columns="Retired Age")

Unnamed: 0,Price,Age,Profession
Art,0.107491,-1.885734,0.467132
Amar,-1.884822,0.925351,-0.913386
Tech,-0.941155,-1.859482,-0.061051
artamartech,-0.621196,0.361462,1.157446


In [42]:
# Same idea for rows: the result omits "artamartech", newDataFrame itself is unchanged.
newDataFrame.drop(index="artamartech")

Unnamed: 0,Price,Age,Profession,Retired Age
Art,0.107491,-1.885734,0.467132,-3.771467
Amar,-1.884822,0.925351,-0.913386,1.850703
Tech,-0.941155,-1.859482,-0.061051,-3.718964


In [43]:
newDataFrame

Unnamed: 0,Price,Age,Profession,Retired Age
Art,0.107491,-1.885734,0.467132,-3.771467
Amar,-1.884822,0.925351,-0.913386,1.850703
Tech,-0.941155,-1.859482,-0.061051,-3.718964
artamartech,-0.621196,0.361462,1.157446,0.722923


In [44]:
# Make the removal permanent by rebinding the name to the frame that drop()
# returns. This replaces inplace=True: reassignment keeps the data lineage
# explicit, allows method chaining, and is the style pandas recommends.
newDataFrame = newDataFrame.drop(columns="Retired Age")

In [45]:
newDataFrame

Unnamed: 0,Price,Age,Profession
Art,0.107491,-1.885734,0.467132
Amar,-1.884822,0.925351,-0.913386
Tech,-0.941155,-1.859482,-0.061051
artamartech,-0.621196,0.361462,1.157446


In [46]:
newDataFrame.loc["Art"]

Price         0.107491
Age          -1.885734
Profession    0.467132
Name: Art, dtype: float64

In [47]:
# Two-step lookup: .loc["Art"] first returns the whole row as a Series, then
# ["Age"] picks one value out of that Series.
newDataFrame.loc["Art"]["Age"]

-1.8857335977974878

In [48]:
newDataFrame.loc["Art", "Age"]

-1.8857335977974878

In [49]:
newDataFrame < 0

Unnamed: 0,Price,Age,Profession
Art,False,True,False
Amar,True,False,True
Tech,True,True,True
artamartech,True,False,False


In [50]:
newDataFrame > 0.7

Unnamed: 0,Price,Age,Profession
Art,False,False,False
Amar,False,True,False
Tech,False,False,False
artamartech,False,False,True


In [51]:
# Element-wise comparison: True wherever a cell is greater than 0.5.
booleanFrame = newDataFrame.gt(0.5)

In [52]:
newDataFrame[booleanFrame]

Unnamed: 0,Price,Age,Profession
Art,,,
Amar,,0.925351,
Tech,,,
artamartech,,,1.157446


In [53]:
newDataFrame["Age"]

Art           -1.885734
Amar           0.925351
Tech          -1.859482
artamartech    0.361462
Name: Age, dtype: float64

In [54]:
# Row filter: keep only the rows whose Age value is greater than 0.7.
newDataFrame.loc[newDataFrame["Age"].gt(0.7)]

Unnamed: 0,Price,Age,Profession
Amar,-1.884822,0.925351,-0.913386


In [55]:
newDataFrame

Unnamed: 0,Price,Age,Profession
Art,0.107491,-1.885734,0.467132
Amar,-1.884822,0.925351,-0.913386
Tech,-0.941155,-1.859482,-0.061051
artamartech,-0.621196,0.361462,1.157446


In [56]:
newDataFrame.reset_index()

Unnamed: 0,index,Price,Age,Profession
0,Art,0.107491,-1.885734,0.467132
1,Amar,-1.884822,0.925351,-0.913386
2,Tech,-0.941155,-1.859482,-0.061051
3,artamartech,-0.621196,0.361462,1.157446


In [57]:
newDataFrame

Unnamed: 0,Price,Age,Profession
Art,0.107491,-1.885734,0.467132
Amar,-1.884822,0.925351,-0.913386
Tech,-0.941155,-1.859482,-0.061051
artamartech,-0.621196,0.361462,1.157446


In [58]:
# Replacement row labels, one per row of newDataFrame.
# Fixed: the second label used to be " rama" (leading space). That space
# would become part of the index label, so .loc["rama"] would raise KeyError.
newIndexList = ["tar", "rama", "hcet", "hcetramatar"]

In [59]:
# Store the labels as an ordinary column first; a later cell promotes this
# column to the index with set_index().
newDataFrame["New Index"] = newIndexList

In [60]:
newDataFrame

Unnamed: 0,Price,Age,Profession,New Index
Art,0.107491,-1.885734,0.467132,tar
Amar,-1.884822,0.925351,-0.913386,rama
Tech,-0.941155,-1.859482,-0.061051,hcet
artamartech,-0.621196,0.361462,1.157446,hcetramatar


In [61]:
newDataFrame.set_index("New Index")

Unnamed: 0_level_0,Price,Age,Profession
New Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
tar,0.107491,-1.885734,0.467132
rama,-1.884822,0.925351,-0.913386
hcet,-0.941155,-1.859482,-0.061051
hcetramatar,-0.621196,0.361462,1.157446


In [62]:
newDataFrame

Unnamed: 0,Price,Age,Profession,New Index
Art,0.107491,-1.885734,0.467132,tar
Amar,-1.884822,0.925351,-0.913386,rama
Tech,-0.941155,-1.859482,-0.061051,hcet
artamartech,-0.621196,0.361462,1.157446,hcetramatar


In [63]:
# Promote the "New Index" column to the row index and keep the result by
# rebinding the name. This replaces inplace=True: reassignment keeps the data
# lineage explicit, allows method chaining, and is the style pandas recommends.
newDataFrame = newDataFrame.set_index("New Index")

In [64]:
newDataFrame

Unnamed: 0_level_0,Price,Age,Profession
New Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
tar,0.107491,-1.885734,0.467132
rama,-1.884822,0.925351,-0.913386
hcet,-0.941155,-1.859482,-0.061051
hcetramatar,-0.621196,0.361462,1.157446


In [65]:
newDataFrame.loc["tar"]

Price         0.107491
Age          -1.885734
Profession    0.467132
Name: tar, dtype: float64

### MultiIndex

In [66]:
# Outer index level: the show each character belongs to (three rows per show).
firstIndex = ["Simpson"] * 3 + ["South Park"] * 3

In [67]:
# Inner index level: character names, in the same order as firstIndex.
subIndex = [
    "Homer", "Bart", "Marge",      # Simpson
    "Cartman", "Kenny", "Kyle",    # South Park
]

In [68]:
# Pair every show with its character: one (show, character) tuple per row.
mergeIndex = [(show, character) for show, character in zip(firstIndex, subIndex)]

In [69]:
mergeIndex

[('Simpson', 'Homer'),
 ('Simpson', 'Bart'),
 ('Simpson', 'Marge'),
 ('South Park', 'Cartman'),
 ('South Park', 'Kenny'),
 ('South Park', 'Kyle')]

In [70]:
# Convert the (show, character) tuples into a two-level MultiIndex.
# NOTE: this rebinds mergeIndex from a plain list of tuples to a
# pandas MultiIndex; every later cell sees the MultiIndex.
mergeIndex = pd.MultiIndex.from_tuples(mergeIndex)

In [71]:
mergeIndex

MultiIndex([(   'Simpson',   'Homer'),
            (   'Simpson',    'Bart'),
            (   'Simpson',   'Marge'),
            ('South Park', 'Cartman'),
            ('South Park',   'Kenny'),
            ('South Park',    'Kyle')],
           )

In [72]:
type(mergeIndex)

pandas.core.indexes.multi.MultiIndex

In [73]:
# One [age, profession-code] row per character, in the same order as the index.
myCartoonList = [
    [40, "A"],
    [30, "B"],
    [10, "C"],
    [5, "D"],
    [7, "E"],
    [50, "F"],
]

In [74]:
# NOTE: each row of myCartoonList mixes an int with a str. A NumPy array holds
# a single dtype, so np.array() stores every element (the ages included) as a
# string here.
cartoonNumpyArray = np.array(myCartoonList)

In [75]:
# Build the frame straight from the Python rows instead of the NumPy array.
# np.array() on rows that mix int and str stores everything under one string
# dtype, which turned every Age into text. Given the list of rows, pandas
# infers each column separately, so Age stays numeric and Profession stays text.
cartoonDataFrame = pd.DataFrame(
    myCartoonList,
    index=mergeIndex,
    columns=["Age", "Profession"],
)

In [76]:
cartoonDataFrame

Unnamed: 0,Unnamed: 1,Age,Profession
Simpson,Homer,40,A
Simpson,Bart,30,B
Simpson,Marge,10,C
South Park,Cartman,5,D
South Park,Kenny,7,E
South Park,Kyle,50,F


In [77]:
cartoonDataFrame.loc["Simpson"]

Unnamed: 0,Age,Profession
Homer,40,A
Bart,30,B
Marge,10,C


In [78]:
cartoonDataFrame.loc["South Park"]

Unnamed: 0,Age,Profession
Cartman,5,D
Kenny,7,E
Kyle,50,F


In [79]:
cartoonDataFrame.loc["South Park", "Kenny"]

Age           7
Profession    E
Name: (South Park, Kenny), dtype: object

In [80]:
cartoonDataFrame.loc["South Park"].loc["Kenny"]

Age           7
Profession    E
Name: Kenny, dtype: object

In [81]:
# Name the two index levels. The labels are Turkish: "Film Adı" means
# "film name" (outer level) and "İsim" means "name" (inner level).
cartoonDataFrame.index.names = ["Film Adı", "İsim"]

In [82]:
cartoonDataFrame

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Profession
Film Adı,İsim,Unnamed: 2_level_1,Unnamed: 3_level_1
Simpson,Homer,40,A
Simpson,Bart,30,B
Simpson,Marge,10,C
South Park,Cartman,5,D
South Park,Kenny,7,E
South Park,Kyle,50,F
