# Pandas Serileri

In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [2]:
# Index labels reused by the first Series examples.
labels_list = [
    "Mustafa",
    "Kemal",
    "Murat",
    "Kadir",
    "Zeynep",
]

In [3]:
# Values reused by the first Series examples (one per label).
data_list = [10, 20, 30, 40, 50]

In [4]:
# Values first, labels second; keywords make the roles explicit.
pd.Series(data=data_list, index=labels_list)

Mustafa    10
Kemal      20
Murat      30
Kadir      40
Zeynep     50
dtype: int64

In [5]:
pd.Series(data_list)

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [6]:
pd.Series(labels_list)

0    Mustafa
1      Kemal
2      Murat
3      Kadir
4     Zeynep
dtype: object

In [7]:
# A Series can also be built from a NumPy array; the Series takes the
# array's dtype.
npArray = np.array([10, 20, 30, 40, 50])
pd.Series(data=npArray)

0    10
1    20
2    30
3    40
4    50
dtype: int32

In [8]:
# Same array, now labelled A..E instead of the default 0..4 positions.
pd.Series(npArray, index=list("ABCDE"))

A    10
B    20
C    30
D    40
E    50
dtype: int32

In [9]:
# Dict keys become the index labels, dict values become the data.
dataDict = dict(Kadir=30, Kemal=80, Kamuran=60)
pd.Series(data=dataDict)

Kadir      30
Kemal      80
Kamuran    60
dtype: int64

In [10]:
# 2017 values keyed by product name.
ser2017 = pd.Series({"Buğday": 5, "Mısır": 10, "Kiraz": 14, "Erik": 20})
ser2017

Buğday     5
Mısır     10
Kiraz     14
Erik      20
dtype: int64

In [11]:
# 2018 values keyed by product name ("Çilek" replaces "Kiraz").
ser2018 = pd.Series({"Buğday": 2, "Mısır": 12, "Çilek": 12, "Erik": 6})
ser2018

Buğday     2
Mısır     12
Çilek     12
Erik       6
dtype: int64

In [12]:
# Addition aligns on index labels: labels present in only one Series
# ("Kiraz", "Çilek") produce NaN, and the result is upcast to float64.
ser2017 + ser2018

Buğday     7.0
Erik      26.0
Kiraz      NaN
Mısır     22.0
Çilek      NaN
dtype: float64

In [13]:
# Label-aligned sum of the two years; unmatched labels stay NaN.
total = ser2017.add(ser2018)
total

Buğday     7.0
Erik      26.0
Kiraz      NaN
Mısır     22.0
Çilek      NaN
dtype: float64

In [14]:
total["Erik"]

26.0

In [15]:
total["Kiraz"]

nan

In [16]:
# "Üzüm" is not an index label of `total`, so the lookup raises KeyError.
# Catch it and print the message so the notebook still runs top to bottom
# while showing the same error text.
try:
    total["Üzüm"]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'Üzüm'

# DataFrame'ler

In [19]:
# 3x3 array of samples drawn from the standard normal distribution.
# `randn` is imported in the notebook's import cell at the top.
randn(3, 3)

array([[-0.73938944,  0.15590657, -0.94549578],
       [-0.37065193, -0.31932747,  0.91330006],
       [-1.61332359, -0.02836595,  1.17197225]])

In [20]:
# 3x3 frame of random values with explicit row and column labels.
df = pd.DataFrame(
    data=randn(3, 3),
    index=list("ABC"),
    columns=["Column1", "Column2", "Column3"],
)
df

Unnamed: 0,Column1,Column2,Column3
A,0.749775,0.535896,-0.83842
B,-1.152641,0.3218,-2.112081
C,-1.512467,1.174268,-0.180375


In [21]:
df["Column1"]

A    0.749775
B   -1.152641
C   -1.512467
Name: Column1, dtype: float64

In [22]:
type(df["Column2"])

pandas.core.series.Series

In [23]:
df.loc["A"]

Column1    0.749775
Column2    0.535896
Column3   -0.838420
Name: A, dtype: float64

In [24]:
df[["Column1","Column3"]]

Unnamed: 0,Column1,Column3
A,0.749775,-0.83842
B,-1.152641,-2.112081
C,-1.512467,-0.180375


In [25]:
df

Unnamed: 0,Column1,Column2,Column3
A,0.749775,0.535896,-0.83842
B,-1.152641,0.3218,-2.112081
C,-1.512467,1.174268,-0.180375


In [26]:
# Add a new column from a Series. Assignment aligns on index labels, so the
# Series reuses df's own index and length instead of repeating the labels by
# hand; a mismatched label would otherwise silently become NaN.
df["Column4"] = pd.Series(randn(len(df)), index=df.index)
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,0.749775,0.535896,-0.83842,-0.48622
B,-1.152641,0.3218,-2.112081,1.301699
C,-1.512467,1.174268,-0.180375,0.135693


In [27]:
# Column5 is the element-wise sum of the four existing columns, added left
# to right.
df["Column5"] = (
    df["Column1"]
    .add(df["Column2"])
    .add(df["Column3"])
    .add(df["Column4"])
)
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5
A,0.749775,0.535896,-0.83842,-0.48622,-0.038969
B,-1.152641,0.3218,-2.112081,1.301699,-1.641223
C,-1.512467,1.174268,-0.180375,0.135693,-0.38288


In [28]:
# DataFrame.drop searches the row labels by default (axis=0), so passing a
# column name raises KeyError. Catch it and print the message so the notebook
# still runs top to bottom while showing the same error text.
try:
    df.drop("Column5")
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['Column5'] not found in axis"

In [29]:
# Remove the column by reassignment rather than inplace=True: the call
# returns a new frame, and `columns=` states the axis explicitly.
df = df.drop(columns="Column5")

In [30]:
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,0.749775,0.535896,-0.83842,-0.48622
B,-1.152641,0.3218,-2.112081,1.301699
C,-1.512467,1.174268,-0.180375,0.135693


In [31]:
df.loc["A"]

Column1    0.749775
Column2    0.535896
Column3   -0.838420
Column4   -0.486220
Name: A, dtype: float64

In [32]:
df.iloc[0]

Column1    0.749775
Column2    0.535896
Column3   -0.838420
Column4   -0.486220
Name: A, dtype: float64

In [33]:
df.loc["A","Column1"]

0.7497747870830132

In [34]:
df.loc[["A","B"],["Column1","Column2"]]

Unnamed: 0,Column1,Column2
A,0.749775,0.535896
B,-1.152641,0.3218


# DataFrame Filtreleme

In [35]:
# Fresh 4x3 frame of random values used for the filtering examples.
df = pd.DataFrame(
    data=randn(4, 3),
    index=list("ABCD"),
    columns=["Column1", "Column2", "Column3"],
)
df

Unnamed: 0,Column1,Column2,Column3
A,-0.101573,-0.561846,-0.922459
B,-0.622159,1.666121,-1.752061
C,0.419237,1.053142,0.141858
D,1.100483,-1.532581,1.746415


In [36]:
df > -1

Unnamed: 0,Column1,Column2,Column3
A,True,True,True
B,True,True,False
C,True,True,True
D,True,False,True


In [37]:
df > 0

Unnamed: 0,Column1,Column2,Column3
A,False,False,False
B,False,True,False
C,True,True,True
D,True,False,True


In [38]:
# Element-wise mask: True wherever the value is strictly positive.
booleanDf = df.gt(0)
booleanDf

Unnamed: 0,Column1,Column2,Column3
A,False,False,False
B,False,True,False
C,True,True,True
D,True,False,True


In [39]:
df[booleanDf]

Unnamed: 0,Column1,Column2,Column3
A,,,
B,,1.666121,
C,0.419237,1.053142,0.141858
D,1.100483,,1.746415


In [40]:
df[df > 0]

Unnamed: 0,Column1,Column2,Column3
A,,,
B,,1.666121,
C,0.419237,1.053142,0.141858
D,1.100483,,1.746415


In [41]:
df[df > -2]

Unnamed: 0,Column1,Column2,Column3
A,-0.101573,-0.561846,-0.922459
B,-0.622159,1.666121,-1.752061
C,0.419237,1.053142,0.141858
D,1.100483,-1.532581,1.746415


In [42]:
df["Column1"]

A   -0.101573
B   -0.622159
C    0.419237
D    1.100483
Name: Column1, dtype: float64

In [43]:
df["Column1"] > 0

A    False
B    False
C     True
D     True
Name: Column1, dtype: bool

In [44]:
df[df["Column1"] > 0 ]

Unnamed: 0,Column1,Column2,Column3
C,0.419237,1.053142,0.141858
D,1.100483,-1.532581,1.746415


In [45]:
df[df["Column2"] > 0 ]

Unnamed: 0,Column1,Column2,Column3
B,-0.622159,1.666121,-1.752061
C,0.419237,1.053142,0.141858


In [46]:
df["Column2"]>0.5

A    False
B     True
C     True
D    False
Name: Column2, dtype: bool

In [47]:
df[df["Column2"] > 0.5]

Unnamed: 0,Column1,Column2,Column3
B,-0.622159,1.666121,-1.752061
C,0.419237,1.053142,0.141858


In [48]:
df[df["Column2"] > 0 ]

Unnamed: 0,Column1,Column2,Column3
B,-0.622159,1.666121,-1.752061
C,0.419237,1.053142,0.141858


In [49]:
df[df["Column1"] > 0 ]

Unnamed: 0,Column1,Column2,Column3
C,0.419237,1.053142,0.141858
D,1.100483,-1.532581,1.746415


In [50]:
# Keep rows where BOTH conditions hold. Element-wise masks are combined with
# `&` (not the `and` keyword), and each comparison needs its own parentheses
# because `&` binds more tightly than `>`.
df[(df["Column1"]>0)&(df["Column2"] > 0)]

Unnamed: 0,Column1,Column2,Column3
C,0.419237,1.053142,0.141858


In [51]:
# Keep rows where AT LEAST ONE condition holds. Element-wise masks are
# combined with `|` (not the `or` keyword), and each comparison needs its own
# parentheses because `|` binds more tightly than `>`.
df[(df["Column1"]>0) |(df["Column2"] > 0)]

Unnamed: 0,Column1,Column2,Column3
B,-0.622159,1.666121,-1.752061
C,0.419237,1.053142,0.141858
D,1.100483,-1.532581,1.746415


In [52]:
df

Unnamed: 0,Column1,Column2,Column3
A,-0.101573,-0.561846,-0.922459
B,-0.622159,1.666121,-1.752061
C,0.419237,1.053142,0.141858
D,1.100483,-1.532581,1.746415


In [53]:
# Add a new column from a Series. Assignment aligns on index labels, so the
# Series reuses df's own index and length instead of repeating the labels by
# hand; a mismatched label would otherwise silently become NaN.
df["Column4"] = pd.Series(randn(len(df)), index=df.index)
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,-0.101573,-0.561846,-0.922459,-0.002661
B,-0.622159,1.666121,-1.752061,1.456007
C,0.419237,1.053142,0.141858,0.465944
D,1.100483,-1.532581,1.746415,0.7718


In [54]:
df["Column5"] = randn(4)
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5
A,-0.101573,-0.561846,-0.922459,-0.002661,-0.377522
B,-0.622159,1.666121,-1.752061,1.456007,-1.068197
C,0.419237,1.053142,0.141858,0.465944,-0.429384
D,1.100483,-1.532581,1.746415,0.7718,0.288454


In [60]:
# Add a column of string labels, one per row (the list length must equal the
# number of rows).
# NOTE(review): the recorded output already shows "Column6" as the index name,
# which suggests this cell was re-run after set_index had been applied.
# Re-execute from a clean kernel to confirm the expected output.
df["Column6"] = ["newValue1","newValue2","newValue3","newValue4"]
df

Unnamed: 0_level_0,Column1,Column2,Column3,Column4,Column5,Column6
Column6,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
newValue1,-0.101573,-0.561846,-0.922459,-0.002661,-0.377522,newValue1
newValue2,-0.622159,1.666121,-1.752061,1.456007,-1.068197,newValue2
newValue3,0.419237,1.053142,0.141858,0.465944,-0.429384,newValue3
newValue4,1.100483,-1.532581,1.746415,0.7718,0.288454,newValue4


In [61]:
# Promote Column6 to the row index by reassignment rather than inplace=True;
# set_index returns a new frame and removes Column6 from the columns.
df = df.set_index("Column6")

In [62]:
df

Unnamed: 0_level_0,Column1,Column2,Column3,Column4,Column5
Column6,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
newValue1,-0.101573,-0.561846,-0.922459,-0.002661,-0.377522
newValue2,-0.622159,1.666121,-1.752061,1.456007,-1.068197
newValue3,0.419237,1.053142,0.141858,0.465944,-0.429384
newValue4,1.100483,-1.532581,1.746415,0.7718,0.288454


In [63]:
df.index.names

FrozenList(['Column6'])

# DataFrame'lere MultiIndex Tanımlama

In [65]:
# Outer level: each group label repeated three times in a row.
# NOTE: "outterIndex" is a misspelling of "outerIndex"; the name is kept
# because later cells reference it.
outterIndex = [f"group{g}" for g in range(1, 4) for _ in range(3)]
# Inner level: Index1..Index3 repeated once per group.
innerIndex = [f"Index{i}" for _ in range(3) for i in range(1, 4)]

In [67]:
list(zip(outterIndex,innerIndex))

[('group1', 'Index1'),
 ('group1', 'Index2'),
 ('group1', 'Index3'),
 ('group2', 'Index1'),
 ('group2', 'Index2'),
 ('group2', 'Index3'),
 ('group3', 'Index1'),
 ('group3', 'Index2'),
 ('group3', 'Index3')]

In [69]:
# Pair the outer and inner labels and build the two-level index in a single
# expression, so `hierarchy` is only ever bound to the MultiIndex.
hierarchy = pd.MultiIndex.from_tuples(list(zip(outterIndex, innerIndex)))
hierarchy

MultiIndex([('group1', 'Index1'),
            ('group1', 'Index2'),
            ('group1', 'Index3'),
            ('group2', 'Index1'),
            ('group2', 'Index2'),
            ('group2', 'Index3'),
            ('group3', 'Index1'),
            ('group3', 'Index2'),
            ('group3', 'Index3')],
           )

In [71]:
# 9x3 frame of random values indexed by the two-level MultiIndex.
fd = pd.DataFrame(
    data=randn(9, 3),
    index=hierarchy,
    columns=["column1", "column2", "column3"],
)
fd

Unnamed: 0,Unnamed: 1,column1,column2,column3
group1,Index1,-0.034478,0.733255,-0.154031
group1,Index2,1.312298,0.414496,-1.562907
group1,Index3,-1.262568,0.208238,-1.407849
group2,Index1,-1.115216,1.494019,-0.12565
group2,Index2,-0.20046,0.683305,0.797636
group2,Index3,0.526423,-1.233072,1.73351
group3,Index1,-0.788774,-0.871677,1.638033
group3,Index2,-0.716768,2.504527,-0.135183
group3,Index3,-1.061658,-0.148186,-0.621397


In [73]:
fd["column1"]

group1  Index1   -0.034478
        Index2    1.312298
        Index3   -1.262568
group2  Index1   -1.115216
        Index2   -0.200460
        Index3    0.526423
group3  Index1   -0.788774
        Index2   -0.716768
        Index3   -1.061658
Name: column1, dtype: float64

In [75]:
fd.loc["group1"]

Unnamed: 0,column1,column2,column3
Index1,-0.034478,0.733255,-0.154031
Index2,1.312298,0.414496,-1.562907
Index3,-1.262568,0.208238,-1.407849


In [76]:
fd.loc["group2"]

Unnamed: 0,column1,column2,column3
Index1,-1.115216,1.494019,-0.12565
Index2,-0.20046,0.683305,0.797636
Index3,0.526423,-1.233072,1.73351


In [77]:
fd.loc[["group1","group2"]]

Unnamed: 0,Unnamed: 1,column1,column2,column3
group1,Index1,-0.034478,0.733255,-0.154031
group1,Index2,1.312298,0.414496,-1.562907
group1,Index3,-1.262568,0.208238,-1.407849
group2,Index1,-1.115216,1.494019,-0.12565
group2,Index2,-0.20046,0.683305,0.797636
group2,Index3,0.526423,-1.233072,1.73351


In [80]:
# Select one scalar with a single .loc call: the tuple addresses the
# (outer, inner) row, the second argument picks the column. This avoids the
# intermediate objects created by chained indexing.
fd.loc[("group1", "Index1"), "column1"]

-0.03447845447649411

In [82]:
fd.index.names

FrozenList([None, None])

In [86]:
# Give the two index levels names; they were unnamed (None, None) before.
# Named levels can then be referenced by name, e.g. xs(..., level="indexes").
fd.index.names = ["groups","indexes"]
fd

Unnamed: 0_level_0,Unnamed: 1_level_0,column1,column2,column3
groups,indexes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
group1,Index1,-0.034478,0.733255,-0.154031
group1,Index2,1.312298,0.414496,-1.562907
group1,Index3,-1.262568,0.208238,-1.407849
group2,Index1,-1.115216,1.494019,-0.12565
group2,Index2,-0.20046,0.683305,0.797636
group2,Index3,0.526423,-1.233072,1.73351
group3,Index1,-0.788774,-0.871677,1.638033
group3,Index2,-0.716768,2.504527,-0.135183
group3,Index3,-1.061658,-0.148186,-0.621397


In [87]:
fd.xs("group1")

Unnamed: 0_level_0,column1,column2,column3
indexes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Index1,-0.034478,0.733255,-0.154031
Index2,1.312298,0.414496,-1.562907
Index3,-1.262568,0.208238,-1.407849


In [88]:
# One cross-section with an (outer, inner) tuple returns the row as a Series;
# the column is then picked by label. This replaces three chained xs calls.
fd.xs(("group2", "Index1"))["column2"]

1.4940185637405063

In [89]:
fd.xs("Index1",level="indexes")

Unnamed: 0_level_0,column1,column2,column3
groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
group1,-0.034478,0.733255,-0.154031
group2,-1.115216,1.494019,-0.12565
group3,-0.788774,-0.871677,1.638033
