In [15]:
import pandas as pd
import numpy as np
from numpy.random import randn

In [16]:
# 4x4 frame of standard-normal draws with row labels A-D and column labels W-Z.
# No random seed is set here, so the values differ on every fresh run.
df = pd.DataFrame(
    data=randn(4, 4),
    index=['A', 'B', 'C', 'D'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [17]:
df

Unnamed: 0,W,X,Y,Z
A,0.705804,1.737935,-0.389471,-0.962889
B,0.74082,0.611294,-1.063601,-0.462507
C,2.012329,1.06684,-1.794844,-2.2786
D,0.592999,-0.165652,-1.132064,1.324295


In [18]:
# Select a single column by label (the result is a Series)
df['X']

A    1.737935
B    0.611294
C    1.066840
D   -0.165652
Name: X, dtype: float64

In [19]:
# Select several columns at once by passing a list of labels
df[['W','Z']]

Unnamed: 0,W,Z
A,0.705804,-0.962889
B,0.74082,-0.462507
C,2.012329,-2.2786
D,0.592999,1.324295


In [20]:
# Add a new column to the DataFrame (element-wise sum of W and X)
df['new'] = df['W'] + df['X']

In [21]:
df

Unnamed: 0,W,X,Y,Z,new
A,0.705804,1.737935,-0.389471,-0.962889,2.443739
B,0.74082,0.611294,-1.063601,-0.462507,1.352114
C,2.012329,1.06684,-1.794844,-2.2786,3.079169
D,0.592999,-0.165652,-1.132064,1.324295,0.427347


In [22]:
# Remove a column from the DataFrame.
# The result is rebound to `df` instead of using inplace=True, which keeps the
# mutation explicit; `columns=` names the axis directly instead of axis=1.
df = df.drop(columns='new')

In [23]:
df

Unnamed: 0,W,X,Y,Z
A,0.705804,1.737935,-0.389471,-0.962889
B,0.74082,0.611294,-1.063601,-0.462507
C,2.012329,1.06684,-1.794844,-2.2786
D,0.592999,-0.165652,-1.132064,1.324295


In [24]:
# Remove a row by its index label.
# drop() returns a new frame; the result is not assigned, so df keeps row 'D'.
df.drop(index='D')

Unnamed: 0,W,X,Y,Z
A,0.705804,1.737935,-0.389471,-0.962889
B,0.74082,0.611294,-1.063601,-0.462507
C,2.012329,1.06684,-1.794844,-2.2786


In [25]:
# Select a row by its index label
df.loc['A']

W    0.705804
X    1.737935
Y   -0.389471
Z   -0.962889
Name: A, dtype: float64

In [26]:
# Select a row by its integer position (0 is the first row)
df.iloc[0]

W    0.705804
X    1.737935
Y   -0.389471
Z   -0.962889
Name: A, dtype: float64

In [27]:
# Conditional expressions: an element-wise comparison yields a boolean DataFrame
booldf = df > 0

In [28]:
booldf

Unnamed: 0,W,X,Y,Z
A,True,True,False,False
B,True,True,False,False
C,True,True,False,False
D,True,False,False,True


In [29]:
# Mask with the boolean frame: entries where the mask is False come back as NaN
df[booldf]

Unnamed: 0,W,X,Y,Z
A,0.705804,1.737935,,
B,0.74082,0.611294,,
C,2.012329,1.06684,,
D,0.592999,,,1.324295


In [30]:
# Filter rows: keep only those where column W is positive
df[df['W'] > 0]

Unnamed: 0,W,X,Y,Z
A,0.705804,1.737935,-0.389471,-0.962889
B,0.74082,0.611294,-1.063601,-0.462507
C,2.012329,1.06684,-1.794844,-2.2786
D,0.592999,-0.165652,-1.132064,1.324295


In [31]:
# Combine conditions with the element-wise "and" operator (&).
# Each comparison needs its own parentheses because & binds more tightly than > and <.
df[(df['W'] > 0) & (df['Z'] < 0)]

Unnamed: 0,W,X,Y,Z
A,0.705804,1.737935,-0.389471,-0.962889
B,0.74082,0.611294,-1.063601,-0.462507
C,2.012329,1.06684,-1.794844,-2.2786


In [32]:
# Combine conditions with the element-wise "or" operator (|).
# Each comparison needs its own parentheses because | binds more tightly than > and <.
df[(df['W'] > 0) | (df['Z'] < 0)]

Unnamed: 0,W,X,Y,Z
A,0.705804,1.737935,-0.389471,-0.962889
B,0.74082,0.611294,-1.063601,-0.462507
C,2.012329,1.06684,-1.794844,-2.2786
D,0.592999,-0.165652,-1.132064,1.324295


## Multi Index

In [60]:
# Parallel label lists: the outer group label and the number inside each group.
outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
inside = [1, 2, 3, 1, 2, 3]
# Pair the labels up and build a two-level MultiIndex from the (outer, inner) tuples.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [61]:
# 6x2 frame of random normal values, indexed by the two-level hier_index.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [62]:
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.031916,0.536335
G1,2,-0.029859,0.385903
G1,3,-1.23673,-0.236309
G2,1,-0.771868,0.65674
G2,2,-0.446267,-2.31034
G2,3,1.633959,-0.390104


In [63]:
df.index.names

FrozenList([None, None])

In [64]:
# Give names to the (so far unnamed) levels of the row index
df.index.names = ['Grup', 'Num']

In [65]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.031916,0.536335
G1,2,-0.029859,0.385903
G1,3,-1.23673,-0.236309
G2,1,-0.771868,0.65674
G2,2,-0.446267,-2.31034
G2,3,1.633959,-0.390104


In [66]:
# Multi-level indexing: take the cross-section for the outer label 'G1'
df.xs('G1')

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.031916,0.536335
2,-0.029859,0.385903
3,-1.23673,-0.236309


In [40]:
# Cross-section on the inner level: every row whose 'Num' level equals 1
df.xs(1, level='Num')

Unnamed: 0_level_0,A,B
Grup,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.031916,0.536335
G2,-0.771868,0.65674


## Kayıp Veriler [Missing Data]

In [67]:
# Build a small data set that contains some NaN values
df = pd.DataFrame(
    dict(
        A=[1, 2, np.nan],
        B=[5, np.nan, np.nan],
        C=[1, 2, 3],
    )
)

In [68]:
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [69]:
# Drop every row that contains at least one NaN value
df.dropna()

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [70]:
# Keep only rows with at least 2 non-NaN values (thresh counts non-missing entries);
# in this 3-column frame that drops the one row holding two NaNs.
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [71]:
# Replace every NaN with a fill value
df.fillna(value='Yeni veri')

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,Yeni veri,2
2,Yeni veri,Yeni veri,3


In [72]:
# Fill the NaNs of column A with that column's mean (mean() skips NaN by default)
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64