In [1]:
import pandas as pd
import numpy as np

## DataFrame

In [2]:
# Small demo frame: one row per person, labelled "A".."D".
data = {
    "İsim": ["Ali", "Ayşe", "Fatma", "Veli"],
    "boy cm": [170, 160, 170, 180],
    "kilo-kg": [70, 60, 60, 80],
}
df = pd.DataFrame(data, index=["A", "B", "C", "D"])
# BMI = weight in kg / (height in m) ** 2; the height column is in cm, hence the / 100.
df["BMI"] = df["kilo-kg"] / (df["boy cm"] / 100) ** 2
# Throw-away counter column (dropped again further down); sized from the frame
# instead of a hard-coded 4 so it stays valid if rows are added to `data`.
df["new"] = np.arange(len(df))

In [3]:
df

Unnamed: 0,İsim,boy cm,kilo-kg,BMI,new
A,Ali,170,70,24.221453,0
B,Ayşe,160,60,23.4375,1
C,Fatma,170,60,20.761246,2
D,Veli,180,80,24.691358,3


## Removing Columns & Rows

In [4]:
# drop() defaults to axis=0, which removes rows. "new" is a column, so looking it up
# in the row index fails with KeyError; pass axis=1 to drop a column instead.
# The error is caught and printed so the notebook still runs from top to bottom.
try:
    df.drop("new")
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['new'] not found in axis"

In [5]:
# axis=1 makes drop() look the label up among the columns; a new frame is returned.
df.drop("İsim", axis= 1)

Unnamed: 0,boy cm,kilo-kg,BMI,new
A,170,70,24.221453,0
B,160,60,23.4375,1
C,170,60,20.761246,2
D,180,80,24.691358,3


In [6]:
# Column removal spelled with the columns= keyword (equivalent to labels + axis=1).
df.drop(columns="kilo-kg")

Unnamed: 0,İsim,boy cm,BMI,new
A,Ali,170,24.221453,0
B,Ayşe,160,23.4375,1
C,Fatma,170,20.761246,2
D,Veli,180,24.691358,3


In [7]:
df

Unnamed: 0,İsim,boy cm,kilo-kg,BMI,new
A,Ali,170,70,24.221453,0
B,Ayşe,160,60,23.4375,1
C,Fatma,170,60,20.761246,2
D,Veli,180,80,24.691358,3


In [8]:
# inplace defaults to False; with inplace=True the column is removed from df itself.
df.drop("new", axis=1, inplace=True)

In [9]:
df

Unnamed: 0,İsim,boy cm,kilo-kg,BMI
A,Ali,170,70,24.221453
B,Ayşe,160,60,23.4375
C,Fatma,170,60,20.761246
D,Veli,180,80,24.691358


In [10]:
# Remove the row labelled "C"; index= is the keyword form of the default axis=0.
df.drop(index="C")

Unnamed: 0,İsim,boy cm,kilo-kg,BMI
A,Ali,170,70,24.221453
B,Ayşe,160,60,23.4375
D,Veli,180,80,24.691358


In [11]:
# A list of labels drops several rows at once; df itself is still left untouched.
df.drop(["C", "A"])

Unnamed: 0,İsim,boy cm,kilo-kg,BMI
B,Ayşe,160,60,23.4375
D,Veli,180,80,24.691358


In [12]:
# inplace=True: rows "B" and "C" are removed from df itself.
df.drop(["B", "C"], inplace=True)

In [13]:
df

Unnamed: 0,İsim,boy cm,kilo-kg,BMI
A,Ali,170,70,24.221453
D,Veli,180,80,24.691358


In [14]:
# Drop both measurement columns from df itself, using the columns= keyword.
df.drop(columns=["boy cm", "kilo-kg"], inplace=True)

In [15]:
df

Unnamed: 0,İsim,BMI
A,Ali,24.221453
D,Veli,24.691358


## Selecting Rows and Columns using .loc[ ] and .iloc[ ]

In [16]:
# Seed NumPy's global generator so the random table below is reproducible.
np.random.seed(45)

# 5 x 4 table of integers drawn from [1, 100).
data = np.random.randint(1, 100, size=(5, 4))

# Explicit integer row labels 101..105 (instead of the default 0..4) keep
# label-based and position-based lookups distinguishable in the cells below.
row_labels = range(101, 106)
column_names = ["col1", "col2", "col3", "col4"]
df = pd.DataFrame(data=data, index=row_labels, columns=column_names)

df

Unnamed: 0,col1,col2,col3,col4
101,76,31,4,33
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88
105,47,9,54,13


In [17]:
# 103 is an index label here, not an integer position: the index was set explicitly
# to 101..105 rather than generated automatically.
df.loc[103]

col1    69
col2    16
col3    66
col4    15
Name: 103, dtype: int32

In [18]:
# Label-based slice, so the end label 104 is included; a positional slice would
# have left the end out. These labels are the ones assigned when the frame was built.
df.loc[102:104]

Unnamed: 0,col1,col2,col3,col4
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88


In [19]:
df

Unnamed: 0,col1,col2,col3,col4
101,76,31,4,33
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88
105,47,9,54,13


In [20]:
# iloc works with integer positions, so the stop position 4 is not included.
df.iloc[1:4]

Unnamed: 0,col1,col2,col3,col4
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88


In [21]:
# Label lookup: 101 is the first index label of this frame.
df.loc[101]

col1    76
col2    31
col3     4
col4    33
Name: 101, dtype: int32

In [22]:
# iloc is purely positional: the frame has only five rows, so position 101 does not
# exist. The error is caught and printed so the notebook still runs top to bottom.
try:
    df.iloc[101]
except IndexError as err:
    print(f"IndexError: {err}")

IndexError: single positional indexer is out-of-bounds

In [23]:
df

Unnamed: 0,col1,col2,col3,col4
101,76,31,4,33
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88
105,47,9,54,13


In [24]:
"a b c d e".split()

['a', 'b', 'c', 'd', 'e']

In [25]:
# Replace the integer row labels with the strings "a".."e".
df.index = "a b c d e".split()

In [26]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [27]:
# Label slice on string labels: both "b" and "d" are included.
df.loc["b": "d"]

Unnamed: 0,col1,col2,col3,col4
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88


In [28]:
# Positional counterpart: positions 1, 2 and 3 (stop position 4 excluded).
df.iloc[1:4]

Unnamed: 0,col1,col2,col3,col4
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88


In [29]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [30]:
# Every second row, by position.
df.iloc[::2]

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
c,69,16,66,15
e,47,9,54,13


In [31]:
# Label slices take a step as well: every second row from "a" through "e".
df.loc["a":"e":2]

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
c,69,16,66,15
e,47,9,54,13


In [32]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [33]:
# Left of the comma selects rows, right of it selects columns.
# A single column name (not wrapped in a list) gives back a pandas Series.
df.loc["a": "c", "col3"]

a     4
b    86
c    66
Name: col3, dtype: int32

In [34]:
# Keep the single-column selection in a variable so its type can be inspected.
a = df.loc["a": "c", "col3"]
a

a     4
b    86
c    66
Name: col3, dtype: int32

In [35]:
type(a)

pandas.core.series.Series

In [36]:
# Wrapping the column name in a list keeps the result two-dimensional (a DataFrame).
b = df.loc["a": "c", ["col3"]]
b

Unnamed: 0,col3
a,4
b,86
c,66


In [37]:
type(b)

pandas.core.frame.DataFrame

In [38]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [39]:
# Positional version: first three rows, column position 2 ("col3"); returns a Series.
df.iloc[:3, 2]

a     4
b    86
c    66
Name: col3, dtype: int32

In [40]:
# A list of column positions returns a DataFrame instead of a Series.
df.iloc[:3, [2]]

Unnamed: 0,col3
a,4
b,86
c,66


In [41]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [42]:
# Label slices on both axes; both endpoints are included on each axis.
df.loc["d":"e", "col2" : "col3"]

Unnamed: 0,col2,col3
d,58,73
e,9,54


In [43]:
# Extending the column slice to "col4" brings that column in as well.
df.loc["d":"e", "col2" : "col4"]

Unnamed: 0,col2,col3,col4
d,58,73,88
e,9,54,13


In [44]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [45]:
# Slice for the rows, list for the columns: a list can pick non-adjacent columns.
df.loc["d":"e", ["col2","col4"]]

Unnamed: 0,col2,col4
d,58,88
e,9,13


In [46]:
# Lists on both axes: rows "d" and "e", columns "col2" and "col4".
df.loc[["d","e"], ["col2","col4"]]

Unnamed: 0,col2,col4
d,58,88
e,9,13


In [47]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [48]:
# With a list, rows come back in the order requested ("d" before "a").
df.loc[["d","a"], ["col2","col4"]]

Unnamed: 0,col2,col4
d,58,88
a,31,33


In [49]:
# Row slice only: every column is returned.
df.loc["d":"e"]

Unnamed: 0,col1,col2,col3,col4
d,54,58,73,88
e,47,9,54,13


In [50]:
# Rows and column selected in a single .loc call instead of chaining [] onto an
# intermediate frame; a bare column name yields a Series.
df.loc["d":"e", "col2"]

d    58
e     9
Name: col2, dtype: int32

In [51]:
# Rows and column selected in a single .loc call instead of chaining [] onto an
# intermediate frame; the one-element list keeps the result a DataFrame.
df.loc["d":"e", ["col2"]]

Unnamed: 0,col2
d,58
e,9


In [52]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [53]:
# Rows and columns selected in a single .loc call instead of chaining [] onto an
# intermediate frame.
df.loc["d":"e", ["col2", "col3"]]

Unnamed: 0,col2,col3
d,58,73
e,9,54


In [54]:
# Double brackets on the frame itself: a one-column DataFrame with all rows.
df[["col3"]]

Unnamed: 0,col3
a,4
b,86
c,66
d,73
e,54


In [55]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [56]:
# Lists on both axes may reorder rows and columns freely.
df.loc[["d", "a", "b"] , ["col3", "col1"]]

Unnamed: 0,col3,col1
d,73,54
a,4,76
b,86,96


In [57]:
import seaborn as sns

In [58]:
# List the names of the example datasets seaborn offers (one of them is loaded next).
sns.get_dataset_names()

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

In [59]:
# Load seaborn's "titanic" example dataset. df is rebound here, so the small
# col1..col4 frame used so far is no longer reachable through this name.
df = sns.load_dataset("titanic")
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [60]:
# Copy of the freshly loaded data, taken before df is modified in place further down.
df1 = df.copy()

In [61]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [62]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [63]:
# Summary statistics; by default only the numeric columns appear.
# .T transposes the table so each column of df becomes one row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
survived,891.0,0.383838,0.486592,0.0,0.0,0.0,1.0,1.0
pclass,891.0,2.308642,0.836071,1.0,2.0,3.0,3.0,3.0
age,714.0,29.699118,14.526497,0.42,20.125,28.0,38.0,80.0
sibsp,891.0,0.523008,1.102743,0.0,0.0,0.0,1.0,8.0
parch,891.0,0.381594,0.806057,0.0,0.0,0.0,0.0,6.0
fare,891.0,32.204208,49.693429,0.0,7.9104,14.4542,31.0,512.3292


In [65]:
# include="object" summarises the string columns instead: count, unique, top, freq.
df.describe(include="object").T

Unnamed: 0,count,unique,top,freq
sex,891,2,male,577
embarked,889,3,S,644
who,891,3,man,537
embark_town,889,3,Southampton,644
alive,891,2,no,549


In [66]:
# include="all" covers every column; statistics that do not apply to a column are left missing.
df.describe(include="all").T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
survived,891.0,,,,0.383838,0.486592,0.0,0.0,0.0,1.0,1.0
pclass,891.0,,,,2.308642,0.836071,1.0,2.0,3.0,3.0,3.0
sex,891.0,2.0,male,577.0,,,,,,,
age,714.0,,,,29.699118,14.526497,0.42,20.125,28.0,38.0,80.0
sibsp,891.0,,,,0.523008,1.102743,0.0,0.0,0.0,1.0,8.0
parch,891.0,,,,0.381594,0.806057,0.0,0.0,0.0,0.0,6.0
fare,891.0,,,,32.204208,49.693429,0.0,7.9104,14.4542,31.0,512.3292
embarked,889.0,3.0,S,644.0,,,,,,,
class,891.0,3.0,Third,491.0,,,,,,,
who,891.0,3.0,man,537.0,,,,,,,


In [67]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [68]:
# Row count for each distinct value of the "survived" column.
df["survived"].value_counts()

survived
0    549
1    342
Name: count, dtype: int64

In [69]:
# The same counts expressed as fractions of the total.
df["survived"].value_counts(normalize=True)

survived
0    0.616162
1    0.383838
Name: proportion, dtype: float64

In [70]:
# Fractions scaled to percentages.
df["survived"].value_counts(normalize=True).mul(100)

survived
0    61.616162
1    38.383838
Name: proportion, dtype: float64

In [71]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [72]:
# With the default axis=0, "alive" is searched among the row labels and is not found.
# The error is caught and printed so the notebook still runs top to bottom.
try:
    df.drop("alive")
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['alive'] not found in axis"

In [73]:
# Spelling out axis=0 changes nothing: "alive" is a column, not a row label.
# The error is caught and printed so the notebook still runs top to bottom.
try:
    df.drop("alive", axis= 0)
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['alive'] not found in axis"

In [74]:
# Without inplace=True the removal is not permanent: df keeps its "alive" column.
df.drop("alive", axis= 1)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,True


In [75]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [76]:
# Remove the "alive" column from df itself, using the columns= keyword.
df.drop(columns="alive", inplace=True)

In [77]:
df   # the "alive" column has been removed

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,True


In [78]:
df.iloc[:100]   # position 100 itself is not included

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0,3,male,,0,0,8.0500,S,Third,man,True,,Southampton,True
96,0,1,male,71.0,0,0,34.6542,C,First,man,True,A,Cherbourg,True
97,1,1,male,23.0,0,1,63.3583,C,First,man,True,D,Cherbourg,False
98,1,2,female,34.0,0,1,23.0000,S,Second,woman,False,,Southampton,False


In [79]:
# Every second row among the first 100 positions.
df.iloc[:100:2]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,True
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,True
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,False
10,1,3,female,4.0,1,1,16.7,S,Third,child,False,G,Southampton,False
12,0,3,male,20.0,0,0,8.05,S,Third,man,True,,Southampton,True
14,0,3,female,14.0,0,0,7.8542,S,Third,child,False,,Southampton,True
16,0,3,male,2.0,4,1,29.125,Q,Third,child,False,,Queenstown,False
18,0,3,female,31.0,1,0,18.0,S,Third,woman,False,,Southampton,False
