# Pandas

Veri Analizi için önemli kütüphanelerden birisidir  
* Pandas Series 
* Veri Okuma (Reading Data) 
* Veriye Hızlı Bakış (Quick Look at Data) 
* Pandas'ta Seçim İşlemleri (Selection in Pandas) 
* Toplulaştırma ve Gruplama (Aggregation & Grouping) 
* Apply ve Lambda 
* Birleştirme (Join) İşlemleri

### Pandas Series 

In [1]:
import pandas as pd 

In [2]:
s = pd.Series([10,66,23,56,78,64]) # seri oluştururuz.

In [3]:
s

0    10
1    66
2    23
3    56
4    78
5    64
dtype: int64

In [4]:
type(s)

pandas.core.series.Series

In [5]:
s.index

RangeIndex(start=0, stop=6, step=1)

In [6]:
s.dtype # içerisindeki verinin tip bilgisine erişmek için

dtype('int64')

In [7]:
s.size # elaman sayısına erişmek için

6

In [8]:
s.ndim  # boyut bilgisine erişmek için,pandas serileri tek boyutludur

1

In [9]:
s.values # içerisindeki değerlere erişmek istersek

array([10, 66, 23, 56, 78, 64], dtype=int64)

In [10]:
type(s.values)

numpy.ndarray

In [11]:
s.head(3) # baştan 3 değer

0    10
1    66
2    23
dtype: int64

In [12]:
s.tail(3) # sondan 3 değer 

3    56
4    78
5    64
dtype: int64

###  Veri Okuma (Reading Data) 

In [13]:
#df = pd.read_csv("C:/Users/tolga/OneDrive/Masaüstü/datasets/advertising.csv")

In [14]:
#df

In [15]:
#df.head()

### Veriye Hızlı Bakış (Quick Look at Data)

In [16]:
import pandas as pd 
import seaborn as sns 

In [17]:
df = sns.load_dataset("titanic")

In [18]:
df.head() # -> baştaki 5 değer

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [19]:
# 1 -> alive | 0 -> dead

In [20]:
df.tail() # -> sondaki 5 değer

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True


In [21]:
df.shape # boyut bilgisine erişmek için

(891, 15)

In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [23]:
# değişken isimlerine erişmek için
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [24]:
# index bilgisine erişmek için
df.index

RangeIndex(start=0, stop=891, step=1)

In [25]:
# betimsel istatistiğine bakmak için
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
survived,891.0,0.383838,0.486592,0.0,0.0,0.0,1.0,1.0
pclass,891.0,2.308642,0.836071,1.0,2.0,3.0,3.0,3.0
age,714.0,29.699118,14.526497,0.42,20.125,28.0,38.0,80.0
sibsp,891.0,0.523008,1.102743,0.0,0.0,0.0,1.0,8.0
parch,891.0,0.381594,0.806057,0.0,0.0,0.0,0.0,6.0
fare,891.0,32.204208,49.693429,0.0,7.9104,14.4542,31.0,512.3292


In [26]:
# veride eksiklik var mı ? 
df.isnull().values.any()

True

In [27]:
# eğer eksik değer varsa 
df.isnull().sum()

survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64

In [28]:
# bir kategorik değişken içerisinde kaç tane sınıf olduğu bilgisine erişmek isteyelim 

In [29]:
df["sex"].head()

0      male
1    female
2    female
3    female
4      male
Name: sex, dtype: object

In [30]:
df["sex"].value_counts()

male      577
female    314
Name: sex, dtype: int64

### Pandas'ta Seçim İşlemleri (Selection in Pandas)

In [31]:
df = sns.load_dataset("titanic") 
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [32]:
df.index

RangeIndex(start=0, stop=891, step=1)

In [33]:
df[0:13]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
7,0,3,male,2.0,3,1,21.075,S,Third,child,False,,Southampton,no,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False


In [34]:
# Remove the row labelled 0. drop() returns a new frame; df itself is unchanged.
df.drop(index=0).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True


In [35]:
# Alternative removal: drop several index labels at once. This is not permanent,
# because drop() returns a new DataFrame and leaves df untouched.
# To make it permanent either reassign the result, df = df.drop(delete_indexes, axis=0),
# or mutate in place with df.drop(delete_indexes, axis=0, inplace=True).
# With inplace=True drop() returns None, so nothing (e.g. .head(10)) can be chained onto
# it; and reassigning a .head(10) result would keep only those 10 rows.
delete_indexes = [1,3,5,7] 
df.drop(delete_indexes,axis=0).head(10)


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False
10,1,3,female,4.0,1,1,16.7,S,Third,child,False,G,Southampton,yes,False
11,1,1,female,58.0,0,0,26.55,S,First,woman,False,C,Southampton,yes,True
12,0,3,male,20.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
13,0,3,male,39.0,1,5,31.275,S,Third,man,True,,Southampton,no,False


### Değişkeni Indexe Çevirme 

In [36]:
df.index = df["age"] 
df.drop("age",axis=1).head()

Unnamed: 0_level_0,survived,pclass,sex,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
22.0,0,3,male,1,0,7.25,S,Third,man,True,,Southampton,no,False
38.0,1,1,female,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
26.0,1,3,female,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
35.0,1,1,female,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
35.0,0,3,male,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [37]:
df.drop("age",axis=1).head()

Unnamed: 0_level_0,survived,pclass,sex,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
22.0,0,3,male,1,0,7.25,S,Third,man,True,,Southampton,no,False
38.0,1,1,female,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
26.0,1,3,female,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
35.0,1,1,female,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
35.0,0,3,male,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [38]:
 # kalıcı yapalım 
df.drop("age",axis=1,inplace=True) 
df.head()

Unnamed: 0_level_0,survived,pclass,sex,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
22.0,0,3,male,1,0,7.25,S,Third,man,True,,Southampton,no,False
38.0,1,1,female,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
26.0,1,3,female,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
35.0,1,1,female,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
35.0,0,3,male,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [39]:
# indexi değişkene çevirme 

df.index 

df["age"] = df.index


In [40]:
df.head()

Unnamed: 0_level_0,survived,pclass,sex,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22.0,0,3,male,1,0,7.25,S,Third,man,True,,Southampton,no,False,22.0
38.0,1,1,female,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,38.0
26.0,1,3,female,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,26.0
35.0,1,1,female,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,35.0
35.0,0,3,male,0,0,8.05,S,Third,man,True,,Southampton,no,True,35.0


In [41]:
# 2.yol , reset index 
#df.reset_index().head()
#df = df.reset_index()

### Değişkenler Üzerinde İşlemler 

In [42]:
# ... dan kurtulmak istersek 
import pandas as pd  
import seaborn as sns
pd.set_option("display.max_columns",None) 
df = sns.load_dataset("titanic") 
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [43]:
# bir dataframede herhangi bir değişkenin varlığını sorgulamak için

In [44]:
"age" in df 

True

In [45]:
# Selecting a single variable: bracket access and attribute access both pick
# the "age" column. Only the last expression of the cell is displayed.
df["age"].head() 
df.age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [46]:
df["age"].head()  
type(df["age"].head())

pandas.core.series.Series

In [47]:
# bir dataframe den birden fazla değişken seçmek istersek 
df[["age","alive"]]

Unnamed: 0,age,alive
0,22.0,no
1,38.0,yes
2,26.0,yes
3,35.0,yes
4,35.0,no
...,...,...
886,27.0,no
887,19.0,yes
888,,no
889,26.0,yes


In [48]:
col_names = ["age","adult_male","alive"] 
df[col_names]

Unnamed: 0,age,adult_male,alive
0,22.0,True,no
1,38.0,False,yes
2,26.0,False,yes
3,35.0,False,yes
4,35.0,True,no
...,...,...,...
886,27.0,True,no
887,19.0,False,yes
888,,False,no
889,26.0,True,yes


In [49]:
# bir dataframe değişken ekleme 

df["age2"]= df["age"]**2

In [50]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age2
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,484.0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1444.0
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,676.0
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,1225.0
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,1225.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,729.0
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,361.0
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,676.0


In [51]:
df["age3"]= df["age"] / df["age2"]

In [52]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age2,age3
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,484.0,0.045455
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1444.0,0.026316
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,676.0,0.038462
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,1225.0,0.028571
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,1225.0,0.028571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,729.0,0.037037
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,361.0,0.052632
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,,
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,676.0,0.038462


In [53]:
df.drop("age3",axis=1).head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age2
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,484.0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1444.0
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,676.0
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,1225.0
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True,1225.0


In [54]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age2,age3
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,484.0,0.045455
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1444.0,0.026316
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,676.0,0.038462
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,1225.0,0.028571
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,1225.0,0.028571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,729.0,0.037037
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,361.0,0.052632
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,,
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,676.0,0.038462


In [55]:
# binlerce veri seti arasından seçip silmek için,işlem yapmak için 
# seçtik, aradık ve bulduk
df.loc[:, df.columns.str.contains("age")]

Unnamed: 0,age,age2,age3
0,22.0,484.0,0.045455
1,38.0,1444.0,0.026316
2,26.0,676.0,0.038462
3,35.0,1225.0,0.028571
4,35.0,1225.0,0.028571
...,...,...,...
886,27.0,729.0,0.037037
887,19.0,361.0,0.052632
888,,,
889,26.0,676.0,0.038462


In [56]:
# silmek istiyoruz 
df.loc[:,~df.columns.str.contains("age")].head()

Unnamed: 0,survived,pclass,sex,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,0,0,8.05,S,Third,man,True,,Southampton,no,True


### loc -> label-based selection & iloc -> integer-based selection

In [57]:
# seçmek ,değiştirmek , listelemek , silmek birden fazla veriye erişim kolaylığı sağlıyor. 

ILoc

In [58]:
# iloc selects by integer position. The row slice on the first line is evaluated
# but not shown; only the last expression (row 0, column 0) is displayed.
df.iloc[0:3] 
df.iloc[0,0]

0

In [59]:
df.iloc[0:3]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age2,age3
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,484.0,0.045455
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1444.0,0.026316
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,676.0,0.038462


Loc

In [60]:
df.loc[0:3]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age2,age3
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,484.0,0.045455
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1444.0,0.026316
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,676.0,0.038462
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,1225.0,0.028571


In [61]:
df.iloc[0:3,0:3] # -> isimlendirme yapamıyorsun int girmen gerek

Unnamed: 0,survived,pclass,sex
0,0,3,male
1,1,1,female
2,1,3,female


In [62]:
df.loc[0:3,"age"] # -> isimlendirme yapıyorsun "age"

0    22.0
1    38.0
2    26.0
3    35.0
Name: age, dtype: float64

### Koşullu Seçim (Conditional Selection)

In [63]:
import pandas as pd 
import seaborn as sns 
pd.set_option('display.max_columns',None) 
df = sns.load_dataset("titanic") 
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [64]:
# yaşı 50 den büyük olanlar
df[df["age"] > 50].head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
11,1,1,female,58.0,0,0,26.55,S,First,woman,False,C,Southampton,yes,True
15,1,2,female,55.0,0,0,16.0,S,Second,woman,False,,Southampton,yes,True
33,0,2,male,66.0,0,0,10.5,S,Second,man,True,,Southampton,no,True
54,0,1,male,65.0,0,1,61.9792,C,First,man,True,B,Cherbourg,no,False


In [65]:
# How many passengers are older than 50? Filter the rows and pick the column
# in a single .loc call, then count the non-null ages.
df.loc[df["age"] > 50, "age"].count()

64

In [66]:
# yaşı 50 den büyük olan class sınıfındakileri seçerek göster

df.loc[df["age"] > 50, ["age","class"]].head()

Unnamed: 0,age,class
6,54.0,First
11,58.0,First
15,55.0,Second
33,66.0,Second
54,65.0,First


In [67]:
# yaşı 50 den büyük erkekleri seçmek istiyoruz # iki koşul 
df.loc[(df["age"] > 50) & (df["sex"]=="male"),["age","class"]].head()

Unnamed: 0,age,class
6,54.0,First
33,66.0,Second
54,65.0,First
94,59.0,Third
96,71.0,First


In [68]:
# 3.koşul olarak bir limandakini bulmayı seçeriz ve koşulu &

df.loc[(df["age"] > 50)  
       & (df["sex"]=="male")  
       & (df["embark_town"]=="Cherbourg"), 
       ["age","class","embark_town"]].head()

Unnamed: 0,age,class,embark_town
54,65.0,First,Cherbourg
96,71.0,First,Cherbourg
155,51.0,First,Cherbourg
174,56.0,First,Cherbourg
487,58.0,First,Cherbourg


In [69]:
# The "or" condition: pandas combines boolean masks element-wise with | (not ||),
# and each comparison needs its own parentheses.
# NOTE(review): the trailing .head() means df_new holds at most the first five
# matching rows, so any count taken on df_new is not the total number of matches.

df_new = df.loc[(df["age"] > 50)  
& (df["sex"]=="male")  
& ((df["embark_town"]=="Cherbourg") | (df["embark_town"]=="Southampton")), 
 ["age","class","embark_town"]].head()

In [70]:
df_new

Unnamed: 0,age,class,embark_town
6,54.0,First,Southampton
33,66.0,Second,Southampton
54,65.0,First,Cherbourg
94,59.0,Third,Southampton
96,71.0,First,Cherbourg


In [71]:
df_new["embark_town"].count()

5

### Toplulaştırma ve Gruplama (Aggregation & Grouping)

* count() 
* first() 
* last() 
* mean() 
* median() 
* min() 
* max() 
* std()
* var() 
* sum() 
* pivot table 

In [72]:
import pandas as pd 
import seaborn as sns 
pd.set_option('display.max_columns',None) 
df = sns.load_dataset("titanic") 
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [73]:
# cinsiyete göre yaş ortalamasını grupla ve yaşın ortalamasını al
df["age"].mean() 

df.groupby("sex")["age"].mean()

sex
female    27.915709
male      30.726645
Name: age, dtype: float64

In [74]:
# Mean age per sex, this time written with agg() and a {column: function}
# mapping. The function applied here is "mean", not a sum.

df.groupby("sex").agg({"age":"mean"})

Unnamed: 0_level_0,age
sex,Unnamed: 1_level_1
female,27.915709
male,30.726645


In [75]:
# Same result using agg() with the name of an aggregation function.
# agg() expects a function (or its name), not a column label, so the column is
# selected with ["age"] first and "mean" is what gets passed to agg().
df.groupby("sex")["age"].agg("mean")

sex
female    27.915709
male      30.726645
Name: age, dtype: float64

In [76]:
# frekans bilgisi istersek toplamı
df.groupby("sex").agg({"age":["mean","sum"], 
                      "embark_town":"count"})

Unnamed: 0_level_0,age,age,embark_town
Unnamed: 0_level_1,mean,sum,count
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
female,27.915709,7286.0,312
male,30.726645,13919.17,577


In [77]:
# survived alırsak 1 yakınsa hayatta kalma oranı yüksek 0 a yakınsa düşük ortalama ile

df.groupby("sex").agg({"age":["mean","sum"], 
                      "survived":"mean"})

Unnamed: 0_level_0,age,age,survived
Unnamed: 0_level_1,mean,sum,mean
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
female,27.915709,7286.0,0.742038
male,30.726645,13919.17,0.188908


In [78]:
# sadece cinsiyete  göre değil farklı kategorik değişkenlere de bir kırılım yapmak istiyorum 

df.groupby(["sex","embark_town"]).agg({"age":["mean","sum"], 
                      "survived":"mean"})

Unnamed: 0_level_0,Unnamed: 1_level_0,age,age,survived
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,sum,mean
sex,embark_town,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
female,Cherbourg,28.344262,1729.0,0.876712
female,Queenstown,24.291667,291.5,0.75
female,Southampton,27.771505,5165.5,0.689655
male,Cherbourg,32.998841,2276.92,0.305263
male,Queenstown,30.9375,495.0,0.073171
male,Southampton,30.29144,11147.25,0.174603


In [79]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [80]:
# cinsiyeti kadın ve erkek olan 1-2-3 . sınıfların hayatta kalma durumları 
df.groupby(["sex","embark_town","class"]).agg({"age":["mean","sum"], 
                      "survived":"mean"})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,age,age,survived
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,sum,mean
sex,embark_town,class,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
female,Cherbourg,First,36.052632,1370.0,0.976744
female,Cherbourg,Second,19.142857,134.0,1.0
female,Cherbourg,Third,14.0625,225.0,0.652174
female,Queenstown,First,33.0,33.0,1.0
female,Queenstown,Second,30.0,30.0,1.0
female,Queenstown,Third,22.85,228.5,0.727273
female,Southampton,First,32.704545,1439.0,0.958333
female,Southampton,Second,29.719697,1961.5,0.910448
female,Southampton,Third,23.223684,1765.0,0.375
male,Cherbourg,First,40.111111,1444.0,0.404762


In [81]:
# hayatta kalma durumları toplam "count"
df.groupby(["sex","embark_town","class"]).agg({ 
    "age":["mean","sum"], 
    "survived":"mean" ,
     "sex":"count"})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,age,age,survived,sex
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,sum,mean,count
sex,embark_town,class,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
female,Cherbourg,First,36.052632,1370.0,0.976744,43
female,Cherbourg,Second,19.142857,134.0,1.0,7
female,Cherbourg,Third,14.0625,225.0,0.652174,23
female,Queenstown,First,33.0,33.0,1.0,1
female,Queenstown,Second,30.0,30.0,1.0,2
female,Queenstown,Third,22.85,228.5,0.727273,33
female,Southampton,First,32.704545,1439.0,0.958333,48
female,Southampton,Second,29.719697,1961.5,0.910448,67
female,Southampton,Third,23.223684,1765.0,0.375,88
male,Cherbourg,First,40.111111,1444.0,0.404762,42


### Pivot Table 

In [82]:
import pandas as pd 
import seaborn as sns 
pd.set_option('display.max_columns',None) 
df = sns.load_dataset("titanic") 
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [84]:
# Pivot table of "survived" with sex on the rows and port of embarkation on the
# columns. pivot_table aggregates with the mean by default, so each cell holds
# the mean of the 0/1 survived flag for that group.
df.pivot_table(values="survived", index="sex", columns="embarked")

embarked,C,Q,S
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.876712,0.75,0.689655
male,0.305263,0.073171,0.174603


In [85]:
df.pivot_table("survived","sex","embarked",aggfunc="std")

embarked,C,Q,S
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.331042,0.439155,0.463778
male,0.462962,0.263652,0.380058


In [86]:
df.pivot_table("survived","sex",["embarked","class"])

embarked,C,C,C,Q,Q,Q,S,S,S
class,First,Second,Third,First,Second,Third,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
female,0.976744,1.0,0.652174,1.0,1.0,0.727273,0.958333,0.910448,0.375
male,0.404762,0.2,0.232558,0.0,0.0,0.076923,0.35443,0.154639,0.128302


In [91]:
# Goal: break survival down by sex and by age as well. Age is numeric, so it is
# first converted to a categorical variable with pd.cut, which bins the values
# using the explicit edges given here: (0, 10], (10, 18], (18, 25], (25, 40], (40, 90].
# When suitable edges are not known in advance, pd.qcut can be used instead; it
# derives the bin edges from the quantiles of the data.
df["new_age"] = pd.cut(df["age"], bins=[0, 10, 18, 25, 40, 90])
df.pivot_table(values="survived", index="sex", columns="new_age")

new_age,"(0, 10]","(10, 18]","(18, 25]","(25, 40]","(40, 90]"
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
female,0.612903,0.72973,0.759259,0.802198,0.770833
male,0.575758,0.131579,0.12037,0.22093,0.176471


In [92]:
# yolculuk sınıfı 
df["new_age"] = pd.cut(df["age"],[0, 10, 18, 25, 40, 90])
df.pivot_table("survived","sex",["new_age","class"])

new_age,"(0, 10]","(0, 10]","(0, 10]","(10, 18]","(10, 18]","(10, 18]","(18, 25]","(18, 25]","(18, 25]","(25, 40]","(25, 40]","(25, 40]","(40, 90]","(40, 90]","(40, 90]"
class,First,Second,Third,First,Second,Third,First,Second,Third,First,Second,Third,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
female,0.0,1.0,0.5,1.0,1.0,0.52381,0.941176,0.933333,0.5,1.0,0.90625,0.464286,0.961538,0.846154,0.111111
male,1.0,1.0,0.363636,0.666667,0.0,0.103448,0.333333,0.047619,0.115385,0.513514,0.071429,0.172043,0.28,0.095238,0.064516


### Apply ve Lambda

In [93]:
import pandas as pd 
import seaborn as sns 
pd.set_option('display.max_columns',None) 
df = sns.load_dataset("titanic") 
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [95]:
# apply : satır ve sütunlarda fonksiyon çalıştırma imkanı sağlar.
# lambda : kullan-at bir fonksiyondur; kod akışı esnasında tanımlanıp kullanılır.

In [96]:
df["age2"] = df["age"]*2 
df["age3"] = df["age"]*3

In [97]:
(df["age"]/10).head()
(df["age2"]/10).head()
(df["age3"]/10).head()

0     6.6
1    11.4
2     7.8
3    10.5
4    10.5
Name: age3, dtype: float64

In [98]:
for col in df.columns: 
    if "age" in col:
        print(col)

age
age2
age3


In [99]:
for col in df.columns: 
    if "age" in col:
        print((df[col]/10).head())

0    2.2
1    3.8
2    2.6
3    3.5
4    3.5
Name: age, dtype: float64
0    4.4
1    7.6
2    5.2
3    7.0
4    7.0
Name: age2, dtype: float64
0     6.6
1    11.4
2     7.8
3    10.5
4    10.5
Name: age3, dtype: float64


In [101]:
# kaydetmesini istersek
for col in df.columns: 
    if "age" in col:
       df[col] = df[col]/10 
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age2,age3
0,0,3,male,0.22,1,0,7.25,S,Third,man,True,,Southampton,no,False,0.44,0.66
1,1,1,female,0.38,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,0.76,1.14
2,1,3,female,0.26,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,0.52,0.78
3,1,1,female,0.35,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,0.7,1.05
4,0,3,male,0.35,0,0,8.05,S,Third,man,True,,Southampton,no,True,0.7,1.05


In [102]:
# apply - lambda ile nasıl yapılır 
df[["age","age2","age3"]].apply(lambda x: x/10).head()

Unnamed: 0,age,age2,age3
0,0.022,0.044,0.066
1,0.038,0.076,0.114
2,0.026,0.052,0.078
3,0.035,0.07,0.105
4,0.035,0.07,0.105


In [106]:
# daha iyi halde yazmak için
df.loc[:,df.columns.str.contains("age")].apply(lambda x: x/10).head()

Unnamed: 0,age,age2,age3
0,0.022,0.044,0.066
1,0.038,0.076,0.114
2,0.026,0.052,0.078
3,0.035,0.07,0.105
4,0.035,0.07,0.105


In [107]:
df.loc[:,df.columns.str.contains("age")].apply(lambda x:(x-x.mean())/x.std()).head()

Unnamed: 0,age,age2,age3
0,-0.530005,-0.530005,-0.530005
1,0.57143,0.57143,0.57143
2,-0.254646,-0.254646,-0.254646
3,0.364911,0.364911,0.364911
4,0.364911,0.364911,0.364911


In [111]:
def standart_scaler(col_name):
    """Standardize a column of values to z-scores.

    Parameters
    ----------
    col_name
        The values to scale. Despite the name this is the column's data, not
        its label: it must provide ``.mean()``, ``.std()`` and element-wise
        arithmetic (for example a pandas Series, which is what
        ``DataFrame.apply`` passes for each column).

    Returns
    -------
    The input shifted by its mean and divided by its standard deviation.
    """
    centered = col_name - col_name.mean()
    spread = col_name.std()
    return centered / spread
df.loc[:,df.columns.str.contains("age")].apply(standart_scaler).head()

Unnamed: 0,age,age2,age3
0,-0.530005,-0.530005,-0.530005
1,0.57143,0.57143,0.57143
2,-0.254646,-0.254646,-0.254646
3,0.364911,0.364911,0.364911
4,0.364911,0.364911,0.364911


In [112]:
# Persist the standardized values back into df.
# The right-hand side must cover every row: assignment through .loc aligns on
# the index, so a frame truncated with .head() would leave every row after the
# first five as NaN in the selected columns.
age_cols = df.columns.str.contains("age")
df.loc[:, age_cols] = df.loc[:, age_cols].apply(standart_scaler)

In [113]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age2,age3
0,0,3,male,-0.530005,1,0,7.25,S,Third,man,True,,Southampton,no,False,-0.530005,-0.530005
1,1,1,female,0.57143,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,0.57143,0.57143
2,1,3,female,-0.254646,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,-0.254646,-0.254646
3,1,1,female,0.364911,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,0.364911,0.364911
4,0,3,male,0.364911,0,0,8.05,S,Third,man,True,,Southampton,no,True,0.364911,0.364911


### Birleştirme (Join) İşlemleri

In [115]:
import numpy as np 
import pandas as pd 
# 5x3 matrix of random integers drawn from [1, 30). No seed is set in this
# cell, so the values differ on every run.
m = np.random.randint(low=1, high=30, size=(5, 3))
df1 = pd.DataFrame(data=m, columns=["var1", "var2", "var3"])
# Second frame with the same shape and labels, every value shifted by 99.
df2 = df1.add(99)

In [116]:
df2

Unnamed: 0,var1,var2,var3
0,115,115,111
1,116,118,124
2,120,117,123
3,121,127,109
4,128,107,115


In [117]:
df1

Unnamed: 0,var1,var2,var3
0,16,16,12
1,17,19,25
2,21,18,24
3,22,28,10
4,29,8,16


In [119]:
# Stack the two DataFrames on top of each other. Each frame keeps its own index
# labels, so the index of the result starts again from 0 for the rows that come
# from df2.

pd.concat([df1,df2])

Unnamed: 0,var1,var2,var3
0,16,16,12
1,17,19,25
2,21,18,24
3,22,28,10
4,29,8,16
0,115,115,111
1,116,118,124
2,120,117,123
3,121,127,109
4,128,107,115


In [120]:
# ignore_index is a boolean flag: with True the result gets a fresh 0..n-1 index.
# A string must not be used here, since any non-empty string (even "False") is truthy.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,var1,var2,var3
0,16,16,12
1,17,19,25
2,21,18,24
3,22,28,10
4,29,8,16
5,115,115,111
6,116,118,124
7,120,117,123
8,121,127,109
9,128,107,115


In [121]:
# To place the frames side by side instead of stacking them, pass axis=1
# (the default is axis=0). ignore_index is a boolean flag; with True the
# concatenated axis is relabelled, so the columns become 0..5.
pd.concat([df1, df2], ignore_index=True, axis=1)

Unnamed: 0,0,1,2,3,4,5
0,16,16,12,115,115,111
1,17,19,25,116,118,124
2,21,18,24,120,117,123
3,22,28,10,121,127,109
4,29,8,16,128,107,115


### Merge ile birleştirme işlemleri


In [143]:
# Three small tables used to demonstrate merge:
#   df1: employee -> group, df2: employee -> start year, df4: group -> manager.
df1 = pd.DataFrame(
    dict(
        employees=["Johni", "Dennis", "Marki", "Mari"],
        group=["accounting", "engineering", "finance", "hr"],
    )
)

df2 = pd.DataFrame(
    dict(
        employees=["Marki", "Johni", "Dennis", "Mari"],
        start_date=[2012, 2013, 2015, 2019],
    )
)

df4 = pd.DataFrame(
    dict(
        group=["accounting", "engineering", "finance", "hr"],
        manager=["Merve", "Tolga", "Melih", "Mert"],
    )
)

In [144]:
# Join the two tables on the employee name. The key is named explicitly rather
# than relying on merge's default of joining on every column name the two
# frames happen to share.
df3 = pd.merge(df1, df2, on="employees")
df3

Unnamed: 0,employees,group,start_date
0,Johni,accounting,2013
1,Dennis,engineering,2015
2,Marki,finance,2012
3,Mari,hr,2019


In [145]:
# Goal: attach the manager to every employee. df3 and df4 share the "group"
# column, which is named explicitly as the join key.
pd.merge(df3, df4, on="group")

Unnamed: 0,employees,group,start_date,manager
0,Johni,accounting,2013,Merve
1,Dennis,engineering,2015,Tolga
2,Marki,finance,2012,Melih
3,Mari,hr,2019,Mert
