![title](./pic/merging/inner_join/1_title.png)

In [21]:
import pandas as pd
import time
from IPython.display import Image

In [22]:
# Load the passenger table; index_col=0 uses the first CSV column as the row index.
df_passenger = pd.read_csv('./csv/merge_function/df_passenger.csv', index_col=0)
# Preview the first rows (PassengerId, Name, Sex).
df_passenger.head()

Unnamed: 0,PassengerId,Name,Sex
0,935,"Corbett, Mrs. Walter H (Irene Colvin)",female
1,1023,"Gracie, Col. Archibald IV",male
2,1136,"Johnston, Master. William Arthur Willie""""",male
3,1044,"Storey, Mr. Thomas",male
4,954,"Bjorklund, Mr. Ernst Herbert",male


In [23]:
# (rows, columns) of the passenger table.
df_passenger.shape

(443, 3)

In [24]:
# Load the id/survival table; index_col=0 uses the first CSV column as the row index.
df_ids = pd.read_csv('./csv/merge_function/df_ids.csv', index_col=0)
# Preview the first rows (PassengerId, Survived).
df_ids.head()

Unnamed: 0,PassengerId,Survived
0,1297,0
1,1260,1
2,1153,0
3,1139,0
4,998,0


In [25]:
# (rows, columns) of the id/survival table; the row count differs from df_passenger.
df_ids.shape

(433, 2)

---

## Merge function: `inner_join` mit `on=`

![title](./pic/merging/inner_join/2_inner_on.png)

<video width="1000" controls src="./pic/merging/inner_join/3_inner_join_example.mp4"></video>

In [26]:
# Recap: the table passed as the right-hand side of the merge below.
df_passenger.head()

Unnamed: 0,PassengerId,Name,Sex
0,935,"Corbett, Mrs. Walter H (Irene Colvin)",female
1,1023,"Gracie, Col. Archibald IV",male
2,1136,"Johnston, Master. William Arthur Willie""""",male
3,1044,"Storey, Mr. Thomas",male
4,954,"Bjorklund, Mr. Ernst Herbert",male


In [27]:
# Recap: the table passed as the left-hand side of the merge below.
df_ids.head()

Unnamed: 0,PassengerId,Survived
0,1297,0
1,1260,1
2,1153,0
3,1139,0
4,998,0


Mit dem Parameter `on=` geben wir die Spalte an, die in beiden `DataFrames` vorkommt und über die gemerged werden soll. Hierbei muss es sich um eine eindeutige Zuordnung handeln, wie z. B. die eindeutige PassengerId pro Passagier.

In [28]:
# Inner join: only rows whose PassengerId occurs in both frames are kept.
pd.merge(
    left=df_ids,
    right=df_passenger,
    on='PassengerId',  # key column present in both frames
    how='inner',       # join mode
)

Unnamed: 0,PassengerId,Survived,Name,Sex
0,1297,0,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male
1,1260,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female
2,1153,0,"Nilsson, Mr. August Ferdinand",male
3,1139,0,"Drew, Mr. James Vivian",male
4,998,0,"Buckley, Mr. Daniel",male
...,...,...,...,...
413,1117,1,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female
414,1149,0,"Niklasson, Mr. Samuel",male
415,958,1,"Burns, Miss. Mary Delia",female
416,1272,0,"O'Connor, Mr. Patrick",male


![title](./pic/merging/inner_join/4_pid.png)

---

## Merge function: `inner_join` mit `left_on=` & `right_on=`

In [29]:
# Give the key column a different name in the right-hand table so that the two
# frames no longer share a key name (the setup for left_on= / right_on=).
# Reassign the result instead of using inplace=True: rename() returns a new
# frame, which keeps the step explicit and chainable.
df_passenger = df_passenger.rename(columns={'PassengerId': 'pass_id'})
df_passenger.head()

Unnamed: 0,pass_id,Name,Sex
0,935,"Corbett, Mrs. Walter H (Irene Colvin)",female
1,1023,"Gracie, Col. Archibald IV",male
2,1136,"Johnston, Master. William Arthur Willie""""",male
3,1044,"Storey, Mr. Thomas",male
4,954,"Bjorklund, Mr. Ernst Herbert",male


![title](./pic/merging/inner_join/5_inner_onlr.png)

In [30]:
# NOTE(review): presumably left disabled as a counter-example. After the rename,
# df_passenger has no 'PassengerId' column, so on='PassengerId' would fail
# (pandas raises a KeyError for the missing key).
#pd.merge(df_ids,
#         df_passenger,
#         how='inner',
#         on='PassengerId'
#         )

In [31]:
# Inner join with differently named key columns: match df_ids.PassengerId
# against df_passenger.pass_id. Both key columns appear in the result.
pd.merge(
    left=df_ids,
    right=df_passenger,
    left_on='PassengerId',
    right_on='pass_id',
    how='inner',
)

Unnamed: 0,PassengerId,Survived,pass_id,Name,Sex
0,1297,0,1297,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male
1,1260,1,1260,"Gibson, Mrs. Leonard (Pauline C Boeson)",female
2,1153,0,1153,"Nilsson, Mr. August Ferdinand",male
3,1139,0,1139,"Drew, Mr. James Vivian",male
4,998,0,998,"Buckley, Mr. Daniel",male
...,...,...,...,...,...
413,1117,1,1117,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female
414,1149,0,1149,"Niklasson, Mr. Samuel",male
415,958,1,958,"Burns, Miss. Mary Delia",female
416,1272,0,1272,"O'Connor, Mr. Patrick",male


---

## Manuelle Stichprobe zur Überprüfung

In [32]:
# Load the full Titanic table as the reference for the manual spot check.
df = pd.read_csv('./csv/titanic.csv')
# Preview the first three rows.
df.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q


In [33]:
# Spot check: look up PassengerId 958 in the reference table and compare its
# Name / Sex / Survived values with the corresponding row of the merged result.
df[df['PassengerId'] == 958]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
66,958,1,3,"Burns, Miss. Mary Delia",female,18.0,0,0,330963,7.8792,,Q


---

## Benötigte Zeit

In [35]:
# Time a single inner merge. time.perf_counter() is a monotonic,
# high-resolution clock intended for measuring short intervals; time.time()
# can have coarse resolution and may jump if the system clock is adjusted.
s_time = time.perf_counter()

# The merge result is intentionally discarded; only the elapsed time matters here.
pd.merge(df_ids,
         df_passenger,
         how='inner',  # explicit for consistency with the cells above (also the default)
         left_on='PassengerId',
         right_on='pass_id'
         )

e_time = time.perf_counter()

In [36]:
# Elapsed time of the timed merge in seconds, rounded to four decimal places.
print(f'{round(e_time - s_time, 4)} Sekunden')

0.0019 Sekunden


In [37]:
# Ratio of two runtimes, expressed in percent.
# NOTE(review): 0.0024 is hard-coded and does not match the 0.0019 s printed by the
# timing cell; 2.2 is presumably the runtime (in seconds) of a slower approach
# measured elsewhere. Confirm both values or derive them from measurements.
print(f'{round(0.0024 * 100 / 2.2, 4)} %')

0.1091 %
