# Different Joins

By default, pandas merge executes an inner join. Specify a different join with the `how` parameter.

In [61]:
import pandas as pd
from pyprojroot import here
import seaborn as sns

In [62]:
reactions = pd.read_csv(here("data/kaggle-data/reactions_table.csv"))
# Number the user rows 1..N and store that as the "User" column,
# which the merges below use as the join key.
users = pd.read_csv(here("data/kaggle-data/user_table.csv")).assign(
    User=lambda frame: range(1, len(frame) + 1)
)

In [63]:
# Compare row counts for two join types on the shared "User" key.
# The default (inner) keeps only keys present in both tables.
inner_rows = len(users.merge(reactions, on="User"))
# how="right" keeps every row of `reactions`, whether or not a user matches,
# so the message must say "right join" (it previously said "left join").
right_rows = len(users.merge(reactions, on="User", how="right"))
print(f"Number of rows on an inner join: {inner_rows:,}")
print(f"Number of rows on a right join: {right_rows:,}")


Number of rows on an inner join: 14,042
Number of rows on a left join: 26,365


## Check Join Integrity with outer join
***

After an outer join, rows that found no partner have nulls in the columns coming from the other table. Counting those rows shows how many IDs went unmatched.

In [64]:
# An outer join keeps unmatched rows from both tables; columns coming from
# the side without a match are filled with NaN.
user_react = pd.merge(users, reactions, how="outer", on="User")
user_react

Unnamed: 0,Surname,Name,Age,Subscription Date,User,Reaction Type,Reaction Date
0,Smith,Sarah,30.0,1.588157e+09,1,,
1,Picard,Francine,32.0,1.588162e+09,2,,
2,Roth,Hans,40.0,1.588157e+09,3,,
3,Pomme,Ali,28.0,1.588166e+09,4,,
4,Di Lillo,Jordi,42.0,1.588156e+09,5,,
...,...,...,...,...,...,...,...
27235,,,,,7816,Like,1.588166e+09
27236,,,,,7816,Like,1.588168e+09
27237,,,,,7816,Emoticon,1.588165e+09
27238,,,,,7816,Like,1.588167e+09


In [65]:
# A row counts as matched only when both a user field ("Name") and a reaction
# field ("Reaction Type") are present; everything else is a mismatch.
# NOTE(review): a null already present in either source column would also be
# counted as a mismatch here — confirm the raw tables have none.
mismatches = ~(user_react["Name"].notna() & user_react["Reaction Type"].notna())
unmatched_share = mismatches.sum() / len(user_react)
print(f"{round(unmatched_share * 100, 1)} % of rows were unmatched in this join.")

48.5 % of rows were unmatched in this join.


## Merge on self
***

Merge a table with itself when you want to compare each value with the other values in the same column.

In [66]:
fmri = sns.load_dataset("fmri")
# Pair each row with all rows of the same subject (including itself);
# overlapping column names get the default _x / _y suffixes.
fmri_self = pd.merge(fmri, fmri, on="subject")
# Drop pairs whose two timepoints are equal, then order by subject and both
# timepoints (ascending is the sort_values default for every key).
(
    fmri_self
    .loc[lambda pairs: pairs["timepoint_x"] != pairs["timepoint_y"]]
    .sort_values(["subject", "timepoint_x", "timepoint_y"])
)

Unnamed: 0,subject,timepoint_x,event_x,region_x,signal_x,timepoint_y,event_y,region_y,signal_y
75409,s0,0,stim,frontal,-0.021452,1,stim,parietal,-0.035735
75411,s0,0,stim,frontal,-0.021452,1,stim,frontal,-0.021054
75436,s0,0,stim,frontal,-0.021452,1,cue,frontal,0.016440
75439,s0,0,stim,frontal,-0.021452,1,cue,parietal,0.000300
77917,s0,0,stim,parietal,-0.039327,1,stim,parietal,-0.035735
...,...,...,...,...,...,...,...,...,...
32814,s9,18,cue,frontal,-0.000643,17,cue,parietal,-0.036362
33213,s9,18,cue,parietal,-0.051040,17,stim,parietal,-0.121574
33248,s9,18,cue,parietal,-0.051040,17,stim,frontal,-0.030099
33267,s9,18,cue,parietal,-0.051040,17,cue,frontal,-0.004900
