### 1. join() → Combine on Index (or Key Column)

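Joins two DataFrames on their index labels by default (a left join unless `how=` says otherwise).
You can also pass `on=` to match a column of the calling DataFrame against the index of the other DataFrame.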

In [4]:
import pandas as pd


# Build two frames that carry the same explicit index labels (0 and 1);
# join() lines rows up by those labels.
df1 = pd.DataFrame(
    data={
        'artist': ['Drake', 'The Weeknd'],
        'year': [2019, 2020],
    },
    index=[0, 1],
)
df2 = pd.DataFrame(data={'popularity': [85, 92]}, index=[0, 1])

# Last expression in the cell, so the notebook renders df1.
df1

Unnamed: 0,artist,year
0,Drake,2019
1,The Weeknd,2020


In [5]:
# Display df2; as the cell's last expression it is rendered as the output.
df2

Unnamed: 0,popularity
0,85
1,92


In [6]:
# Index-aligned join; how='left' is join()'s default, spelled out here.
df1.join(df2, how='left')

Unnamed: 0,artist,year,popularity
0,Drake,2019,85
1,The Weeknd,2020,92


### 2. merge() → Database-Style Joins

👉 More powerful than `join()`: it works like a SQL JOIN (`inner`, `left`, `right`, `outer`).
It lets you merge on one or more columns, not just the index.

In [8]:
# Both frames share an 'artist' column, which serves as the merge key.
df1 = pd.DataFrame(
    data={
        'artist': ['Drake', 'The Weeknd'],
        'year': [2019, 2020],
    }
)
df2 = pd.DataFrame(
    data={
        'artist': ['Drake', 'The Weeknd'],
        'popularity': [85, 92],
    }
)

In [9]:
# Inner merge: keep only the rows whose 'artist' value appears in both frames.
df1.merge(right=df2, how='inner', on='artist')

Unnamed: 0,artist,year,popularity
0,Drake,2019,85
1,The Weeknd,2020,92


In [10]:
# Left merge: every row of df1 is kept and matched against df2 on 'artist'.
df1.merge(right=df2, how='left', on='artist')

Unnamed: 0,artist,year,popularity
0,Drake,2019,85
1,The Weeknd,2020,92


### 3. concat() → Stack Together

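👉 Concatenates (stacks) DataFrames along rows (`axis=0`) or columns (`axis=1`). Each frame keeps its own index labels unless you pass `ignore_index=True`, and positions with no matching data are filled with missing values.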

In [11]:
# Two frames with identical columns; df2 contributes a single extra row.
df1 = pd.DataFrame(
    data={
        'artist': ['Drake', 'The Weeknd'],
        'year': [2019, 2020],
    }
)
df2 = pd.DataFrame(data={'artist': ['Ed Sheeran'], 'year': [2020]})

In [12]:
# Stack df2's rows under df1's; each frame's own index labels are kept,
# so label 0 shows up twice in the result.
pd.concat(objs=[df1, df2], axis=0)

Unnamed: 0,artist,year
0,Drake,2019
1,The Weeknd,2020
0,Ed Sheeran,2020


In [13]:
# Place the frames side by side, aligned on index labels; df2 has no row
# labelled 1, so its columns are missing there.
pd.concat(objs=[df1, df2], axis=1)

Unnamed: 0,artist,year,artist.1,year.1
0,Drake,2019,Ed Sheeran,2020.0
1,The Weeknd,2020,,


### Practice Questions

In [16]:
import pandas as pd

# Frames for the join() questions: df1 and df2 share index labels 0, 1, 2.
df1 = pd.DataFrame(
    data={
        'artist': ['Drake', 'The Weeknd', 'Ed Sheeran'],
        'year': [2019, 2020, 2020],
    },
    index=[0, 1, 2],
)
df2 = pd.DataFrame(data={'popularity': [85, 92, 88]}, index=[0, 1, 2])

# Frames for the merge()/concat() questions: both have an 'artist' column,
# but only 'Drake' appears in both.
df3 = pd.DataFrame(
    data={
        'artist': ['Drake', 'The Weeknd'],
        'popularity': [85, 92],
    }
)
df4 = pd.DataFrame(
    data={
        'artist': ['Drake', 'Ed Sheeran'],
        'streams': [500, 400],
    }
)

# Last expression in the cell, so the notebook renders df1.
df1

Unnamed: 0,artist,year
0,Drake,2019
1,The Weeknd,2020
2,Ed Sheeran,2020


In [17]:
# Q1. Join df1 and df2 on their shared index.
# how='left' is join()'s default, written out for clarity.
df1.join(other=df2, how='left')

Unnamed: 0,artist,year,popularity
0,Drake,2019,85
1,The Weeknd,2020,92
2,Ed Sheeran,2020,88


In [18]:
# Q2. Join df1 with only the 'popularity' column of df2.
# The double brackets select a one-column DataFrame rather than a Series.
df_join2 = df1.join(other=df2[['popularity']], how='left')
df_join2

Unnamed: 0,artist,year,popularity
0,Drake,2019,85
1,The Weeknd,2020,92
2,Ed Sheeran,2020,88


In [19]:
# Q3. Join with how='inner': keep only index labels present in both frames.
df_join3 = df1.join(other=df2, how='inner')
df_join3

Unnamed: 0,artist,year,popularity
0,Drake,2019,85
1,The Weeknd,2020,92
2,Ed Sheeran,2020,88


In [20]:
# Part 2: merge()
# Q4. Inner merge on 'artist': only artists found in both df3 and df4 remain.
df_merge1 = df3.merge(right=df4, how='inner', on='artist')
df_merge1

Unnamed: 0,artist,popularity,streams
0,Drake,85,500


In [21]:
# Q5. Outer merge on 'artist': artists from either frame are kept, and
# values that one side lacks come back as missing.
df_merge2 = df3.merge(right=df4, how='outer', on='artist')
df_merge2

Unnamed: 0,artist,popularity,streams
0,Drake,85.0,500.0
1,The Weeknd,92.0,
2,Ed Sheeran,,400.0


In [23]:
# Q6. Left merge on 'artist': every row of df3 is kept; 'streams' is
# missing for artists that df4 does not list.
df_merge3 = df3.merge(right=df4, how='left', on='artist')
df_merge3

Unnamed: 0,artist,popularity,streams
0,Drake,85,500.0
1,The Weeknd,92,


In [24]:
# Part 3: concat()
# Q7. Row-wise concat (axis=0). ignore_index=False keeps each frame's own
# index labels, so 0 and 1 each appear twice in the result.
df_concat_rows = pd.concat(objs=[df3, df4], axis=0, ignore_index=False)
df_concat_rows

Unnamed: 0,artist,popularity,streams
0,Drake,85.0,
1,The Weeknd,92.0,
0,Drake,,500.0
1,Ed Sheeran,,400.0
