# Filtering joins

![image.png](attachment:08f54f72-2eed-4901-9691-ea59577739b4.png)

![image.png](attachment:4b670b44-adb1-4550-af03-ecb22d8f1bea.png)
![image.png](attachment:d294e45c-42f9-4597-a8d8-e6279499dd1d.png)
![image.png](attachment:118c0cfa-fdd1-496a-aa2d-aaba0b4c12b6.png)
![image.png](attachment:95929723-17d5-4309-ba78-85acca0dcb7b.png)

In [1]:
import pandas as pd

In [None]:
# Left-join employees to top_cust on srid. indicator=True adds a `_merge`
# column that records, per row, whether a match was found in top_cust.
empl_cust = pd.merge(employees, top_cust, how='left', on='srid',
                     indicator=True)

# srid values of the rows that exist only on the employees side
srid_list = empl_cust[empl_cust['_merge'] == 'left_only']['srid']

# Anti-join: show the employees whose srid never matched a top customer
print(employees.loc[employees['srid'].isin(srid_list)])

![image.png](attachment:5a5d4e14-023f-4d8a-93f8-3f787daa96d1.png)

![image.png](attachment:95ffe9b8-4e3c-46fe-8071-2bb92bb11243.png)

In [None]:
# Inner-join non_mus_tcks with top_invoices on tid so that only tracks
# appearing in both tables remain.
tracks_invoices = pd.merge(non_mus_tcks, top_invoices, on="tid")

# Semi-join: keep the non_mus_tcks rows whose tid survived the inner join,
# without bringing in any top_invoices columns.
top_tracks = non_mus_tcks.loc[non_mus_tcks["tid"].isin(tracks_invoices["tid"])]

# Count tid rows per gid; as_index=False keeps gid as a regular column
cnt_by_gid = top_tracks.groupby("gid", as_index=False).agg({"tid": "count"})

# Attach the genres columns to the per-gid counts and print the result
print(pd.merge(cnt_by_gid, genres, on="gid"))

![image.png](attachment:467b779f-0043-4294-85b0-f2ec60da22fe.png)

# Concatenate DataFrames together vertically

![image.png](attachment:67990391-22c5-460a-a8c7-ed62af16e2f0.png)

In [None]:
# Stack the three track tables on top of each other (row-wise).
# Each table keeps its own original index labels in the result.
tracks_from_albums = pd.concat(
    (tracks_master, tracks_ride, tracks_st),
    axis=0,
    sort=True,
)
print(tracks_from_albums)

![image.png](attachment:ed5e60b1-5a01-4358-94b2-c8179d2caaca.png)

In [None]:
# Stack the three track tables row-wise and discard their original index
# labels, so the result is renumbered 0 .. n-1.
tracks_from_albums = pd.concat(
    (tracks_master, tracks_ride, tracks_st),
    axis=0,
    sort=True,
    ignore_index=True,
)
print(tracks_from_albums)

สังเกตว่าในรูปด้านบน index ยังเป็นค่าเดิมของแต่ละตาราง (0 1 4 0 1 4 ...) แต่เมื่อใช้ `ignore_index=True` index ในรูปนี้จะถูกเรียงใหม่เป็น 0 1 2 3 4 ...

![image.png](attachment:59b1801a-12cc-4495-b0e8-15c736330a4d.png)

In [None]:
# Stack the three track tables row-wise, keeping only the columns that
# are present in every table (join="inner").
tracks_from_albums = pd.concat(
    (tracks_master, tracks_ride, tracks_st),
    axis=0,
    sort=True,
    join="inner",
)
print(tracks_from_albums)

กำหนด `join="inner"` เพื่อเลือกเฉพาะคอลัมน์ที่มีอยู่ในทุกตาราง แล้วนำมาต่อกัน

![image.png](attachment:8c2bc0b9-1fc5-4e9f-a0af-a5ec1e98238f.png)

![image.png](attachment:6eb1c167-99e6-4a12-a579-f2367fd96729.png)

In [None]:
# Stack the three monthly invoice tables; `keys` adds an outer index level
# labelling which table each row came from.
inv_jul_thr_sep = pd.concat(
    [inv_jul, inv_aug, inv_sep],
    keys=["7Jul", "8Aug", "9Sep"],
)

# Average of the total column per outer index label (level 0 = the keys)
avg_inv_by_month = inv_jul_thr_sep.groupby(level=0)[["total"]].mean()

# Draw the averages as a bar chart.
# NOTE(review): `plt` is not imported anywhere in the visible file;
# presumably matplotlib.pyplot is provided by the environment - confirm.
avg_inv_by_month.plot(kind="bar")
plt.show()

![image.png](attachment:cb08e696-69a7-4fdd-a10b-8743739cde6a.png)

![image.png](attachment:8d9789ff-6076-434a-8f19-3eaf33229c8c.png)

In [None]:
# Combine the tracks tables, tracks_ride first.
# pd.concat is used instead of DataFrame.append: .append() was deprecated
# in pandas 1.4 and removed in pandas 2.0, so it fails on current pandas.
# Listing tracks_ride first reproduces the row order .append() produced.
metallica_tracks = pd.concat([tracks_ride, tracks_master, tracks_st],
                             sort=False)

# Merge metallica_tracks and invoice_items, keeping only tids found in both
tracks_invoices = metallica_tracks.merge(invoice_items, on="tid", how="inner")

# For each tid and name sum the quantity sold
tracks_sold = tracks_invoices.groupby(['tid', 'name']).agg({"quantity": "sum"})

# Sort in descending order by quantity and print the results
print(tracks_sold.sort_values("quantity", ascending=False))

![image.png](attachment:123ca78c-baf3-480a-a7b3-75f5a97e51f6.png)

# Verifying integrity

![image.png](attachment:a7771c25-2df4-4583-8915-663a366775db.png)

![image.png](attachment:80ef86af-1711-4424-ad59-4677443a12b5.png)

In [None]:
# Stack the two classic tables row-wise with a fresh 0 .. n-1 index
classic_18_19 = pd.concat((classic_18, classic_19), ignore_index=True)

# Same for the two pop tables
pop_18_19 = pd.concat((pop_18, pop_19), ignore_index=True)

# Inner-join on tid: only tracks present in both combined tables remain
classic_pop = pd.merge(classic_18_19, pop_18_19, on="tid")

# Semi-join: keep the classic_18_19 rows whose tid also appears in
# classic_pop, without adding any columns from the pop tables.
popular_classic = classic_18_19.loc[
    classic_18_19["tid"].isin(classic_pop["tid"])
]

# Print popular chart
print(popular_classic)

![image.png](attachment:41b3c58a-5db4-40fb-bcbc-0a2106006942.png)