# Using merge_ordered()

![image.png](attachment:53b85388-528d-4a5b-a068-e4c6a4944296.png)

In [None]:
# Ordered left join: keep every gdp row, matching gdp's "year" to sp500's "date".
# No fill_method is given, so gdp rows without an sp500 match keep NaN.
gdp_sp500 = pd.merge_ordered(
    left=gdp,
    right=sp500,
    how="left",
    left_on="year",
    right_on="date",
)

# Show the merged table
print(gdp_sp500)

![image.png](attachment:4cd727b4-c767-4158-90b7-2753b6a85d72.png)

![image.png](attachment:ac2b51a0-c718-4e6f-b183-508ce41c4eb1.png)

In [None]:
# Same ordered left join, but gaps are forward-filled: a missing value is
# replaced by the value from the previous row of the merged result.
gdp_sp500 = pd.merge_ordered(
    left=gdp,
    right=sp500,
    how="left",
    left_on="year",
    right_on="date",
    fill_method="ffill",
)

# Show the merged, forward-filled table
print(gdp_sp500)

![image.png](attachment:f151e004-76b2-4ff1-958e-2d53fb924b58.png)

In [None]:
# Rebuild the forward-filled ordered left join of gdp and sp500
gdp_sp500 = pd.merge_ordered(
    left=gdp,
    right=sp500,
    how="left",
    left_on="year",
    right_on="date",
    fill_method="ffill",
)

# Keep only the two columns being compared
gdp_returns = gdp_sp500.loc[:, ["gdp", "returns"]]

# Pairwise correlation matrix of gdp and returns
print(gdp_returns.corr())

![image.png](attachment:ebbb114a-8fb7-46f5-93bc-c864b89c5c5c.png)

![image.png](attachment:0fe0af46-4887-463f-8451-9caa28662faf.png)

In [None]:
# Ordered inner join; no key is passed, so pandas joins on the columns
# the two tables have in common.
inflation_unemploy = pd.merge_ordered(
    left=inflation,
    right=unemployment,
    how="inner",
)

# Show the joined table
print(inflation_unemploy)

# Scatter plot with unemployment_rate on the x-axis and cpi on the y-axis
inflation_unemploy.plot.scatter(x="unemployment_rate", y="cpi")
plt.show()

![image.png](attachment:d53ef2cc-e338-4964-8a74-6c572c3ff0ec.png)
![image.png](attachment:01bc96ed-c130-45dc-83bf-988934e880fa.png)

![image.png](attachment:8ac41de9-6a06-4089-a49e-bac7bfd6928e.png)

In [None]:
# Ordered merge (outer by default) keyed on date first, then country.
# Forward fill runs down the rows in that order - compare rows 2 and 3 of the output.
ctry_date = pd.merge_ordered(
    left=gdp,
    right=pop,
    on=["date", "country"],
    fill_method="ffill",
)

# Show the result
print(ctry_date)

![image.png](attachment:26f6d19d-4722-47b4-b4a6-b9b38a47a587.png)

In [None]:
# Ordered merge (outer by default) keyed on country first, then date,
# with gaps forward-filled down the rows in that order.
date_ctry = pd.merge_ordered(
    left=gdp,
    right=pop,
    on=["country", "date"],
    fill_method="ffill",
)

# Show the result
print(date_ctry)

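### สังเกตว่าการระบุ country ก่อน date ให้ผลลัพธ์ที่ดีกว่า เพราะ merge_ordered() จะเรียงข้อมูลตามลำดับของ key ที่ระบุ เมื่อ country อยู่ข้างหน้า ข้อมูลจะถูกเรียงตาม country ก่อนแล้วจึงเรียงตาม date ทำให้ ffill มักเติมค่าที่ขาดหายจากแถวก่อนหน้าของประเทศเดียวกัน และอ่านข้อมูลได้ง่ายขึ้น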
![image.png](attachment:fb766bb4-20d4-474e-9957-2c3b0a164cfe.png)

# Using merge_asof()

ใช้สำหรับ merge ข้อมูลในลักษณะของ left join

แต่แตกต่างกันตรงที่เวลา merge ข้อมูล

ตรงที่เป็น key เราจะ merge ตัวที่เกือบจะเหมือนกัน

เกือบในที่นี้ก็คือ ค่า key ฝั่งขวาที่ใกล้ที่สุดซึ่งน้อยกว่าหรือเท่ากับ key ของตัวตั้ง (ฝั่งซ้าย) ซึ่งเป็นค่า default คือ direction="backward"

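แต่ถ้ากำหนด direction="forward" ภายในฟังก์ชัน จะเป็นกรณีตรงข้าม คือเลือกค่า key ฝั่งขวาที่ใกล้ที่สุดซึ่งมากกว่าหรือเท่ากับ key ของตัวตั้ง (ฝั่งซ้าย) ส่วน direction="nearest" จะเลือกค่าที่ใกล้ที่สุดไม่ว่าจะน้อยกว่าหรือมากกว่า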

![image.png](attachment:c90ebae9-2f3a-4e53-adfa-3e08e6cd0fce.png)

In [None]:
# Each merge_asof() below pairs every left row with the right row whose
# date_time is closest (direction="nearest").
# Step 1: jpm + wells; overlapping jpm columns keep their names,
# overlapping wells columns get the "_wells" suffix.
jpm_wells = pd.merge_asof(
    left=jpm,
    right=wells,
    on="date_time",
    direction="nearest",
    suffixes=("", "_wells"),
)

# Step 2: add bac; columns that still overlap get "_jpm" (left) and "_bac" (right).
jpm_wells_bac = pd.merge_asof(
    left=jpm_wells,
    right=bac,
    on="date_time",
    direction="nearest",
    suffixes=("_jpm", "_bac"),
)

# Row-over-row change of every column
price_diffs = jpm_wells_bac.diff()

# Plot only the three close-price differences
price_diffs.plot(
    y=[
        "close_jpm",
        "close_wells",
        "close_bac",
    ]
)
plt.show()

![image.png](attachment:2b2fee27-f5ed-4e70-846e-8ea3cbbcf131.png)

![image.png](attachment:7a32c640-f408-4051-a4bc-a5d1e1ce5cdf.png)
![image.png](attachment:4f24bb1e-cb5e-4a72-98f2-7697a757f436.png)

In [None]:
# As-of merge on date; with the default direction each gdp row takes the
# latest recession row whose date is at or before its own.
gdp_recession = pd.merge_asof(left=gdp, right=recession, on="date")

# One bar colour per row: "r" for recession rows, "g" for everything else
is_recession = [
    "g" if status != "recession" else "r"
    for status in gdp_recession["econ_status"]
]

# Bar chart of gdp by date, coloured by economic status
gdp_recession.plot(x="date", y="gdp", kind="bar", rot=90, color=is_recession)
plt.show()

![image.png](attachment:d9ab1109-f268-409c-8363-b3ed98c3e738.png)

![image.png](attachment:069322be-52f3-471c-9eb1-fa42af1a501e.png)

# Selecting data with .query()

![image.png](attachment:e66cf896-a110-492e-82b1-7d1870349c95.png)

![image.png](attachment:f032873d-3f39-4bd7-8ea9-3e2ee09ef57a.png)

![image.png](attachment:0879c7f3-6865-42ff-b3ac-3990917429dd.png)

![image.png](attachment:41a58800-cdc7-4953-a905-ba7af542822f.png)

![image.png](attachment:94d6c9fc-f57d-4c6f-beb5-220c7ae4a367.png)

![image.png](attachment:767eee04-5456-4df9-9289-2123bccd5c1a.png)

In [None]:
import pandas as pd

In [None]:
# Ordered merge of gdp and pop keyed on country, then date; gaps are forward-filled
gdp_pop = pd.merge_ordered(
    left=gdp,
    right=pop,
    on=["country", "date"],
    fill_method="ffill",
)

![image.png](attachment:9fa26abc-a9b0-4ff3-9dbb-02778dcc2f88.png)

![image.png](attachment:aa925544-c2e6-4a7b-b439-a4f2be42dbe3.png)

In [None]:
# New gdp_per_capita column: gdp divided element-wise by pop.
# Bracket access is required here: gdp_pop.pop would resolve to the DataFrame.pop method.
gdp_pop["gdp_per_capita"] = gdp_pop["gdp"].div(gdp_pop["pop"])

![image.png](attachment:8a63cc21-1af7-42cb-b442-010c2fb1840a.png)

![image.png](attachment:604af0c9-0409-42e5-89e4-167460d0b9f9.png)

In [None]:
# Reshape to wide form: one row per date, one column per country,
# cells holding gdp_per_capita (pivot_table aggregates with the mean by default)
gdp_pivot = gdp_pop.pivot_table(
    index="date",
    columns="country",
    values="gdp_per_capita",
)

![image.png](attachment:d85ce68a-9745-4025-989a-22ff091ddbe5.png)

![image.png](attachment:3406b8f3-0e61-4322-9cf2-66137c68bb61.png)

In [None]:
# Keep only the rows dated 1991-01-01 or later
# (query() resolves "date" against the pivot's index name)
recent_gdp_pop = gdp_pivot.query(expr="date >= '1991-01-01'")

# Line plot of the selection, x tick labels rotated 90 degrees
recent_gdp_pop.plot(rot=90)
plt.show()

![image.png](attachment:d6d93ff7-036a-4414-9f57-79315ec9aaac.png)

![image.png](attachment:73bb924b-226e-42b5-b2a0-f2579c8c3b06.png)

# Reshaping data with .melt()

![image.png](attachment:102dbf47-4c9d-4609-9bae-2c0fc640e76f.png)

![image.png](attachment:389db6fb-311c-47fc-91f3-3824efe3c05c.png)

In [None]:
# Wide -> tall: keep year as the identifier; every other column becomes a
# (month, unempl_rate) pair
ur_tall = ur_wide.melt(
    id_vars=["year"],
    var_name="month",
    value_name="unempl_rate",
)

# Build "<year>-<month>" strings and parse them into a datetime column
# NOTE(review): the concatenation assumes year and month both hold strings - confirm
year_month = ur_tall["year"] + "-" + ur_tall["month"]
ur_tall["date"] = pd.to_datetime(year_month)

# Order the rows chronologically (ascending is the default)
ur_sorted = ur_tall.sort_values(by="date")

# Unemployment rate over time
ur_sorted.plot(x="date", y="unempl_rate")
plt.show()

![image.png](attachment:00beb9ef-0feb-41b2-b09a-9e190cd98455.png)

![image.png](attachment:c1fb6c74-f97c-4347-9d60-6a7a0b26ecf2.png)

![image.png](attachment:b7fb6b45-f54c-4aae-a828-520c8549d0ba.png)

In [None]:
# Unpivot ten_yr: metric stays as the identifier, the remaining column
# labels go into "date" and their values into "close"
bond_perc = ten_yr.melt(
    id_vars=["metric"],
    var_name="date",
    value_name="close",
)
print(bond_perc)

![image.png](attachment:b667f326-88a0-4514-8f39-9f8f941defdc.png)

In [1]:
# Filter bond_perc down to the rows whose metric is 'close'
bond_perc_close = bond_perc.query(expr="metric == 'close'")
print(bond_perc_close)

NameError: name 'bond_perc' is not defined

![image.png](attachment:2e5cc0b8-888b-479a-ad5a-6ff15618e3cd.png)

In [None]:
# Ordered inner join of dji and bond_perc_close on date; columns present in
# both tables get the "_dow" (dji side) and "_bond" (bond side) suffixes
dow_bond = pd.merge_ordered(
    left=dji,
    right=bond_perc_close,
    on="date",
    how="inner",
    suffixes=("_dow", "_bond"),
)
print(dow_bond)

NameError: name 'pd' is not defined

![image.png](attachment:8fd0c842-af26-45a0-87da-e7b456e73a1b.png)

In [None]:
# Plot close_dow and close_bond against date, x tick labels rotated 90 degrees
dow_bond.plot(x="date", y=["close_dow", "close_bond"], rot=90)
plt.show()

![image.png](attachment:bc1cb92c-b38c-4a42-8a72-38883c0c1b07.png)