In [14]:
import pandas as pd

# -------- Table 1: Price Data --------
price_data = {
    "date": ["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04"],
    "open": [135.2, 136.9, 138.2, 136.5],
    "high": [137.8, 138.0, 139.8, 138.2],
    "low": [134.6, 135.4, 134.4, 135.7],
    "close": [136.9, 137.4, 136.4, 137.9],
    "volume": [1900000, 2100000, 1800000, 2300000]
}

price = pd.DataFrame(price_data)
price["date"] = pd.to_datetime(price["date"])  # make date proper datetime

# -------- Table 2: Fundamentals Data --------
fundamental_data = {
    "date": ["2024-01-01", "2024-01-02"],
    "pe_ratio": [28.4, 28.6],
    "market_cap": [1.7e12, 1.72e12]
}

fundamentals = pd.DataFrame(fundamental_data)
fundamentals["date"] = pd.to_datetime(fundamentals["date"])

# -------- Table 2: Fundamentals Data --------
fundamental_data2 = {
    "date": ["2024-01-03", "2024-01-04"],
    "pe_ratio": [28.5, 28.3],
    "market_cap": [1.73e12, 1.74e12]
}

fundamentals2 = pd.DataFrame(fundamental_data2)
fundamentals2["date"] = pd.to_datetime(fundamentals2["date"])

# Show them
print("Price Table:")
print(price, "\n")

print("Fundamentals Tables:")
print(fundamentals, fundamentals2)



Price Table:
        date   open   high    low  close   volume
0 2024-01-01  135.2  137.8  134.6  136.9  1900000
1 2024-01-02  136.9  138.0  135.4  137.4  2100000
2 2024-01-03  138.2  139.8  134.4  136.4  1800000
3 2024-01-04  136.5  138.2  135.7  137.9  2300000 

Fundamentals Tables:
        date  pe_ratio    market_cap
0 2024-01-01      28.4  1.700000e+12
1 2024-01-02      28.6  1.720000e+12         date  pe_ratio    market_cap
0 2024-01-03      28.5  1.730000e+12
1 2024-01-04      28.3  1.740000e+12


1. Merge price and fundamental on "date" using an inner join.

In [15]:
price.merge(fundamentals, on = 'date', how= 'inner')

Unnamed: 0,date,open,high,low,close,volume,pe_ratio,market_cap
0,2024-01-01,135.2,137.8,134.6,136.9,1900000,28.4,1700000000000.0
1,2024-01-02,136.9,138.0,135.4,137.4,2100000,28.6,1720000000000.0


2. Merge fundamentals into price, keeping all price rows even if fundamentals missing.

In [16]:
price.merge(fundamentals, on= 'date', how= 'left')

Unnamed: 0,date,open,high,low,close,volume,pe_ratio,market_cap
0,2024-01-01,135.2,137.8,134.6,136.9,1900000,28.4,1700000000000.0
1,2024-01-02,136.9,138.0,135.4,137.4,2100000,28.6,1720000000000.0
2,2024-01-03,138.2,139.8,134.4,136.4,1800000,,
3,2024-01-04,136.5,138.2,135.7,137.9,2300000,,


3. Merge only the "pe_ratio" column from df_fundamental into df_price.

In [17]:
price.merge(fundamentals[['date', 'pe_ratio']], on = 'date')

Unnamed: 0,date,open,high,low,close,volume,pe_ratio
0,2024-01-01,135.2,137.8,134.6,136.9,1900000,28.4
1,2024-01-02,136.9,138.0,135.4,137.4,2100000,28.6


4. Assume:

- df_price has date

- df_fundamental has report_date (update this name temporarily for this exercise)

Question:
Merge these even though columns have different names.

In [18]:
fundamentals.rename(columns={'date': 'report_date'}, inplace = True)
price.merge(fundamentals, left_on = 'date', right_on = 'report_date')

Unnamed: 0,date,open,high,low,close,volume,report_date,pe_ratio,market_cap
0,2024-01-01,135.2,137.8,134.6,136.9,1900000,2024-01-01,28.4,1700000000000.0
1,2024-01-02,136.9,138.0,135.4,137.4,2100000,2024-01-02,28.6,1720000000000.0


In [19]:
fundamentals.rename(columns={'report_date': 'date'}, inplace = True)

5. After a left join, find all rows where the PE ratio is missing.

In [20]:
merged = price.merge(fundamentals, on= 'date', how= 'left')
merged.isna()

Unnamed: 0,date,open,high,low,close,volume,pe_ratio,market_cap
0,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,True,True
3,False,False,False,False,False,False,True,True


6. Stack two fundamentals tables vertically.

In [21]:
fundamentals_merged = pd.concat([fundamentals, fundamentals2], axis = 0)
fundamentals_merged= fundamentals_merged.reset_index()
fundamentals_merged#.drop('index', inplace = True)

Unnamed: 0,index,date,pe_ratio,market_cap
0,0,2024-01-01,28.4,1700000000000.0
1,1,2024-01-02,28.6,1720000000000.0
2,0,2024-01-03,28.5,1730000000000.0
3,1,2024-01-04,28.3,1740000000000.0


In [22]:
fundamentals_merged.columns

Index(['index', 'date', 'pe_ratio', 'market_cap'], dtype='object')

7. Combine fundamentals with prices by connecting two dataframes horizontally.

In [23]:
combined = pd.concat([price, fundamentals_merged], axis = 1)
combined

Unnamed: 0,date,open,high,low,close,volume,index,date.1,pe_ratio,market_cap
0,2024-01-01,135.2,137.8,134.6,136.9,1900000,0,2024-01-01,28.4,1700000000000.0
1,2024-01-02,136.9,138.0,135.4,137.4,2100000,1,2024-01-02,28.6,1720000000000.0
2,2024-01-03,138.2,139.8,134.4,136.4,1800000,0,2024-01-03,28.5,1730000000000.0
3,2024-01-04,136.5,138.2,135.7,137.9,2300000,1,2024-01-04,28.3,1740000000000.0


8. Merge on multiple keys.

In [27]:
# --- Stock prices table ---
stock_prices = pd.DataFrame({
    "date": [
        "2025-01-01", "2025-01-01", "2025-01-01",
        "2025-01-02", "2025-01-02", "2025-01-02",
        "2025-01-03", "2025-01-03", "2025-01-03",
    ],
    "ticker": ["GOOG", "MSFT", "NVDA"] * 3,
    "close": [140, 330, 500, 142, 335, 510, 138, 332, 505],
    "volume": [1.1e6, 2.0e6, 3.5e6, 1.2e6, 2.2e6, 3.8e6, 1.3e6, 2.1e6, 3.6e6]
})

stock_prices["date"] = pd.to_datetime(stock_prices["date"])

fundamental_data = {
    "date": ["2025-01-01", "2025-01-01", "2025-01-01"],
    "ticker": ["GOOG", "MSFT", "NVDA"],
    "pe_ratio": [28.4, 28.6, 30.3],
    "market_cap": [1.7e12, 1.72e12, 2.4e12]
}

fundamental_data = pd.DataFrame(fundamental_data)
fundamental_data["date"] = pd.to_datetime(fundamental_data["date"])

In [28]:
fundamental_data.merge(stock_prices, on = ["date", "ticker"])

Unnamed: 0,date,ticker,pe_ratio,market_cap,close,volume
0,2025-01-01,GOOG,28.4,1700000000000.0,140,1100000.0
1,2025-01-01,MSFT,28.6,1720000000000.0,330,2000000.0
2,2025-01-01,NVDA,30.3,2400000000000.0,500,3500000.0


9. Semi-Join: Keep rows from price where matching date exists in fundamentals without actually merging the data.

In [41]:
stock_prices[stock_prices['date'].isin(fundamental_data['date'])]

Unnamed: 0,date,ticker,close,volume
0,2025-01-01,GOOG,140,1100000.0
1,2025-01-01,MSFT,330,2000000.0
2,2025-01-01,NVDA,500,3500000.0


10. Anti-Join: Find price dates that do not appear in fundamentals.

In [43]:
stock_prices['date'][~stock_prices['date'].isin(fundamental_data['date'])]

3   2025-01-02
4   2025-01-02
5   2025-01-02
6   2025-01-03
7   2025-01-03
8   2025-01-03
Name: date, dtype: datetime64[ns]