In [10]:
import pandas as pd
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('../food_inflation_analysis.db')

# Get a list of all tables in the database
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()

# Create an empty DataFrame to store the combined data
combined_data = pd.DataFrame()



In [None]:
# Create a dictionary to store the DataFrames
dfs = {}

# Iterate over each table
for table in tables:
    table_name = table[0]
    
    # Read the table data into a DataFrame
    dfs[table_name] = pd.read_sql_query(f"SELECT * FROM {table_name}", conn)




In [11]:
tables

[('gas_prices',),
 ('OCED_USA_FOOD_INFLATION',),
 ('DOW_JONES_REAL',),
 ('USA_MEAT_EXPORT_IMPORT',),
 ('food_production',),
 ('interest_rate',)]

In [41]:
merged_df = pd.merge(dfs['gas_prices'], dfs['OCED_USA_FOOD_INFLATION'], on='Date', how='inner')

In [45]:
merged_df.rename({'Price':'gas_price'}, axis=1,inplace=True)

In [46]:
merged_df

Unnamed: 0,Date,gas_price,Inflation
0,1993-04-01 00:00:00,1.078,1.892285
1,1993-05-01 00:00:00,1.100,3.303965
2,1993-06-01 00:00:00,1.097,2.351212
3,1993-07-01 00:00:00,1.078,2.505527
4,1993-08-01 00:00:00,1.062,2.045289
...,...,...,...
361,2023-05-01 00:00:00,3.666,5.647190
362,2023-06-01 00:00:00,3.684,4.581234
363,2023-07-01 00:00:00,3.712,3.485868
364,2023-08-01 00:00:00,3.954,2.893452


In [47]:
merged_df = pd.merge(merged_df, dfs['DOW_JONES_REAL'], on='Date', how='inner')
merged_df

Unnamed: 0,Date,gas_price,Inflation,real-price
0,1993-04-01 00:00:00,1.078,1.892285,7324.67
1,1993-05-01 00:00:00,1.100,3.303965,7527.54
2,1993-06-01 00:00:00,1.097,2.351212,7496.28
3,1993-07-01 00:00:00,1.078,2.505527,7546.15
4,1993-08-01 00:00:00,1.062,2.045289,7762.56
...,...,...,...,...
361,2023-05-01 00:00:00,3.666,5.647190,33303.17
362,2023-06-01 00:00:00,3.684,4.581234,34717.27
363,2023-07-01 00:00:00,3.712,3.485868,35808.45
364,2023-08-01 00:00:00,3.954,2.893452,34791.35


In [48]:
merged_df.rename({'real-price':'dow_jones_real-price'}, axis=1,inplace=True)

In [49]:
merged_df

Unnamed: 0,Date,gas_price,Inflation,dow_jones_real-price
0,1993-04-01 00:00:00,1.078,1.892285,7324.67
1,1993-05-01 00:00:00,1.100,3.303965,7527.54
2,1993-06-01 00:00:00,1.097,2.351212,7496.28
3,1993-07-01 00:00:00,1.078,2.505527,7546.15
4,1993-08-01 00:00:00,1.062,2.045289,7762.56
...,...,...,...,...
361,2023-05-01 00:00:00,3.666,5.647190,33303.17
362,2023-06-01 00:00:00,3.684,4.581234,34717.27
363,2023-07-01 00:00:00,3.712,3.485868,35808.45
364,2023-08-01 00:00:00,3.954,2.893452,34791.35


In [53]:
# 检查 'Date' 列的数据类型
print(merged_df['Date'].dtype)

# 如果 'Date' 列的数据类型不是 datetime，将其转换为 datetime
if merged_df['Date'].dtype != 'datetime64[ns]':
    merged_df['Date'] = pd.to_datetime(merged_df['Date'])

# 从 'Date' 列中提取年份并将其存储在新的 'Year' 列中
merged_df['Year'] = merged_df['Date'].dt.year

object


In [54]:
merged_df

Unnamed: 0,Date,gas_price,Inflation,dow_jones_real-price,Year
0,1993-04-01,1.078,1.892285,7324.67,1993
1,1993-05-01,1.100,3.303965,7527.54,1993
2,1993-06-01,1.097,2.351212,7496.28,1993
3,1993-07-01,1.078,2.505527,7546.15,1993
4,1993-08-01,1.062,2.045289,7762.56,1993
...,...,...,...,...,...
361,2023-05-01,3.666,5.647190,33303.17,2023
362,2023-06-01,3.684,4.581234,34717.27,2023
363,2023-07-01,3.712,3.485868,35808.45,2023
364,2023-08-01,3.954,2.893452,34791.35,2023


In [55]:
# 将dfs['USA_MEAT_EXPORT_IMPORT']和merge_df按照Year列进行合并
merge_df = pd.merge(merged_df, dfs['USA_MEAT_EXPORT_IMPORT'], on='Year', how='inner')

merge_df


Unnamed: 0,Date,gas_price,Inflation,dow_jones_real-price,Year,Exports,Imports
0,1993-04-01,1.078,1.892285,7324.67,1993,2.338679e+10,1.731756e+10
1,1993-05-01,1.100,3.303965,7527.54,1993,2.338679e+10,1.731756e+10
2,1993-06-01,1.097,2.351212,7496.28,1993,2.338679e+10,1.731756e+10
3,1993-07-01,1.078,2.505527,7546.15,1993,2.338679e+10,1.731756e+10
4,1993-08-01,1.062,2.045289,7762.56,1993,2.338679e+10,1.731756e+10
...,...,...,...,...,...,...,...
361,2023-05-01,3.666,5.647190,33303.17,2023,7.199305e+10,1.524754e+10
362,2023-06-01,3.684,4.581234,34717.27,2023,7.199305e+10,1.524754e+10
363,2023-07-01,3.712,3.485868,35808.45,2023,7.199305e+10,1.524754e+10
364,2023-08-01,3.954,2.893452,34791.35,2023,7.199305e+10,1.524754e+10


In [56]:
merged_df = pd.merge(merged_df, dfs['food_production'], on='Year', how='inner')
merged_df

Unnamed: 0,Date,gas_price,Inflation,dow_jones_real-price,Year,Cereal_Production,Food_Production,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9
0,1993-04-01,1.078,1.892285,7324.67,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41
1,1993-05-01,1.100,3.303965,7527.54,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41
2,1993-06-01,1.097,2.351212,7496.28,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41
3,1993-07-01,1.078,2.505527,7546.15,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41
4,1993-08-01,1.062,2.045289,7762.56,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
340,2021-08-01,3.255,2.980927,39780.82,2021,452628438.9,105.46,104.05,99.65,103.20,101.96,104.86,97.01,98.13,95.81,90.90
341,2021-09-01,3.272,4.516333,37972.88,2021,452628438.9,105.46,104.05,99.65,103.20,101.96,104.86,97.01,98.13,95.81,90.90
342,2021-10-01,3.384,5.382061,39867.17,2021,452628438.9,105.46,104.05,99.65,103.20,101.96,104.86,97.01,98.13,95.81,90.90
343,2021-11-01,3.491,6.385879,38173.48,2021,452628438.9,105.46,104.05,99.65,103.20,101.96,104.86,97.01,98.13,95.81,90.90


In [59]:
# 检查 'Date' 列的数据类型
print(dfs['interest_rate']['Date'].dtype)

# 如果 'Date' 列的数据类型不是 datetime，将其转换为 datetime
if dfs['interest_rate']['Date'].dtype != 'datetime64[ns]':
    dfs['interest_rate']['Date'] = pd.to_datetime(dfs['interest_rate']['Date'])

object


In [60]:
merged_df = pd.merge(merged_df, dfs['interest_rate'], on='Date', how='inner')
merged_df

Unnamed: 0,Date,gas_price,Inflation,dow_jones_real-price,Year,Cereal_Production,Food_Production,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,FEDFUNDS
0,1993-04-01,1.078,1.892285,7324.67,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41,2.96
1,1993-05-01,1.100,3.303965,7527.54,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41,3.00
2,1993-06-01,1.097,2.351212,7496.28,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41,3.04
3,1993-07-01,1.078,2.505527,7546.15,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41,3.06
4,1993-08-01,1.062,2.045289,7762.56,1993,259105342.4,67.27,73.34,67.32,68.81,66.52,60.46,65.48,66.19,68.59,65.41,3.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
340,2021-08-01,3.255,2.980927,39780.82,2021,452628438.9,105.46,104.05,99.65,103.20,101.96,104.86,97.01,98.13,95.81,90.90,0.09
341,2021-09-01,3.272,4.516333,37972.88,2021,452628438.9,105.46,104.05,99.65,103.20,101.96,104.86,97.01,98.13,95.81,90.90,0.08
342,2021-10-01,3.384,5.382061,39867.17,2021,452628438.9,105.46,104.05,99.65,103.20,101.96,104.86,97.01,98.13,95.81,90.90,0.08
343,2021-11-01,3.491,6.385879,38173.48,2021,452628438.9,105.46,104.05,99.65,103.20,101.96,104.86,97.01,98.13,95.81,90.90,0.08


In [None]:
# Close the database connection
conn.close()

In [20]:
dfs['food_inflation']

KeyError: 'food_inflation'