In [1]:
from pymongo import MongoClient
import pandas as pd
import os
import pathlib
from dotenv import load_dotenv
import datetime
from fredapi import Fred

env_path = pathlib.Path('..') / '.env'
load_dotenv(dotenv_path=env_path)
FRED_API_KEY = os.getenv("FRED_API_KEY")
fred = Fred(api_key=FRED_API_KEY)


client = MongoClient('localhost', 27017)
db = client.project2_db
collection = db.project2_collection

# Define your series dictionary, start and end dates as before
series_dict = {
    'bonds2tr': ('DGS2', 'D'),
    'gdpworld': ('NYGDPMKTPCDWLD', 'A'),
    'gdp': ('GDP', 'Q'),
    'recession': ('JHDUSRGDPBR', 'Q'),
    'bonds10tr': ('DGS10', 'D'),
    'ppi': ('PPIACO', 'M'),
    'cpi': ('CPIAUCSL', 'M'),
    'unrate': ('UNRATE', 'M'),
    'debt': ('GFDEBTN', 'Q'),
    'fedrate': ('FEDFUNDS', 'M'),
    'm0': ('BOGMBASE', 'M'),
    'm3': ('MABMM301USM189S', 'M'),
    'cbasstogdp': ('DDDI06USA156NWDB', 'A'),
    'resins': ('TOTRESNS', 'M'),
    'oil': ('WTISPLC', 'M'),
    'indpro': ('INDPRO', 'M'),
    'houses': ('MSPUS', 'Q'),
    'wages': ('AHETPI', 'M'),
    'cp': ('CP', 'Q')
}

# Initialize DataFrames
dfs = {'D': pd.DataFrame(), 'M': pd.DataFrame(), 'Q': pd.DataFrame(), 'A': pd.DataFrame()}


def fetch_data(df, var_name, series_id):
    series_data = fred.get_series(series_id)
    series_df = series_data.to_frame(name=var_name)
    series_df.index.name = 'date'
    return df.join(series_df, how='outer') if not df.empty else series_df


for var_name, (series_id, freq) in series_dict.items():
    dfs[freq] = fetch_data(dfs[freq], var_name, series_id)

In [2]:
df = {}
df['D_M'] = dfs['D'].resample('M').last()
df['M_M'] = dfs['M'].resample('M').last()
df['Q_M'] = dfs['Q'].resample('M').ffill()
# new_dates = pd.date_range(
#     start=df['Q_M'].index.min(), end='2022-12-31', freq='M')
# df['Q_M'] = df['Q_M'].reindex(new_dates).ffill()
df['A_M'] = dfs['A'].resample('M').ffill()
# data.tail()

In [6]:
data = pd.concat([df['D_M'], df['M_M'], df['Q_M'], df['A_M']], axis=1)
data = data.loc['1970-01-01':]
data.head(5)

Unnamed: 0_level_0,bonds2tr,bonds10tr,ppi,cpi,unrate,fedrate,m0,m3,resins,oil,indpro,wages,gdp,recession,debt,houses,cp,gdpworld,cbasstogdp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1970-01-31,,7.75,36.5,37.9,3.9,8.98,76400.0,589600000000.0,28.9,3.35,37.9372,3.31,1051.2,1.0,372007.0,23900.0,55.323,2997270000000.0,5.69662
1970-02-28,,6.9,36.7,38.1,4.2,8.98,75200.0,586300000000.0,27.9,3.35,37.9122,3.33,1051.2,1.0,372007.0,23900.0,55.323,2997270000000.0,5.69662
1970-03-31,,7.08,36.7,38.3,4.4,7.76,75300.0,587300000000.0,27.5,3.35,37.863,3.36,1051.2,1.0,372007.0,23900.0,55.323,2997270000000.0,5.69662
1970-04-30,,7.82,36.8,38.5,4.6,8.1,76300.0,588400000000.0,28.1,3.35,37.7656,3.36,1067.375,1.0,370094.0,24400.0,55.194,2997270000000.0,5.69662
1970-05-31,,7.95,36.8,38.6,4.8,7.95,76600.0,591500000000.0,27.9,3.35,37.7216,3.37,1067.375,1.0,370094.0,24400.0,55.194,2997270000000.0,5.69662


In [16]:
data['wages_month'] = round(data['wages'] * 168, 2)
data['house_wages'] = round(data['houses']/data['wages_month'], 2)
data['iyc'] = round(data['bonds10tr'] - data['bonds2tr'], 2)
data['gdp_pct'] = round(data['gdp'].pct_change(periods=4) * 100, 2)
data['gdp_pct_ma4'] = round(data['gdp_pct'].rolling(window=4).mean(), 2)
data['gdpworld_pct'] = round(data['gdpworld'].pct_change(12) * 100, 2)
data['debt_to_gdp'] = round(100 * data['debt'] / data['gdp']/1000, 2)
data['resins_to_gdp'] = round(100 * data['resins'] / data['gdp'], 2)
data['cp_to_gdp'] = round(100 * data['cp'] / data['gdp'], 2)
data['m0_to_gdp'] = round(data['m0'] / data['gdp']/1000 * 100, 2)
data['m3_to_gdp'] = round((data['m3']/1000000) / data['gdp']/1000 * 100, 2)
data['cpi_pct'] = round(data['cpi'].pct_change(periods=12) * 100, 2)
data['ppi_pct'] = round(data['ppi'].pct_change(periods=12) * 100, 2)
data['cbasstogdp'] = round(data['cbasstogdp'], 2)
data['indpro'] = round(data['indpro'], 2)
data['oil'] = round(data['oil'], 2)
data[[ 'gdp_pct', 'gdp_pct_ma4', 'gdpworld', 'ppi', 'resins_to_gdp', 'cp_to_gdp']].tail(15)

Unnamed: 0_level_0,gdp_pct,gdp_pct_ma4,gdpworld,ppi,resins_to_gdp,cp_to_gdp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-09-30,1.76,2.36,,267.898,12.05,11.59
2022-10-31,3.38,2.69,,265.061,11.57,10.79
2022-11-30,1.59,2.12,,263.157,11.84,10.79
2022-12-31,1.59,2.08,,257.897,11.77,10.79
2023-01-31,3.15,2.43,,260.227,11.3,10.74
2023-02-28,1.53,1.96,,258.669,11.27,10.74
2023-03-31,1.53,1.95,,257.062,12.15,10.74
2023-04-30,2.48,2.17,,256.908,12.08,10.73
2023-05-31,0.93,1.62,,253.67,11.96,10.73
2023-06-30,0.93,1.47,,253.908,12.07,10.73


In [28]:
data.index.name = 'date'
data = data.reset_index()

data_to_insert = data.to_dict(orient='records')
collection.insert_many(data_to_insert)

<pymongo.results.InsertManyResult at 0x1eaccef6dd0>