In [72]:
from pymongo import MongoClient
import pandas as pd
import os
import pathlib
from dotenv import load_dotenv
import datetime
from fredapi import Fred

# Read the FRED API key from the .env file one directory above this notebook.
env_path = pathlib.Path('..', '.env')
load_dotenv(dotenv_path=env_path)
FRED_API_KEY = os.getenv("FRED_API_KEY")
fred = Fred(api_key=FRED_API_KEY)


# Handles on the local MongoDB database and collection used by this notebook.
client = MongoClient(host='localhost', port=27017)
db = client['project2_db']
collection = db['project2_collection']

# Output column name -> (FRED series id, frequency code).
# The frequency code picks the bucket in `dfs` that the series is merged into.
# Entry order is kept as-is: it decides the column order inside each bucket.
series_dict = dict(
    bonds2tr=('DGS2', 'D'),
    gdpworld=('NYGDPMKTPCDWLD', 'A'),
    gdp=('GDP', 'Q'),
    recession=('JHDUSRGDPBR', 'Q'),
    bonds10tr=('DGS10', 'D'),
    ppi=('PPIACO', 'M'),
    cpi=('CPIAUCSL', 'M'),
    unrate=('UNRATE', 'M'),
    # Series below are currently disabled.
    # debt=('GFDEBTN', 'Q'),
    # fedrate=('FEDFUNDS', 'M'),
    # m0=('BOGMBASE', 'M'),
    # m3=('MABMM301USM189S', 'M'),
    # cbasstogdp=('DDDI06USA156NWDB', 'A'),
    # resins=('TOTRESNS', 'M'),
    # oil=('WTISPLC', 'M'),
    # indpro=('INDPRO', 'M'),
    # houses=('MSPUS', 'Q'),
    # cp=('CP', 'Q'),
)

# Observation window applied to every downloaded series.
start_date = datetime.datetime(year=1970, month=1, day=1)
end_date = datetime.datetime(year=2022, month=12, day=31)

# One (initially empty) accumulator frame per frequency code.
dfs = {freq_code: pd.DataFrame() for freq_code in ('D', 'M', 'Q', 'A')}


def fetch_and_merge(df, var_name, series_id, start, end):
    """Download one FRED series, trim it to ``[start, end]`` and merge it into ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accumulated so far; may be empty.
    var_name : str
        Column name given to the downloaded series.
    series_id : str
        Identifier passed to ``fred.get_series``.
    start, end
        Bounds of the label slice applied to the series index
        (both ends are included by ``.loc`` label slicing).

    Returns
    -------
    pandas.DataFrame
        The single-column frame for this series when ``df`` is empty;
        otherwise ``df`` inner-joined with it on the index, so only index
        labels present in both are kept.
    """
    series_data = fred.get_series(series_id)
    # Slice with the caller-supplied bounds rather than the notebook-level
    # start_date / end_date globals, so the parameters actually take effect.
    series_df = series_data.loc[start:end].to_frame(name=var_name)
    series_df.index.name = 'date'
    if df.empty:
        return series_df
    return df.join(series_df, how='inner')


# Loop through each series
# Download every configured series and fold it into the frame for its frequency.
for var_name in series_dict:
    series_id, freq = series_dict[var_name]
    merged = fetch_and_merge(dfs[freq], var_name, series_id, start_date, end_date)
    dfs[freq] = merged

In [73]:
# Put the D, M and Q buckets on a common month-end index.
df = {}
# For the D and M buckets keep the last available value in each calendar month.
df['D_M'] = dfs['D'].resample('M').last()
df['M_M'] = dfs['M'].resample('M').last()
# The Q bucket is upsampled: each value is carried forward over the following months.
df['Q_M'] = dfs['Q'].resample('M').ffill()
# Extend the Q frame to end_date, carrying the last observation forward.
# end_date is used instead of a repeated '2022-12-31' literal so that changing
# the observation window cannot silently truncate or under-fill this frame.
new_dates = pd.date_range(
    start=df['Q_M'].index.min(), end=end_date, freq='M')
df['Q_M'] = df['Q_M'].reindex(new_dates).ffill()

# NOTE(review): dfs['A'] is fetched but not included below - confirm this is intended.
data = pd.concat([df['D_M'], df['M_M'], df['Q_M']], axis=1)
data

Unnamed: 0,bonds2tr,bonds10tr,ppi,cpi,unrate,gdp,recession
1970-01-31,,,36.500,37.900,3.9,1051.200,1.0
1970-02-28,,,36.700,38.100,4.2,1051.200,1.0
1970-03-31,,,36.700,38.300,4.4,1051.200,1.0
1970-04-30,,,36.800,38.500,4.6,1067.375,1.0
1970-05-31,,,36.800,38.600,4.8,1067.375,1.0
...,...,...,...,...,...,...,...
2022-08-31,3.45,3.15,269.546,295.320,3.7,25994.639,0.0
2022-09-30,4.22,3.83,267.898,296.539,3.5,25994.639,0.0
2022-10-31,4.51,4.10,265.061,297.987,3.7,26408.405,0.0
2022-11-30,4.38,3.68,263.157,298.598,3.6,26408.405,0.0


In [74]:
# Turn the index into a regular 'date' column so every record carries its date.
# Guarded: once 'date' is already a column, reset_index() would raise
# "ValueError: cannot insert date, already exists" if this cell were run again.
if 'date' not in data.columns:
    data = data.rename_axis('date').reset_index()

# One dict per row, keyed by column name.
data_to_insert = data.to_dict(orient='records')
# NOTE(review): insert_many only adds documents; re-running this cell presumably
# stores the same rows again - clear the collection first if that is unwanted.
collection.insert_many(data_to_insert)

<pymongo.results.InsertManyResult at 0x215e7c2e8c0>