In [1]:
from pymongo import MongoClient
import pandas as pd
import os
import pathlib
from dotenv import load_dotenv
import datetime
from fredapi import Fred

env_path = pathlib.Path('..') / '.env'
load_dotenv(dotenv_path=env_path)
FRED_API_KEY = os.getenv("FRED_API_KEY")
fred = Fred(api_key=FRED_API_KEY)


client = MongoClient('localhost', 27017)
db = client.project2_db
collection = db.project2_collection

# Define your series dictionary, start and end dates as before
series_dict = {
    'bonds2tr': ('DGS2', 'D'),
    'gdpworld': ('NYGDPMKTPCDWLD', 'A'),
    'gdp': ('GDP', 'Q'),
    'recession': ('JHDUSRGDPBR', 'Q'),
    'bonds10tr': ('DGS10', 'D'),
    'ppi': ('PPIACO', 'M'),
    'cpi': ('CPIAUCSL', 'M'),
    'unrate': ('UNRATE', 'M'),
    'debt': ('GFDEBTN', 'Q'),
    'fedrate': ('FEDFUNDS', 'M'),
    'm0': ('BOGMBASE', 'M'),
    'm3': ('MABMM301USM189S', 'M'),
    'cbasstogdp': ('DDDI06USA156NWDB', 'A'),
    'resins': ('TOTRESNS', 'M'),
    'oil': ('WTISPLC', 'M'),
    'indpro': ('INDPRO', 'M'),
    'houses': ('MSPUS', 'Q'),
    'wages': ('CES0500000003', 'M'),
    'cp': ('CP', 'Q')
}

# Define dates
start_date = datetime.datetime(1970, 1, 1)
end_date = datetime.datetime(2022, 12, 31)

# Initialize DataFrames
dfs = {'D': pd.DataFrame(), 'M': pd.DataFrame(), 'Q': pd.DataFrame(), 'A': pd.DataFrame()}


def fetch_data(df, var_name, series_id):
    series_data = fred.get_series(series_id)
    series_df = series_data.to_frame(name=var_name)
    series_df.index.name = 'date'
    return df.join(series_df, how='outer') if not df.empty else series_df


for var_name, (series_id, freq) in series_dict.items():
    dfs[freq] = fetch_data(dfs[freq], var_name, series_id)

In [2]:
df = {}
df['D_M'] = dfs['D'].resample('M').last()
df['M_M'] = dfs['M'].resample('M').last()
df['Q_M'] = dfs['Q'].resample('M').ffill()
new_dates = pd.date_range(
    start=df['Q_M'].index.min(), end='2022-12-31', freq='M')
df['Q_M'] = df['Q_M'].reindex(new_dates).ffill()
df['Q_M']

Unnamed: 0,gdp,recession,debt,houses,cp
1946-01-31,,,,,
1946-02-28,,,,,
1946-03-31,,,,,
1946-04-30,,,,,
1946-05-31,,,,,
...,...,...,...,...,...
2022-08-31,25994.639,0.0,30928912.0,468000.0,3013.486
2022-09-30,25994.639,0.0,30928912.0,468000.0,3013.486
2022-10-31,26408.405,0.0,31419689.0,479500.0,2850.085
2022-11-30,26408.405,0.0,31419689.0,479500.0,2850.085


In [3]:
data = pd.concat([df['D_M'], df['M_M'], df['Q_M']], axis=1)

In [7]:
data['iyc'] = round(data['bonds10tr'] - data['bonds2tr'], 2)
data['gdp_pct'] = data['gdp'].pct_change() * 100
data['debt_to_gdp'] = round(data['debt'] / data['gdp']/1000, 2)
data['m0_to_gdp'] = round(data['m0'] / data['gdp']/1000 * 100, 2)
data['m3_to_gdp'] = round((data['m3']/1000000) / data['gdp']/1000 * 100, 2)
data['cpi_pct'] = data['cpi'].pct_change() * 100
data['ppi_pct'] = data['ppi'].pct_change() * 100
data['ma_cpi'] = data['cpi_pct'].rolling(window=8).mean()
data['ma_unemp'] = data['unrate'].rolling(window=8).mean()

# data

In [9]:
# data = data[start_date:end_date]

In [12]:
# data.index.name = 'date'
# data = data.reset_index()

data_to_insert = data.to_dict(orient='records')
collection.insert_many(data_to_insert)

<pymongo.results.InsertManyResult at 0x238876b2290>

In [184]:
# data.to_csv('data.csv')
# data['date'] = pd.to_datetime(data['date'])
# data = data.set_index('date', inplace=True)
data.index.name = 'date'
data


Unnamed: 0_level_0,bonds2tr,bonds10tr,ppi,cpi,unrate,fedrate,m0,m3,resins,oil,...,debt,houses,cp,iyc,gdp_pct,debt_to_gdp,m0_to_gdp,m3_to_gdp,cpi_pct,ppi_pct
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1970-01-31,,7.75,36.500,37.900,3.9,8.98,76400.0,5.896000e+11,28.9,3.35,...,372007.0,23900.0,55.323,,1.257336,0.35,7.27,56.09,0.530504,0.550964
1970-02-28,,6.90,36.700,38.100,4.2,8.98,75200.0,5.863000e+11,27.9,3.35,...,372007.0,23900.0,55.323,,0.000000,0.35,7.15,55.77,0.527704,0.547945
1970-03-31,,7.08,36.700,38.300,4.4,7.76,75300.0,5.873000e+11,27.5,3.35,...,372007.0,23900.0,55.323,,0.000000,0.35,7.16,55.87,0.524934,0.000000
1970-04-30,,7.82,36.800,38.500,4.6,8.10,76300.0,5.884000e+11,28.1,3.35,...,370094.0,24400.0,55.194,,1.538718,0.35,7.15,55.13,0.522193,0.272480
1970-05-31,,7.95,36.800,38.600,4.8,7.95,76600.0,5.915000e+11,27.9,3.35,...,370094.0,24400.0,55.194,,0.000000,0.35,7.18,55.42,0.259740,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-31,3.45,3.15,269.546,295.320,3.7,2.33,5582200.0,2.165990e+13,3305.9,93.67,...,30928912.0,468000.0,3013.486,-0.30,0.000000,1.19,21.47,83.32,0.234872,-1.001932
2022-09-30,4.22,3.83,267.898,296.539,3.5,2.56,5410900.0,2.152520e+13,3131.4,84.26,...,30928912.0,468000.0,3013.486,-0.39,0.000000,1.19,20.82,82.81,0.412773,-0.611398
2022-10-31,4.51,4.10,265.061,297.987,3.7,3.08,5339600.0,2.143270e+13,3055.7,87.55,...,31419689.0,479500.0,2850.085,-0.41,1.591736,1.19,20.22,81.16,0.488300,-1.058985
2022-11-30,4.38,3.68,263.157,298.598,3.6,3.78,5418700.0,2.139870e+13,3126.2,84.37,...,31419689.0,479500.0,2850.085,-0.70,0.000000,1.19,20.52,81.03,0.205043,-0.718325


In [191]:
quaterly_last_record = data.resample('Q').last().dropna(how='all')
quaterly_last_record.to_csv('quaterly_last_record.csv')


In [190]:
# quaterly_last_record