In [1]:
import pandas as pd

In [2]:
# Load every raw CSV extract into its own DataFrame

# Monthly series
cpi, housing_starts, unemp_rt = map(
    pd.read_csv,
    (
        "../other/raw/CPI-AUCSA.csv",
        "../other/raw/HOUSING-STARTS.csv",
        "../other/raw/UNRATENSA.csv",
    ),
)

# Quarterly series
gdp_pct_chg, gdp, output_gap, recession_dt = map(
    pd.read_csv,
    (
        "../other/raw/GDP-PCT-CHG-SAAR.csv",
        "../other/raw/GDP-SAAR.csv",
        "../other/raw/OUTPUT-GAP.csv",
        "../other/raw/RECESSIONDATES.csv",
    ),
)

### Add 'quarter' column to all data sets

In [3]:
# Collect every data set (monthly and quarterly alike) in one list so the
# same 'quarter' labelling step can be applied to each of them
mos_data = [
    cpi,
    housing_starts,
    unemp_rt,
    gdp_pct_chg,
    gdp,
    output_gap,
    recession_dt,
]

In [4]:
# Add a 'quarter' label of the form '<year>Q<n>' (e.g. '2020Q1') to every
# data set in mos_data. Each frame is modified in place.
for frame in mos_data:

    # Convert date column from string to datetime data type
    frame['date'] = pd.to_datetime(frame['date'], format='%Y-%m-%d')

    # Build the label for all rows at once instead of row by row.
    # .dt.quarter maps months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4,
    # so the result matches a manual month-to-quarter lookup and does not
    # depend on the frame having a default 0..n-1 index.
    dates = frame['date'].dt
    frame['quarter'] = dates.year.astype(str) + 'Q' + dates.quarter.astype(str)

#### Group monthly data by quarter and rename columns

In [5]:
# Average each monthly series within its quarter.
# numeric_only=True is passed explicitly so that only numeric columns are
# averaged. Without it the outcome depends on the installed pandas version
# (the default changed in pandas 2.0), which decides whether the datetime
# 'date' column is averaged into the output as well.
cpi_avg = cpi.groupby('quarter').mean(numeric_only=True).reset_index()
avg_housing_starts = housing_starts.groupby('quarter').mean(numeric_only=True).reset_index()
avg_unemp_rt = unemp_rt.groupby('quarter').mean(numeric_only=True).reset_index()

In [6]:
# Prefix the value columns with 'avg_' to signal that they now hold
# quarterly averages
cpi_avg = cpi_avg.rename(
    columns={'consumer_price_index': 'avg_consumer_price_index'}
)
avg_housing_starts = avg_housing_starts.rename(
    columns={'housing_starts': 'avg_housing_starts'}
)
avg_unemp_rt = avg_unemp_rt.rename(
    columns={'unemployment_rate': 'avg_unemployment_rate'}
)

### Export all data sets to csv

In [7]:
# Write each finished data set to the resources folder, without the index
for frame, destination in (
    (cpi_avg, "../resources/cpi_final.csv"),
    (avg_housing_starts, "../resources/housing_starts_final.csv"),
    (avg_unemp_rt, "../resources/unemployment_rate_final.csv"),
    (gdp_pct_chg, "../resources/gdp_pct_chg_final.csv"),
    (gdp, "../resources/gdp_final.csv"),
    (output_gap, "../resources/output_gap_final.csv"),
    (recession_dt, "../resources/recession_dates_final.csv"),
):
    frame.to_csv(destination, index=False)