In [2]:
import pandas as pd
import hvplot.pandas


In [3]:
# Read T10505-Q sheet from BEA Data 
# Table 1.5.5. Gross Domestic Product, Expanded Detail
# [Millions of dollars] Seasonally adjusted at annual rates
# Quarterly data from 1947Q1 to 2023Q4

# Bureau of Economic Analysis
# Data published March 28, 2024

# URL for download https://apps.bea.gov/iTable/?isuri=1&reqid=19&step=4&categories=flatfiles&nipa_table_list=1


gdp_df = pd.read_excel("../../resources/Section1All_xls.xlsx", 
              sheet_name="T10505-Q",
             skiprows = 7)

In [4]:
# View the dataframe
gdp_df.head(50)

Unnamed: 0,Line,Unnamed: 1,Unnamed: 2,1947Q1,1947Q2,1947Q3,1947Q4,1948Q1,1948Q2,1948Q3,...,2021Q3,2021Q4,2022Q1,2022Q2,2022Q3,2022Q4,2023Q1,2023Q2,2023Q3,2023Q4
0,1,Gross domestic product,A191RC,243164,245968,249585,259745,265742,272567,279196,...,23828973.0,24654603.0,25029116.0,25544273.0,25994639.0,26408405.0,26813601.0,27063012.0,27610128.0,27956998.0
1,2,Personal consumption expenditures,DPCERC,156161,160031,163543,167672,170372,174142,177072,...,16285113.0,16718195.0,17030634.0,17415115.0,17684189.0,17917039.0,18269569.0,18419007.0,18679496.0,18914478.0
2,3,Goods,DGDSRC,95594,98248,100419,103453,105093,107188,108741,...,5515884.0,5732790.0,5879255.0,6014444.0,6046813.0,6047645.0,6133839.0,6144657.0,6231764.0,6255650.0
3,4,Durable goods,DDURRC,20722,21351,21770,23488,23547,24019,25277,...,1972724.0,2070181.0,2120715.0,2122910.0,2143128.0,2129012.0,2194859.0,2193586.0,2204485.0,2202169.0
4,5,Motor vehicles and parts,DMOTRC,6319,6437,6181,7139,7694,7347,8146,...,670962.0,715448.0,735274.0,725941.0,728151.0,733853.0,776235.0,772653.0,764572.0,758436.0
5,6,Furnishings and durable household equipment,DFDHRC,8844,9234,9767,10383,10015,10690,11047,...,446853.0,458790.0,470848.0,478674.0,482171.0,478011.0,483042.0,475410.0,478856.0,475749.0
6,7,Recreational goods and vehicles,DREQRC,2809,2913,3022,3137,3017,3123,3190,...,601894.0,634595.0,652028.0,652102.0,664931.0,653032.0,666740.0,676402.0,689726.0,693469.0
7,8,Other durable goods,DODGRC,2749,2768,2800,2828,2822,2859,2894,...,253015.0,261349.0,262565.0,266193.0,267875.0,264116.0,268842.0,269121.0,271331.0,274516.0
8,9,Nondurable goods,DNDGRC,74872,76897,78649,79965,81546,83169,83465,...,3543160.0,3662608.0,3758540.0,3891535.0,3903685.0,3918634.0,3938980.0,3951071.0,4027279.0,4053481.0
9,10,Food and beverages purchased for off-pre...,DFXARC,39079,39964,40652,41073,41553,42455,41763,...,1297332.0,1329324.0,1353917.0,1381986.0,1409236.0,1428887.0,1430619.0,1434099.0,1447771.0,1456605.0


In [5]:
# Rename Key Metrics values to reflect the respective buckets


# 46 - Exports Goods
# 49 - Imports Good

gdp_df.at[46, "Unnamed: 1"] = "Goods - Exports"
gdp_df.at[49, "Unnamed: 1"] = "Goods - Imports"

# 47 - Exports Services 
# 50 - Imports Services

gdp_df.at[47, "Unnamed: 1"] = "Services - Exports"
gdp_df.at[50, "Unnamed: 1"] = "Services - Imports"

# 54 - Fed- National Defense - Consumption expenditures
# 57 - Fed - Nondefense - Consumption expenditures
# 60 - State and local - Consumption expenditures
gdp_df.at[54, "Unnamed: 1"] = "Fed- National Defense - Consumption expenditures"
gdp_df.at[57, "Unnamed: 1"] = "Fed - Nondefense - Consumption expenditures"
gdp_df.at[60, "Unnamed: 1"] = "State and local - Consumption expenditures"

# 55 - Fed- National Defense - Gross Investment
# 58 - Fed - Nondefense - Gross Investment
# 61 - State and local - Gross Investment
gdp_df.at[55, "Unnamed: 1"] = "Fed- National Defense - Gross Investment"
gdp_df.at[58, "Unnamed: 1"] = "Fed - Nondefense - Gross Investment"
gdp_df.at[61, "Unnamed: 1"] = "State and local - Gross Investment"

# 22 - Final Cons Expenditure nonprofit
gdp_df.at[22, "Unnamed: 1"] = "Final Cons Expenditure nonprofit"

# 9 - Food and beverages (off-premises cons)
gdp_df.at[9, "Unnamed: 1"] = "Food and beverages (off-premises cons)"

# 37 - Software
gdp_df.at[37, "Unnamed: 1"] = "Software"

#38 - Research and development
gdp_df.at[38, "Unnamed: 1"] = "Research and development"


In [6]:
# View the shape of dataframe
gdp_df.shape

(68, 311)

In [7]:
# Drop columns until 1959 Qtr 1
year_range = list(range(3, 51))
gdp_df = gdp_df.drop(columns=gdp_df.iloc[:,year_range])

# Drop Line and Unnamed:2 columns
gdp_df = gdp_df.drop(columns=gdp_df.iloc[:,[0,2]])

# Rename Unnamed:1 column
gdp_df = gdp_df.rename(columns={"Unnamed: 1" : "Key Metrics"})

In [8]:
# View the dataframe
gdp_df.head()

Unnamed: 0,Key Metrics,1959Q1,1959Q2,1959Q3,1959Q4,1960Q1,1960Q2,1960Q3,1960Q4,1961Q1,...,2021Q3,2021Q4,2022Q1,2022Q2,2022Q3,2022Q4,2023Q1,2023Q2,2023Q3,2023Q4
0,Gross domestic product,510330.0,522653.0,525034.0,528600.0,542648.0,541080.0,545604.0,540197.0,545018.0,...,23828973.0,24654603.0,25029116.0,25544273.0,25994639.0,26408405.0,26813601.0,27063012.0,27610128.0,27956998.0
1,Personal consumption expenditures,309449.0,315505.0,320725.0,322842.0,326364.0,332208.0,332126.0,334024.0,334520.0,...,16285113.0,16718195.0,17030634.0,17415115.0,17684189.0,17917039.0,18269569.0,18419007.0,18679496.0,18914478.0
2,Goods,169723.0,172617.0,174524.0,173564.0,175060.0,178396.0,177503.0,177103.0,175641.0,...,5515884.0,5732790.0,5879255.0,6014444.0,6046813.0,6047645.0,6133839.0,6144657.0,6231764.0,6255650.0
3,Durable goods,43649.0,45465.0,46336.0,44103.0,45455.0,46434.0,45922.0,44690.0,42240.0,...,1972724.0,2070181.0,2120715.0,2122910.0,2143128.0,2129012.0,2194859.0,2193586.0,2204485.0,2202169.0
4,Motor vehicles and parts,18090.0,19335.0,20073.0,17658.0,19343.0,19942.0,20130.0,18912.0,16475.0,...,670962.0,715448.0,735274.0,725941.0,728151.0,733853.0,776235.0,772653.0,764572.0,758436.0


In [9]:
# Set Index
gdp_df = gdp_df.set_index("Key Metrics")

In [10]:
# Trim Index names
gdp_df.index = gdp_df.index.str.strip()

In [11]:
# Print all index values
gdp_df.index.values

array(['Gross domestic product', 'Personal consumption expenditures',
       'Goods', 'Durable goods', 'Motor vehicles and parts',
       'Furnishings and durable household equipment',
       'Recreational goods and vehicles', 'Other durable goods',
       'Nondurable goods', 'Food and beverages (off-premises cons)',
       'Clothing and footwear', 'Gasoline and other energy goods',
       'Other nondurable goods', 'Services',
       'Household consumption expenditures (for services)',
       'Housing and utilities', 'Health care', 'Transportation services',
       'Recreation services', 'Food services and accommodations',
       'Financial services and insurance', 'Other services',
       'Final Cons Expenditure nonprofit',
       'Gross output of nonprofit institutions\\2\\',
       'Less: Receipts from sales of goods and services by nonprofit institutions\\3\\',
       'Gross private domestic investment', 'Fixed investment',
       'Nonresidential', 'Structures', 'Equipment',
      

In [12]:
# Transpose the dataframe
gdp_df = gdp_df.transpose()

In [13]:
# View the dataframe
gdp_df.head()

Key Metrics,Gross domestic product,Personal consumption expenditures,Goods,Durable goods,Motor vehicles and parts,Furnishings and durable household equipment,Recreational goods and vehicles,Other durable goods,Nondurable goods,Food and beverages (off-premises cons),...,Fed - Nondefense - Gross Investment,State and local,State and local - Consumption expenditures,State and local - Gross Investment,NaN,NaN.1,NaN.2,NaN.3,NaN.4,NaN.5
1959Q1,510330.0,309449.0,169723.0,43649.0,18090.0,15143.0,6290.0,4125.0,126074.0,61240.0,...,2774.0,44829.0,30172.0,14656.0,,,,,,
1959Q2,522653.0,315505.0,172617.0,45465.0,19335.0,15548.0,6374.0,4208.0,127152.0,61261.0,...,2849.0,44955.0,30541.0,14414.0,,,,,,
1959Q3,525034.0,320725.0,174524.0,46336.0,20073.0,15647.0,6392.0,4224.0,128188.0,61768.0,...,2795.0,44960.0,30964.0,13996.0,,,,,,
1959Q4,528600.0,322842.0,173564.0,44103.0,17658.0,15733.0,6420.0,4293.0,129461.0,62118.0,...,2938.0,44772.0,31494.0,13278.0,,,,,,
1960Q1,542648.0,326364.0,175060.0,45455.0,19343.0,15522.0,6359.0,4232.0,129605.0,61765.0,...,3135.0,45970.0,32436.0,13534.0,,,,,,


In [14]:
# Retain columns that has a valid name
gdp_df = gdp_df.loc[:, gdp_df.columns.notna()]

In [15]:
# Convert to billions of dollars
gdp_df = gdp_df.applymap(lambda x: x/1000)

In [16]:
# Reset index
gdp_df = gdp_df.reset_index()

In [17]:
# Rename old index to 'Year-Qtr'
gdp_df = gdp_df.rename(columns = {"index" : "Year-Qtr"})

In [35]:
# Cast Year-Qtr to String Type
gdp_df["Year-Qtr"] = gdp_df["Year-Qtr"].astype("str")

In [18]:
# Add GDP Growth values
gdp_df["GDP_Growth"] = gdp_df["Gross domestic product"].pct_change()
gdp_df["GDP_Growth"] = gdp_df["GDP_Growth"].fillna(0)

In [49]:
# Function to determine whether there is an economic recession (Negative GDP Growth)
def isRecession(gdp_growth):
    if gdp_growth <= 0.00:
        return True
    else:
        return False

In [50]:
# Add column Is_Recession
gdp_df["Is_Recession"] = gdp_df.apply(lambda row : isRecession(row["GDP_Growth"]), axis=1)

In [51]:
gdp_df.loc[gdp_df["Is_Recession"]==True]

Key Metrics,Year-Qtr,Gross domestic product,Personal consumption expenditures,Goods,Durable goods,Motor vehicles and parts,Furnishings and durable household equipment,Recreational goods and vehicles,Other durable goods,Nondurable goods,...,Fed- National Defense - Gross Investment,Nondefense,Fed - Nondefense - Consumption expenditures,Fed - Nondefense - Gross Investment,State and local,State and local - Consumption expenditures,State and local - Gross Investment,GDP_Growth,is_Recession,Is_Recession
0,1959Q1,510.33,309.449,169.723,43.649,18.09,15.143,6.29,4.125,126.074,...,18.787,11.23,8.456,2.774,44.829,30.172,14.656,0.0,True,True
5,1960Q2,541.08,332.208,178.396,46.434,19.942,15.709,6.474,4.31,131.961,...,17.654,11.544,7.956,3.587,47.314,33.362,13.952,-0.00289,True,True
7,1960Q4,540.197,334.024,177.103,44.69,18.912,15.207,6.335,4.236,132.413,...,18.374,13.197,9.194,4.003,49.039,34.537,14.502,-0.00991,True,True
92,1982Q1,3274.302,2014.155,886.293,246.439,97.178,70.093,49.926,29.242,639.854,...,52.813,102.269,70.173,32.095,353.247,287.922,65.326,-0.001986,True,True
127,1990Q4,6004.733,3867.909,1499.702,480.947,192.921,118.143,104.557,65.326,1018.756,...,106.945,159.442,108.236,51.207,698.812,562.218,136.594,-0.001726,True,True
170,2001Q3,10598.02,7070.337,2515.247,923.888,368.964,214.434,233.983,106.507,1591.359,...,88.029,259.028,182.652,76.375,1278.961,1039.884,239.077,-9.2e-05,True,True
196,2008Q1,14706.538,10004.445,3406.368,1153.509,378.044,274.331,335.346,165.787,2252.859,...,157.509,389.969,288.374,101.595,1798.456,1445.915,352.541,-0.000579,True,True
199,2008Q4,14608.209,9906.942,3175.077,1007.986,295.648,248.762,307.545,156.031,2167.09,...,173.024,408.161,301.112,107.049,1842.046,1480.712,361.334,-0.019517,True,True
200,2009Q1,14430.902,9814.969,3120.027,1004.48,302.076,242.455,304.326,155.622,2115.548,...,169.715,419.802,313.011,106.791,1836.817,1473.276,363.541,-0.012137,True,True
201,2009Q2,14381.236,9805.502,3134.475,994.7,306.089,238.193,293.574,156.845,2139.774,...,173.925,430.574,323.352,107.222,1856.964,1491.582,365.382,-0.003442,True,True


In [55]:
# Look at data for year 1969 thru 1971
gdp_df.loc[gdp_df["Year-Qtr"].str.match('1969|1970|1971')]

Key Metrics,Year-Qtr,Gross domestic product,Personal consumption expenditures,Goods,Durable goods,Motor vehicles and parts,Furnishings and durable household equipment,Recreational goods and vehicles,Other durable goods,Nondurable goods,...,Fed- National Defense - Gross Investment,Nondefense,Fed - Nondefense - Consumption expenditures,Fed - Nondefense - Gross Investment,State and local,State and local - Consumption expenditures,State and local - Gross Investment,GDP_Growth,is_Recession,Is_Recession
40,1969Q1,993.337,587.005,298.845,90.011,37.649,27.296,16.41,8.655,208.834,...,21.189,28.346,17.047,11.299,104.096,75.059,29.037,0.026143,False,False
41,1969Q2,1009.02,598.337,302.664,90.448,37.13,27.851,16.735,8.732,212.216,...,18.878,28.922,17.553,11.369,107.076,77.614,29.462,0.015788,False,False
42,1969Q3,1029.956,608.626,306.616,90.628,37.673,27.539,16.736,8.68,215.988,...,19.832,29.944,18.72,11.224,109.143,80.27,28.874,0.020749,False,False
43,1969Q4,1038.147,620.586,310.588,90.844,37.34,27.668,17.048,8.789,219.744,...,17.923,29.291,18.088,11.203,110.848,82.95,27.898,0.007953,False,False
44,1970Q1,1051.2,631.685,314.095,89.611,34.926,27.769,17.839,9.077,224.484,...,19.606,30.723,19.486,11.236,114.33,86.16,28.169,0.012573,False,False
45,1970Q2,1067.375,641.57,317.554,91.006,36.242,28.0,17.631,9.133,226.548,...,18.778,32.156,20.604,11.552,117.383,88.878,28.506,0.015387,False,False
46,1970Q3,1086.059,653.482,321.727,92.011,36.416,28.111,17.934,9.55,229.716,...,18.381,32.446,20.881,11.565,122.224,91.929,30.295,0.017505,False,False
47,1970Q4,1088.608,660.161,321.8,87.324,30.569,28.725,18.293,9.737,234.475,...,18.962,33.054,21.292,11.762,125.168,94.796,30.372,0.002347,False,False
48,1971Q1,1135.156,679.186,333.88,98.052,40.977,29.021,18.352,9.701,235.828,...,15.928,33.869,22.097,11.772,128.558,98.208,30.35,0.042759,False,False
49,1971Q2,1156.271,693.225,339.82,100.989,42.505,29.501,19.036,9.947,238.832,...,15.49,36.412,24.399,12.013,131.867,101.044,30.823,0.018601,False,False


In [53]:
# View the dataframe
gdp_df.head()

Key Metrics,Year-Qtr,Gross domestic product,Personal consumption expenditures,Goods,Durable goods,Motor vehicles and parts,Furnishings and durable household equipment,Recreational goods and vehicles,Other durable goods,Nondurable goods,...,Fed- National Defense - Gross Investment,Nondefense,Fed - Nondefense - Consumption expenditures,Fed - Nondefense - Gross Investment,State and local,State and local - Consumption expenditures,State and local - Gross Investment,GDP_Growth,is_Recession,Is_Recession
0,1959Q1,510.33,309.449,169.723,43.649,18.09,15.143,6.29,4.125,126.074,...,18.787,11.23,8.456,2.774,44.829,30.172,14.656,0.0,True,True
1,1959Q2,522.653,315.505,172.617,45.465,19.335,15.548,6.374,4.208,127.152,...,18.895,13.29,10.441,2.849,44.955,30.541,14.414,0.024147,False,False
2,1959Q3,525.034,320.725,174.524,46.336,20.073,15.647,6.392,4.224,128.188,...,19.347,13.134,10.339,2.795,44.96,30.964,13.996,0.004556,False,False
3,1959Q4,528.6,322.842,173.564,44.103,17.658,15.733,6.42,4.293,129.461,...,18.722,12.88,9.942,2.938,44.772,31.494,13.278,0.006792,False,False
4,1960Q1,542.648,326.364,175.06,45.455,19.343,15.522,6.359,4.232,129.605,...,18.432,10.957,7.822,3.135,45.97,32.436,13.534,0.026576,False,False


In [54]:
print("BEA_Base_For_ML_Model completed - gdp_df ready for use")

Base_For_ML_Model completed - gdp_df ready for use
