# Bureau of Economic Analysis (BEA) API

### Description 

Using the U.S. Bureau of Economic Analysis (BEA) API, historical real U.S. GDP data will be collected.

### API

In [23]:
# Imported necessary packages
import os

import pandas as pd
import requests

# User Guide : https://apps.bea.gov/api/_pdf/bea_web_service_api_user_guide.pdf
# The BEA API key is read from the BEA_API_KEY environment variable so that
# the credential is never stored in the notebook source.
bea_api_key = os.environ.get("BEA_API_KEY")
if not bea_api_key:
    raise RuntimeError("Set the BEA_API_KEY environment variable to your BEA API key.")

# Query API Database 1: 1979-1996
# HTTPS is used directly so the key is not sent over a plain-text connection.
url = "https://apps.bea.gov/api/data"
query = {
    "UserID": bea_api_key,
    "method": "GetData",
    "datasetname": "Regional",
    "LineCode": "1",
    "GeoFips": "STATE",
    "Year": "ALL",
    "tableName": "SAGDP9S",
    "ResultFormat": "JSON",
}

# A timeout keeps the cell from hanging indefinitely, and raise_for_status()
# surfaces HTTP errors before the body is parsed as JSON.
response = requests.get(url, params=query, timeout=30)
response.raise_for_status()
data = response.json()
# The records sit under BEAAPI -> Results -> Data in the JSON payload
raw_gdp2 = data["BEAAPI"]["Results"]["Data"]

# Data frame function
def process_data(raw_data):
    """Reduce raw API records to the four fields used by the notebook.

    Parameters
    ----------
    raw_data : iterable of dict
        Records that each provide the keys "TimePeriod", "GeoName",
        "CL_UNIT" and "DataValue".

    Returns
    -------
    list of dict
        One dict per input record with the keys "Date", "state", "unit"
        and "gdp" (in that order); the values are copied unchanged.

    Raises
    ------
    KeyError
        If a record lacks one of the four expected keys.
    """
    rows = []
    for entry in raw_data:
        row = {}
        row["Date"] = entry["TimePeriod"]
        row["state"] = entry["GeoName"]
        row["unit"] = entry["CL_UNIT"]
        row["gdp"] = entry["DataValue"]
        rows.append(row)
    return rows

# Show the request URL with the API key masked so the saved cell output does not expose it
print('\n','Annual Real U.S GDP for years 1979 - 1996 link:','\n',
      response.url.replace(query["UserID"], '<BEA_API_KEY>'),'\n','\n')

# Factor used to express the chained-1997-dollar series in chained 2012 dollars.
# NOTE(review): the derivation of this factor is not shown in the notebook - confirm its source.
CHAINED_1997_TO_2012 = 1.390794011857204
# Years taken from this table; later years come from the second table.
FIRST_YEAR, LAST_YEAR = 1979, 1996

# Convert data to data frame & clean data (data set 1)
gdp1 = process_data(raw_gdp2)
gdp_1979_1996 = pd.DataFrame(gdp1)
# Set column names
gdp_1979_1996.columns = ['date', 'rg_state', 'rg_unit', 'rg_gdp']
# Retain United States data first, so rows for other regions cannot make the
# numeric conversion below fail; .copy() gives an independent frame to assign into.
gdp_1979_1996 = gdp_1979_1996.loc[gdp_1979_1996['rg_state'] == 'United States'].copy()
# Select the wanted years by value rather than by hard-coded row labels, so the
# selection stays correct if the API returns rows in another order or adds years.
parsed_dates = pd.to_datetime(gdp_1979_1996['date'])
in_window = parsed_dates.dt.year.between(FIRST_YEAR, LAST_YEAR)
gdp_1979_1996 = gdp_1979_1996.loc[in_window].copy()
# Date formatting
gdp_1979_1996['date'] = parsed_dates.loc[in_window].dt.strftime('%Y-%m-%d')
# String to float type (strip thousands separators first)
gdp_1979_1996['rg_gdp'] = gdp_1979_1996['rg_gdp'].str.replace(',', '', regex=False).astype(float)
# Chain Annual Real U.S GDP dollars to 2012 dollars
gdp_1979_1996['rg_gdp'] = gdp_1979_1996['rg_gdp'].multiply(CHAINED_1997_TO_2012)
# Change the unit label to match the rescaled values
gdp_1979_1996['rg_unit'] = gdp_1979_1996['rg_unit'].str.replace('Millions of chained 1997 dollars',
                                                                'Millions of chained 2012 dollars',
                                                                regex=False)


# Query API Database 2: 1997-2019
# The BEA API key is read from the BEA_API_KEY environment variable so that
# the credential is never stored in the notebook source.
bea_api_key = os.environ.get("BEA_API_KEY")
if not bea_api_key:
    raise RuntimeError("Set the BEA_API_KEY environment variable to your BEA API key.")

# HTTPS is used directly so the key is not sent over a plain-text connection.
url = "https://apps.bea.gov/api/data"
query = {
    "UserID": bea_api_key,
    "method": "GetData",
    "datasetname": "Regional",
    "LineCode": "1",
    "GeoFips": "STATE",
    "Year": "ALL",
    "tableName": "SAGDP9N",
    "ResultFormat": "JSON",
}

# A timeout keeps the cell from hanging indefinitely, and raise_for_status()
# surfaces HTTP errors before the body is parsed as JSON.
response = requests.get(url, params=query, timeout=30)
response.raise_for_status()
data = response.json()
# The records sit under BEAAPI -> Results -> Data in the JSON payload
raw_gdp = data["BEAAPI"]["Results"]["Data"]

# Show the request URL with the API key masked so the saved cell output does not expose it
print('Annual Real U.S GDP for years 1997 - 2019 link:','\n',
      response.url.replace(query["UserID"], '<BEA_API_KEY>'),'\n')

# Convert data to data frame & clean data (data set 2)
gdp = process_data(raw_gdp)
gdp_1997_2019 = pd.DataFrame(gdp)
# Set column names
gdp_1997_2019.columns = ['date', 'rg_state', 'rg_unit', 'rg_gdp']
# Retain United States data first, so rows for other regions cannot make the
# numeric conversion below fail; .copy() gives an independent frame to assign into.
gdp_1997_2019 = gdp_1997_2019.loc[gdp_1997_2019['rg_state'] == 'United States'].copy()
# Date formatting
gdp_1997_2019['date'] = pd.to_datetime(gdp_1997_2019['date']).dt.strftime('%Y-%m-%d')
# String to float type (strip thousands separators first)
gdp_1997_2019['rg_gdp'] = gdp_1997_2019['rg_gdp'].str.replace(',', '', regex=False).astype(float)

# Concatenate the gdp_1979_1996 & gdp_1997_2019 data frames.
# ignore_index=True renumbers the rows 0..n-1 without creating a leftover index column.
gdp_com = pd.concat([gdp_1979_1996, gdp_1997_2019], ignore_index=True)
# Year as a float, taken from the parsed date instead of arithmetic on the date text
gdp_com['year'] = pd.to_datetime(gdp_com['date']).dt.year.astype(float)
# Drop unneeded columns
gdp_com = gdp_com.drop(columns=['date'])
# Data Frame Descriptives
gdp_com.info()

gdp_com.head()


 Annual Real U.S GDP for years 1979 - 1996 link: 
 https://apps.bea.gov/api/data/?UserID=5DC60554-A22B-4D77-AEBE-15D1D2A1DF8C&method=GetData&datasetname=Regional&LineCode=1&GeoFips=STATE&Year=ALL&tableName=SAGDP9S&ResultFormat=JSON 
 

Annual Real U.S GDP for years 1997 - 2019 link: 
 https://apps.bea.gov/api/data/?UserID=5DC60554-A22B-4D77-AEBE-15D1D2A1DF8C&method=GetData&datasetname=Regional&LineCode=1&GeoFips=STATE&Year=ALL&tableName=SAGDP9N&ResultFormat=JSON 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   rg_state  41 non-null     object 
 1   rg_unit   41 non-null     object 
 2   rg_gdp    41 non-null     float64
 3   year      41 non-null     float64
dtypes: float64(2), object(2)
memory usage: 1.4+ KB


Unnamed: 0,rg_state,rg_unit,rg_gdp,year
0,United States,Millions of chained 2012 dollars,6827700.0,1979.0
1,United States,Millions of chained 2012 dollars,6827222.0,1980.0
2,United States,Millions of chained 2012 dollars,7012547.0,1981.0
3,United States,Millions of chained 2012 dollars,6922469.0,1982.0
4,United States,Millions of chained 2012 dollars,7120968.0,1983.0


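The BEA API is used to collect annual real U.S. GDP for 1979–2019; the pre-1997 series is rescaled to chained 2012 dollars so that both series share one unit.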

In [2]:
# Write the combined data frame to a comma-separated, UTF-8 encoded CSV file
output_path = 'Data/Real_GDP.csv'
gdp_com.to_csv(output_path, sep=',', encoding='utf-8')

The combined data frame is exported to `Data/Real_GDP.csv` in the project directory for further analysis.