In [1]:
# Import Libraries
# Libraries for folder and file access
import os
import pathlib 


# Libraries for retrieving data using API
from fredapi import Fred

# Libraries for data manipulation   
import pandas as pd
from datetime import datetime

# Libraries for working with AWS
import boto3


In [2]:
# Federal Reserve Economic Data (FRED) API
# https://fred.stlouisfed.org/docs/api/fred/
# The key is read from the environment so it never appears in the notebook.
api_key = os.getenv("FRED_API_KEY")
endpoint = 'https://api.stlouisfed.org/fred/series/observations'
fred = Fred(api_key=api_key)

# Define the parameters
# `series_id` and `series_name` are parallel lists: the label at position i
# is the readable name for the FRED series at position i.
series_id = ['UNRATE', 'LNS14000006','LNS14000009', 'LNS14000003','LNS14000001','LNS14000002',
             'LNS14027662','LNS14027660','LNS14027659']
series_name = ['Unemployment Rate', 'African_American', 'Hispanic', 'White', 'Men','Women',
               'College Degreed','High School Degreed', 'No High School Degreed']
# Catch a mismatch here rather than as a confusing error when columns are renamed.
assert len(series_id) == len(series_name), "series_id and series_name must have the same length"

# ISO 8601 (YYYY-MM-DD): 1 December 2000, written so it cannot be misread as day-first.
start_date = '2000-12-01'

In [3]:
# Easy way: using the fredapi library.
# Reuses the `fred` client created in the configuration cell.
# The first series seeds the frame (and its date index) and is labelled with
# its readable name straight away.
df_unemployment = fred.get_series(series_id[0], observation_start=start_date).to_frame(name=series_name[0])

# The remaining series are added as columns keyed by their FRED id; pandas
# aligns each Series to the existing index on assignment.
# The loop variable is `sid`, not `id`, so the builtin id() is not shadowed in the kernel.
for sid in series_id[1:]:
    df_unemployment[sid] = fred.get_series(sid, observation_start=start_date)

df_unemployment.tail()

Unnamed: 0,Unemployment Rate,LNS14000006,LNS14000009,LNS14000003,LNS14000001,LNS14000002,LNS14027662,LNS14027660,LNS14027659
2024-03-01,3.8,6.4,4.5,3.4,3.7,3.9,2.1,4.1,4.9
2024-04-01,3.9,5.6,4.8,3.5,3.9,3.8,2.2,4.0,6.0
2024-05-01,4.0,6.1,5.0,3.5,4.2,3.7,2.1,4.3,5.9
2024-06-01,4.1,6.3,4.9,3.5,4.1,4.0,2.4,4.2,5.9
2024-07-01,4.3,6.3,5.3,3.8,4.4,4.1,2.3,4.6,6.7


In [4]:
# Replace the column header with the readable labels (matched by position),
# then show summary statistics for every column.
df_unemployment = df_unemployment.set_axis(series_name, axis=1)
df_unemployment.describe()

Unnamed: 0,Unemployment Rate,African_American,Hispanic,White,Men,Women,College Degreed,High School Degreed,No High School Degreed
count,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0
mean,5.78838,9.969718,7.273239,5.132042,5.968662,5.584155,2.93169,5.883803,8.766197
std,1.974459,3.145404,2.678693,1.82685,2.129161,1.846876,1.042752,2.253294,3.112096
min,3.4,4.8,3.9,3.0,3.4,3.3,1.5,3.3,4.3
25%,4.4,7.7,5.175,3.8,4.4,4.3,2.1,4.3,6.5
50%,5.3,9.65,6.6,4.5,5.35,5.2,2.5,5.1,7.95
75%,6.7,11.6,8.525,5.9,6.825,6.4,3.425,7.0,9.825
max,14.8,16.9,18.9,14.2,13.5,16.2,8.4,17.7,21.3


In [5]:
# Presidential terms (inauguration to inauguration) with party affiliation
presidents = [
    dict(name="Bill Clinton", start="1993-01-20", end="2001-01-20", party="Democrat"),
    dict(name="George W. Bush", start="2001-01-20", end="2009-01-20", party="Republican"),
    dict(name="Barack Obama", start="2009-01-20", end="2017-01-20", party="Democrat"),
    dict(name="Donald Trump", start="2017-01-20", end="2021-01-20", party="Republican"),
    dict(name="Joe Biden", start="2021-01-20", end="2025-01-20", party="Democrat"),
]

# Build the lookup table and parse both date columns into Timestamps.
# An end date that cannot be parsed becomes NaT and is then filled with the current time.
presidents_df = pd.DataFrame(presidents).assign(
    start=lambda terms: pd.to_datetime(terms['start']),
    end=lambda terms: pd.to_datetime(terms['end'], errors='coerce').fillna(pd.Timestamp('today')),
)

In [6]:
def get_president(date, terms=None):
    """Return the ``(name, party)`` of the president in office on ``date``.

    Parameters
    ----------
    date : pandas.Timestamp
        Moment to look up; it is compared against each term's ``start`` and ``end``.
    terms : pandas.DataFrame, optional
        Table with ``name``, ``start``, ``end`` and ``party`` columns.
        Defaults to the notebook-level ``presidents_df``.

    Returns
    -------
    tuple
        ``(name, party)`` of the first term for which ``start <= date < end``,
        or ``(None, None)`` when no term contains ``date``.
    """
    if terms is None:
        terms = presidents_df
    # Terms are half-open [start, end): a date equal to a term's `end` falls
    # into the next term, never into both.
    for name, start, end, party in zip(terms['name'], terms['start'], terms['end'], terms['party']):
        if start <= date < end:
            return name, party
    return None, None

In [7]:
# Apply the function to each row
# Move the date index into a regular `date` column, placed after the series columns.
# Guarded so that re-running this cell does not overwrite the dates with the
# integer index that reset_index(drop=True) leaves behind.
if 'date' not in df_unemployment.columns:
    df_unemployment = df_unemployment.assign(date=df_unemployment.index).reset_index(drop=True)

# Look each date up once, then split the (name, party) pairs into two columns.
# List comprehensions (rather than unpacking zip(*...)) also work on an empty frame.
terms_by_row = [get_president(day) for day in df_unemployment['date']]
df_unemployment['president'] = [name for name, _party in terms_by_row]
df_unemployment['party'] = [party for _name, party in terms_by_row]

In [8]:
# Show the first five rows of df_unemployment
df_unemployment.head(n=5)

Unnamed: 0,Unemployment Rate,African_American,Hispanic,White,Men,Women,College Degreed,High School Degreed,No High School Degreed,date,president,party
0,3.9,7.4,5.7,3.5,4.0,3.8,1.5,3.5,5.9,2000-12-01,Bill Clinton,Democrat
1,4.2,8.2,5.8,3.6,4.2,4.1,1.6,3.8,6.7,2001-01-01,Bill Clinton,Democrat
2,4.2,7.7,6.1,3.7,4.3,4.2,1.6,3.7,7.5,2001-02-01,George W. Bush,Republican
3,4.3,8.3,6.2,3.7,4.3,4.2,2.0,3.8,6.8,2001-03-01,George W. Bush,Republican
4,4.4,8.0,6.4,3.9,4.5,4.3,2.1,3.7,6.8,2001-04-01,George W. Bush,Republican


In [9]:
# Write the frame to a CSV file in the working directory; the row index is
# exported under the column header 'id'.
df_unemployment.to_csv(path_or_buf='unemployment.csv', index_label='id')

In [12]:
# Export to s3 bucket
s3 = boto3.client('s3')
bucket_url = "http://s3-east-2-economic-data.s3.amazonaws.com/"  # not used by the upload below
bucket_name = "s3-east-2-economic-data"
object_name = "unemployment.csv"  # key the file is stored under in the bucket
file_name = "unemployment.csv"    # local file to upload

# upload_file returns None on success and raises on failure, so there is no
# response object to inspect; report what was uploaded instead.
s3.upload_file(file_name, bucket_name, object_name)

print("Uploaded {} to s3://{}/{}".format(file_name, bucket_name, object_name))


None
