# Ex 7.3: Part II (Outside Class)

- API key: 9ae52fc6c0a4433e5e9a88ff5c633c2d

In [1]:
import pandas as pd
import plotly.express as px

import json
import requests
import pprint

**API Call Steps:** 
1. Build the Request URL  
2. Use the requests library to make the API call  
3. Figure out what *key* (if any) to use to carve out the data we want from the returned data  
4. Get the retrieved data into a Dataframe  

# 1. Build the Request URL

### Choose Desired Data Series 
- This may be the only thing you have to change!

In [2]:
series = "?series_id=GDP"

### Base URL

In [3]:
base_url = "https://api.stlouisfed.org/fred//series/observations" 

### API Key  
- **You will have to copy your FRED API key between the quotes below for the API Call to work!**

In [4]:
api_key = "&api_key=9ae52fc6c0a4433e5e9a88ff5c633c2d"

In [5]:
### Returned data type
returned_data_type = "&file_type=json"

# Put them all together and test URL!

In [6]:
request_url = base_url + series + api_key + returned_data_type
#additional_routing

print("request_url = ", request_url)

request_url =  https://api.stlouisfed.org/fred//series/observations?series_id=GDP&api_key=9ae52fc6c0a4433e5e9a88ff5c633c2d&file_type=json


# 2. Use *requests* library to make the API call

In [7]:
# Make API Call
r = requests.get(request_url)

api_results = r.json()

### 3. Figure out what Key (if any) to use to carve out the data we want from the returned data

In [8]:
api_results

{'realtime_start': '2022-11-14',
 'realtime_end': '2022-11-14',
 'observation_start': '1600-01-01',
 'observation_end': '9999-12-31',
 'units': 'lin',
 'output_type': 1,
 'file_type': 'json',
 'order_by': 'observation_date',
 'sort_order': 'asc',
 'count': 307,
 'offset': 0,
 'limit': 100000,
 'observations': [{'realtime_start': '2022-11-14',
   'realtime_end': '2022-11-14',
   'date': '1946-01-01',
   'value': '.'},
  {'realtime_start': '2022-11-14',
   'realtime_end': '2022-11-14',
   'date': '1946-04-01',
   'value': '.'},
  {'realtime_start': '2022-11-14',
   'realtime_end': '2022-11-14',
   'date': '1946-07-01',
   'value': '.'},
  {'realtime_start': '2022-11-14',
   'realtime_end': '2022-11-14',
   'date': '1946-10-01',
   'value': '.'},
  {'realtime_start': '2022-11-14',
   'realtime_end': '2022-11-14',
   'date': '1947-01-01',
   'value': '243.164'},
  {'realtime_start': '2022-11-14',
   'realtime_end': '2022-11-14',
   'date': '1947-04-01',
   'value': '245.968'},
  {'realtime

In [9]:
type(api_results)


dict

# 4. Get the data into a Dataframe  
- If it doesn't have multiple levels of data, use:   **DataFrame.from_dict()**  
- If it does have multiple levels of data, use **json_normalize()**  

In [10]:
df = pd.DataFrame(api_results['observations'])

print(df.shape)
df.head()

(307, 4)


Unnamed: 0,realtime_start,realtime_end,date,value
0,2022-11-14,2022-11-14,1946-01-01,.
1,2022-11-14,2022-11-14,1946-04-01,.
2,2022-11-14,2022-11-14,1946-07-01,.
3,2022-11-14,2022-11-14,1946-10-01,.
4,2022-11-14,2022-11-14,1947-01-01,243.164


# Clean data

### Change data types (as needed)

In [11]:
df.dtypes

realtime_start    object
realtime_end      object
date              object
value             object
dtype: object

In [12]:
df.query("value != '.'", inplace=True)

In [13]:
df['date'] = pd.to_datetime(df['date'])
df['value'] = pd.to_numeric(df['value']).astype('float')

In [14]:
df.dtypes

realtime_start            object
realtime_end              object
date              datetime64[ns]
value                    float64
dtype: object

In [15]:
print(df.shape)
df.head()

(303, 4)


Unnamed: 0,realtime_start,realtime_end,date,value
4,2022-11-14,2022-11-14,1947-01-01,243.164
5,2022-11-14,2022-11-14,1947-04-01,245.968
6,2022-11-14,2022-11-14,1947-07-01,249.585
7,2022-11-14,2022-11-14,1947-10-01,259.745
8,2022-11-14,2022-11-14,1948-01-01,265.742


# Plot

In [16]:
fig = px.line(df,              
             x='date', 
             y='value',
             template='ggplot2',
             title='Part 2: US Gross Domestic Product (GDP)')


fig.show()
