### Making the State Level Series Table
This table holds the series_id, the series metadata, and the state_id for every series listed under each state.

In [1]:
import json
import os
import time

import numpy as np
import pandas as pd
import requests

def get_series_name(state_name, series_title):
    """Turn a state-specific series title into a state-agnostic feature name.

    Everything from the first occurrence of ``state_name`` onwards is cut off,
    so ``'Unemployment Rate in Alabama'`` becomes ``'Unemployment Rate in '``
    (the trailing space is kept, as before).

    Parameters
    ----------
    state_name : str
        Name of the state to strip from the title.
    series_title : str
        Full series title.

    Returns
    -------
    str
        The title truncated just before the state name, or the title
        unchanged when the state name does not occur in it.
    """
    ndx = series_title.find(state_name)
    # str.find returns -1 when there is no match; slicing with [:-1] would
    # silently drop the last character of the title, so return it untouched.
    if ndx == -1:
        return series_title
    return series_title[:ndx]

In [2]:
# Read the state lookup table (columns used below: 'id' and 'name')
df_state_table = pd.read_csv('state_table.csv')
print(df_state_table.shape, df_state_table.head(), sep='\n')

# Parallel arrays: the ids are later sent to FRED as category ids,
# the names are used to strip the state from each series title
states_names = df_state_table['name'].values
states_list = df_state_table['id'].values

# Metadata fields copied from every series returned by the API
state_series_table_cols = [
    'id',
    'title',
    'observation_start',
    'observation_end',
    'frequency',
    'units',
    'seasonal_adjustment',
]

# Start from an empty series table; the download cell adds one frame per state
df_state_series_table = pd.DataFrame()

(53, 2)
      id        name
0  27282     Alabama
1  27283      Alaska
2  27284     Arizona
3    149    Arkansas
4  27286  California


In [3]:
# The series table is filled by this cell together with the next one.
# Because there are a lot of series, the two cells were run back and forth
# several times, saving the data to disk instead of keeping it only in memory.
# To resume from a saved checkpoint, uncomment the line below:
# df_state_series_table = pd.read_csv('series_table_3100.csv')
print(df_state_series_table.shape, df_state_series_table.head(), sep='\n')

(0, 0)
Empty DataFrame
Columns: []
Index: []


In [4]:
FRED_CATEGORY_SERIES_URL = 'https://api.stlouisfed.org/fred/category/series'

# The API key is read from the environment so the secret never lives in the notebook.
FRED_API_KEY = os.environ.get('FRED_API_KEY')
if not FRED_API_KEY:
    raise RuntimeError('Set the FRED_API_KEY environment variable before running this cell.')


def _fetch_category_series_page(category_id, offset=0):
    """Request one page of series for a FRED category and return the decoded JSON.

    Raises ``requests.HTTPError`` on a non-2xx response, so an API error is
    reported as such rather than as a KeyError on the error payload.
    """
    params = {
        'category_id': category_id,
        'api_key': FRED_API_KEY,
        'file_type': 'json',
    }
    # The first request carries no offset, exactly like the original call.
    if offset:
        params['offset'] = offset
    response = requests.get(url=FRED_CATEGORY_SERIES_URL, params=params, timeout=30)
    response.raise_for_status()
    return response.json()


def _fetch_all_category_series(category_id):
    """Return ``(count reported by the API, list of raw series dicts)`` for a category."""
    page = _fetch_category_series_page(category_id)
    reported_count = page['count']
    raw_series = list(page['seriess'])
    # Advance by the number of rows actually received instead of assuming a
    # fixed page size; stop on an empty page so a wrong count cannot loop forever.
    while len(raw_series) < reported_count:
        page = _fetch_category_series_page(category_id, offset=len(raw_series))
        if not page['seriess']:
            break
        raw_series.extend(page['seriess'])
    return reported_count, raw_series


# One DataFrame per state, concatenated once at the end: DataFrame.append was
# removed in pandas 2.0 and re-copied the whole table on every iteration.
state_frames = []
try:
    for state_id, state_name in zip(states_list, states_names):
        time.sleep(2)  # pause between states to go easy on the API
        print(state_name)

        reported_count, raw_series = _fetch_all_category_series(state_id)
        print(reported_count)

        series_in_state = []
        for series in raw_series:
            row = {col: series[col] for col in state_series_table_cols}
            row['title'] = get_series_name(state_name, row['title'])
            series_in_state.append(row)

        df_cur_state = pd.DataFrame(series_in_state)
        df_cur_state['state_id'] = state_id
        print(df_cur_state.shape)
        state_frames.append(df_cur_state)
finally:
    # Runs even when the loop is interrupted or a request fails, so the states
    # downloaded so far are kept in df_state_series_table (as with per-state append).
    if state_frames:
        # Skip an empty starting table; rows loaded from a checkpoint are preserved.
        existing = [] if df_state_series_table.empty else [df_state_series_table]
        df_state_series_table = pd.concat(existing + state_frames)

print(df_state_series_table.shape)

Alabama
1668
(1668, 8)
Alaska
1301
(1301, 8)
Arizona
1637
(1637, 8)
Arkansas
1457
(1457, 8)
California
2296
(2296, 8)
Colorado
1639
(1639, 8)
Connecticut
1702
(1702, 8)
Delaware
1447
(1447, 8)
District of Columbia
1344
(1344, 8)
Florida
1796
(1796, 8)
Georgia
1650
(1650, 8)
Hawaii
1410
(1410, 8)
Idaho
1544
(1544, 8)
Illinois
1971
(1971, 8)
Indiana
1776
(1776, 8)
Iowa
1575
(1575, 8)
Kansas
1532
(1532, 8)
Kentucky
1606
(1606, 8)
Louisiana
1665
(1665, 8)
Maine
1498
(1498, 8)
Maryland
1648
(1648, 8)
Massachusetts
1767
(1767, 8)
Michigan
2050
(2050, 8)
Minnesota
1826
(1826, 8)
Mississippi
1547
(1547, 8)
Missouri
1695
(1695, 8)
Montana
1390
(1390, 8)
Nebraska
1631
(1631, 8)
Nevada
1528
(1528, 8)
New Hampshire
1509
(1509, 8)
New Jersey
1841
(1841, 8)
New Mexico
1340
(1340, 8)
New York
2035
(2035, 8)
North Carolina
1731
(1731, 8)
North Dakota
1446
(1446, 8)
Ohio
1858
(1858, 8)
Oklahoma
1602
(1602, 8)
Oregon
1727
(1727, 8)
Pennsylvania
1828
(1828, 8)
Puerto Rico
375
(375, 8)
Rhode Island
1470
(

In [5]:
# Preview the first rows of the combined series table
print(df_state_series_table.head())

                 id                                              title  \
0       ACTLISCOUAL        Housing Inventory: Active Listing Count in    
1     ACTLISCOUMMAL  Housing Inventory: Active Listing Count Month-...   
2     ACTLISCOUYYAL  Housing Inventory: Active Listing Count Year-O...   
3  AGEXMAL1A647NCEN                Age 65 and Over Tax Exemptions for    
4    AL7320A052SCEN     Value of Exports to Sudan (DISCONTINUED) from    

  observation_start observation_end frequency                 units  \
0        2016-07-01      2020-03-01   Monthly                 Level   
1        2017-07-01      2020-03-01   Monthly               Percent   
2        2017-07-01      2020-03-01   Monthly               Percent   
3        1989-01-01      2017-01-01    Annual  Number of Exemptions   
4        1992-01-01      1992-01-01    Annual               Dollars   

       seasonal_adjustment  state_id  
0  Not Seasonally Adjusted     27282  
1  Not Seasonally Adjusted     27282  
2  Not Seas

In [6]:
# Number of distinct state ids present in the combined table
print(df_state_series_table['state_id'].nunique(dropna=False))

53


In [7]:
# Keep one copy of every fully identical row (all columns compared)
df_state_series_dedup = df_state_series_table.drop_duplicates()

In [8]:
# Compare the number of distinct series ids with the number of distinct rows;
# equal values mean no id repeats with different metadata.
# The per-id row counts are printed last.
print(
    df_state_series_table['id'].nunique(dropna=False),
    df_state_series_dedup.shape,
    df_state_series_table.groupby('id').count(),
    sep='\n',
)

85514
(85514, 8)
                 title  observation_start  observation_end  frequency  units  \
id                                                                             
2020RATIO011001      1                  1                1          1      1   
ACTLISCOUAK          1                  1                1          1      1   
ACTLISCOUAL          1                  1                1          1      1   
ACTLISCOUAR          1                  1                1          1      1   
ACTLISCOUAZ          1                  1                1          1      1   
...                ...                ...              ...        ...    ...   
X08TDMSBTRI          1                  1                1          1      1   
X08TDMSBTVT          1                  1                1          1      1   
X08TDMSBTWA          1                  1                1          1      1   
X08TDMSBTWI          1                  1                1          1      1   
X08TDMSBTWV          1 

In [10]:
# Persist the full series table to disk (row index is not written)
df_state_series_table.to_csv('state_series_table_all.csv', index=False)

In [16]:
# For each state-stripped title, count how many distinct states carry it
title_groups = df_state_series_table.groupby('title')
agg = title_groups['state_id'].nunique()
print(agg.shape)

(2839,)


So we have 2839 features (distinct series titles). Let's order them by how many rows each title has across all states.

In [30]:
# Rows per title (non-null state_id values), largest first
rows_per_title = (
    df_state_series_table
    .groupby('title')
    .count()
    .sort_values('state_id', ascending=False)
)
print(rows_per_title['state_id'])

title
All Employees: Total Private in                                                                                                                   200
All Employees: Total Nonfarm in                                                                                                                   200
All Employees: Information in                                                                                                                     155
All Employees: Wholesale Trade in                                                                                                                 153
All Employees: Other Services in                                                                                                                  151
                                                                                                                                                 ... 
Value of Exports to Antigua and Barbuda from U.S.                                             

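There are only 53 states, so I don't see how any title can appear more than 53 times. Maybe several distinct series share the same title and differ only in their other metadata (frequency, units, seasonal adjustment).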

In [40]:
# Same count as before, but grouping on the title plus the other metadata
# columns to see whether the extra rows are separate series sharing a title
metadata_keys = ['title', 'frequency', 'units', 'seasonal_adjustment', 'observation_end']
rows_per_feature = (
    df_state_series_table
    .groupby(metadata_keys)
    .count()
    .sort_values('state_id', ascending=False)
)
print(rows_per_feature['state_id'])

title                                                                                        frequency  units                                seasonal_adjustment      observation_end
Number of Identified Exporters to Turkey from                                                Annual     Number of Firms                      Not Seasonally Adjusted  2016-01-01         52
Number of Identified Exporters to Republic of Korea from                                     Annual     Number of Firms                      Not Seasonally Adjusted  2016-01-01         52
Number of Identified Exporters to Austria from                                               Annual     Number of Firms                      Not Seasonally Adjusted  2016-01-01         52
Number of Identified Exporters to Australia from                                             Annual     Number of Firms                      Not Seasonally Adjusted  2016-01-01         52
Number of Identified Exporters to Argentina from                  

In [43]:
# Save the per-title distinct-state counts computed above
agg.to_csv('agg_state_feat_info.csv')