In [1]:
import json
import pandas as pd
import requests
import sqlite3 as db
from sqlite3 import Error

%run C:\Users\Joshua\Jupyter_Notebook_Folders\APIkeys.py

### List of Bureau of Labor Statistics Surveys

In [2]:
surveys_url = "https://api.bls.gov/publicAPI/v2/surveys"
s = requests.get(surveys_url)
s_json = s.json()                          # Turn request into JSON 
surveys = s_json["Results"]["survey"]      # Get list of surveys from the JSON
surveys

[{'survey_abbreviation': 'AP',
  'survey_name': 'Consumer Price Index - Average Price Data'},
 {'survey_abbreviation': 'BD', 'survey_name': 'Business Employment Dynamics'},
 {'survey_abbreviation': 'BG',
  'survey_name': 'Collective Bargaining Agreements-State and Local Government'},
 {'survey_abbreviation': 'BP',
  'survey_name': 'Collective Bargaining Agreements-Private Sector'},
 {'survey_abbreviation': 'CC',
  'survey_name': 'Employer Costs for Employee Compensation'},
 {'survey_abbreviation': 'CD',
  'survey_name': 'Nonfatal cases involving days away from work: selected characteristics'},
 {'survey_abbreviation': 'CE',
  'survey_name': 'Employment, Hours, and Earnings from the Current Employment Statistics survey (National)'},
 {'survey_abbreviation': 'CF',
  'survey_name': 'Census of Fatal Occupational Injuries'},
 {'survey_abbreviation': 'CH',
  'survey_name': 'Nonfatal cases involving days away from work: selected characteristics (2003 - 2010)'},
 {'survey_abbreviation': 'CI', 

### Get data from BLS

In [22]:
base_url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'  #this will not change
headers = {'Content-type': 'application/json'}  #This will not changed !

# For the key seriesid enter a list of series names you wish to download
# For the key startyear enter the start year inside ""
# For the key endyear enter the end year inside ""

seriesID = "CUUR0000SA0"
parameters = {
    "seriesid":[seriesID], 
    "startyear":"1919", 
    "endyear":"1938",
    "catalog":True, 
    "calculations":False, 
    "annualaverage":False,
    "aspects":False,
    "registrationkey":os.environ['BLS_API_key'] 
 }

data = json.dumps(parameters) # Converts the Python dictionary to JSON

p = requests.post(base_url, data=data, headers=headers)
json_data = json.loads(p.text)
json_data

{'status': 'REQUEST_SUCCEEDED',
 'responseTime': 306,
 'message': [],
 'Results': {'series': [{'seriesID': 'CUUR0000SA0',
    'catalog': {'series_title': 'All items in U.S. city average, all urban consumers, not seasonally adjusted',
     'series_id': 'CUUR0000SA0',
     'seasonality': 'Not Seasonally Adjusted',
     'survey_name': 'CPI for All Urban Consumers (CPI-U)',
     'survey_abbreviation': 'CU',
     'measure_data_type': 'All items',
     'area': 'U.S. city average',
     'item': 'All items'},
    'data': [{'year': '1938',
      'period': 'M12',
      'periodName': 'December',
      'value': '14.0',
      'footnotes': [{}]},
     {'year': '1938',
      'period': 'M11',
      'periodName': 'November',
      'value': '14.0',
      'footnotes': [{}]},
     {'year': '1938',
      'period': 'M10',
      'periodName': 'October',
      'value': '14.0',
      'footnotes': [{}]},
     {'year': '1938',
      'period': 'M09',
      'periodName': 'September',
      'value': '14.1',
      '

### Create Panda Dataframe

In [68]:
df = pd.DataFrame(json_data['Results']['series'][0]['data'])
df['date'] = df['year'] + df['period']
df['date'] = df['date'].str.replace('M', '-')
df['date'] = df['date'].astype('string')
df = df.sort_values(by=['date'])
df = df.drop(columns='footnotes')
df['value'] = df['value'].astype(float)
df['description'] = 'Consumer Price Index - All Urban Consumers - Not Seasonally Adjusted'
df

Unnamed: 0,year,period,periodName,value,date,description
239,1919,M01,January,16.5,1919-01,Consumer Price Index - All Urban Consumers - N...
238,1919,M02,February,16.2,1919-02,Consumer Price Index - All Urban Consumers - N...
237,1919,M03,March,16.4,1919-03,Consumer Price Index - All Urban Consumers - N...
236,1919,M04,April,16.7,1919-04,Consumer Price Index - All Urban Consumers - N...
235,1919,M05,May,16.9,1919-05,Consumer Price Index - All Urban Consumers - N...
...,...,...,...,...,...,...
4,1938,M08,August,14.1,1938-08,Consumer Price Index - All Urban Consumers - N...
3,1938,M09,September,14.1,1938-09,Consumer Price Index - All Urban Consumers - N...
2,1938,M10,October,14.0,1938-10,Consumer Price Index - All Urban Consumers - N...
1,1938,M11,November,14.0,1938-11,Consumer Price Index - All Urban Consumers - N...


In [73]:
print(df.dtypes)

year            object
period          object
periodName      object
value          float64
date            string
description     object
dtype: object


### Functions for working with SQLite

In [74]:
def create_database(db_name):
    """ 
    create an SQLite database
    """
    conn = None
    try:
        conn = db.connect(db_name)
        print(db.version)
    except Error as e:
        print(e)
    finally:
            conn.close()

            
def create_connection(db_name):
    """ 
    create a database connection to the SQLite database
        specified by db_name
    return: Connection object or None
    """
    conn = None
    try:
        conn = db.connect(db_name)
        return conn
    except Error as e:
        print(e)

    return conn


def create_table(tbl_name ,db_name):
    """
    check if table exists
    """
    conn = create_connection(db_name)
    c = conn.cursor()
    c.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = c.fetchall()
    try: 
        df.to_sql(tbl_name, conn)
    except ValueError as e:
        print(e)
        print("Table was not added to the DB")
        c.execute("SELECT name FROM sqlite_master WHERE type='table';")
        print("Tables already in DB ", c.fetchall())
        c.execute("PRAGMA table_info(CUUR0000SA0);")
        print("Table ", tbl_name, "has columns: ", c.fetchall())
        
    conn.close()

### Create MacroData DB if it does not exist

In [None]:
create_database(r"MacroData")

### Create table if it does not already exist

In [75]:
create_table(seriesID, 'MacroData')

### Check if data already exists in series

In [82]:
conn = db.connect('MacroData')
c = conn.cursor()
#c.execute("SELECT date FROM " + seriesID + ";")
#print(c.fetchall())
df_AddDB = pd.DataFrame()
for index, row in df.iterrows():
    #print(row["date"])
    c.execute("SELECT date FROM " + seriesID + " WHERE date = ?;", (row["date"],))
    print(c.fetchall())
    #df_AddDB = pd.concat(df_AddDB, row)
conn.close()
df_AddDB                            

[('1919-01',)]
[('1919-02',)]
[('1919-03',)]
[('1919-04',)]
[('1919-05',)]
[('1919-06',)]
[('1919-07',)]
[('1919-08',)]
[('1919-09',)]
[('1919-10',)]
[('1919-11',)]
[('1919-12',)]
[('1920-01',)]
[('1920-02',)]
[('1920-03',)]
[('1920-04',)]
[('1920-05',)]
[('1920-06',)]
[('1920-07',)]
[('1920-08',)]
[('1920-09',)]
[('1920-10',)]
[('1920-11',)]
[('1920-12',)]
[('1921-01',)]
[('1921-02',)]
[('1921-03',)]
[('1921-04',)]
[('1921-05',)]
[('1921-06',)]
[('1921-07',)]
[('1921-08',)]
[('1921-09',)]
[('1921-10',)]
[('1921-11',)]
[('1921-12',)]
[('1922-01',)]
[('1922-02',)]
[('1922-03',)]
[('1922-04',)]
[('1922-05',)]
[('1922-06',)]
[('1922-07',)]
[('1922-08',)]
[('1922-09',)]
[('1922-10',)]
[('1922-11',)]
[('1922-12',)]
[('1923-01',)]
[('1923-02',)]
[('1923-03',)]
[('1923-04',)]
[('1923-05',)]
[('1923-06',)]
[('1923-07',)]
[('1923-08',)]
[('1923-09',)]
[('1923-10',)]
[('1923-11',)]
[('1923-12',)]
[('1924-01',)]
[('1924-02',)]
[('1924-03',)]
[('1924-04',)]
[('1924-05',)]
[('1924-06',)]
[('1924-07

In [72]:
con = db.connect('MacroData')
c = con.cursor()
c.execute("DROP TABLE " + seriesID)
con.commit()
con.close()

### Insert Dataframe into MacroData DB

In [None]:
con = db.connect('MacroData')
df.to_sql(name = seriesID, con)


con.close()