In [13]:
import json
import pandas as pd
import requests
import sqlite3 as db
from sqlite3 import Error

%run C:\Users\Joshua\Jupyter_Notebook_Folders\APIkeys.py

### List of Bureau of Labor Statistics Surveys

In [2]:
surveys_url = "https://api.bls.gov/publicAPI/v2/surveys"
s = requests.get(surveys_url)
s_json = s.json()                          # Turn request into JSON 
surveys = s_json["Results"]["survey"]      # Get list of surveys from the JSON
surveys

[{'survey_abbreviation': 'AP',
  'survey_name': 'Consumer Price Index - Average Price Data'},
 {'survey_abbreviation': 'BD', 'survey_name': 'Business Employment Dynamics'},
 {'survey_abbreviation': 'BG',
  'survey_name': 'Collective Bargaining Agreements-State and Local Government'},
 {'survey_abbreviation': 'BP',
  'survey_name': 'Collective Bargaining Agreements-Private Sector'},
 {'survey_abbreviation': 'CC',
  'survey_name': 'Employer Costs for Employee Compensation'},
 {'survey_abbreviation': 'CD',
  'survey_name': 'Nonfatal cases involving days away from work: selected characteristics'},
 {'survey_abbreviation': 'CE',
  'survey_name': 'Employment, Hours, and Earnings from the Current Employment Statistics survey (National)'},
 {'survey_abbreviation': 'CF',
  'survey_name': 'Census of Fatal Occupational Injuries'},
 {'survey_abbreviation': 'CH',
  'survey_name': 'Nonfatal cases involving days away from work: selected characteristics (2003 - 2010)'},
 {'survey_abbreviation': 'CI', 

### Get data from BLS

In [53]:
base_url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'  #this will not change
headers = {'Content-type': 'application/json'}  #This will not changed !

# For the key seriesid enter a list of series names you wish to download
# For the key startyear enter the start year inside ""
# For the key endyear enter the end year inside ""

seriesID = "CUUR0000SA0"
parameters = {
    "seriesid":[seriesID], 
    "startyear":"1959", 
    "endyear":"1978",
    "catalog":True, 
    "calculations":False, 
    "annualaverage":False,
    "aspects":False,
    "registrationkey":os.environ['BLS_API_key'] 
 }

data = json.dumps(parameters) # Converts the Python dictionary to JSON

p = requests.post(base_url, data=data, headers=headers)
json_data = json.loads(p.text)
json_data

{'status': 'REQUEST_SUCCEEDED',
 'responseTime': 300,
 'message': [],
 'Results': {'series': [{'seriesID': 'CUUR0000SA0',
    'catalog': {'series_title': 'All items in U.S. city average, all urban consumers, not seasonally adjusted',
     'series_id': 'CUUR0000SA0',
     'seasonality': 'Not Seasonally Adjusted',
     'survey_name': 'CPI for All Urban Consumers (CPI-U)',
     'survey_abbreviation': 'CU',
     'measure_data_type': 'All items',
     'area': 'U.S. city average',
     'item': 'All items'},
    'data': [{'year': '1978',
      'period': 'M12',
      'periodName': 'December',
      'value': '67.7',
      'footnotes': [{}]},
     {'year': '1978',
      'period': 'M11',
      'periodName': 'November',
      'value': '67.4',
      'footnotes': [{}]},
     {'year': '1978',
      'period': 'M10',
      'periodName': 'October',
      'value': '67.1',
      'footnotes': [{}]},
     {'year': '1978',
      'period': 'M09',
      'periodName': 'September',
      'value': '66.5',
      '

### Create Panda Dataframe

In [54]:
df = pd.DataFrame(json_data['Results']['series'][0]['data'])
df['date'] = df['year'] + df['period']
df['date'] = df['date'].str.replace('M', '-')
df['date'] = df['date'].astype('string')
df = df.sort_values(by=['date'])
df = df.drop(columns='footnotes')
df['value'] = df['value'].astype(float)
df['description'] = 'Consumer Price Index - All Urban Consumers - Not Seasonally Adjusted'
df

Unnamed: 0,year,period,periodName,value,date,description
239,1959,M01,January,29.0,1959-01,Consumer Price Index - All Urban Consumers - N...
238,1959,M02,February,28.9,1959-02,Consumer Price Index - All Urban Consumers - N...
237,1959,M03,March,28.9,1959-03,Consumer Price Index - All Urban Consumers - N...
236,1959,M04,April,29.0,1959-04,Consumer Price Index - All Urban Consumers - N...
235,1959,M05,May,29.0,1959-05,Consumer Price Index - All Urban Consumers - N...
...,...,...,...,...,...,...
4,1978,M08,August,66.0,1978-08,Consumer Price Index - All Urban Consumers - N...
3,1978,M09,September,66.5,1978-09,Consumer Price Index - All Urban Consumers - N...
2,1978,M10,October,67.1,1978-10,Consumer Price Index - All Urban Consumers - N...
1,1978,M11,November,67.4,1978-11,Consumer Price Index - All Urban Consumers - N...


In [33]:
print(df.dtypes)

year            object
period          object
periodName      object
value          float64
date            string
description     object
dtype: object


### Functions for working with SQLite

In [56]:
def create_database(db_name):
    """ 
    create an SQLite database
    """
    conn = None
    try:
        conn = db.connect(db_name)
        print(db.version)
    except Error as e:
        print(e)
    finally:
        conn.close()

            
def create_connection(db_name):
    """ 
    create a database connection to the SQLite database
        specified by db_name
    return: Connection object or None
    """
    conn = None
    try:
        conn = db.connect(db_name)
        return conn
    except Error as e:
        print(e)

    return conn


def create_table(tbl_name ,db_name):
    """
    check if table exists
    """
    conn = create_connection(db_name)
    c = conn.cursor()
    c.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = c.fetchall()
    try: 
        df.to_sql(tbl_name, conn)
    except ValueError as e:
        print(e)
        print("Table was not added to the DB")
        c.execute("SELECT name FROM sqlite_master WHERE type='table';")
        print("Tables already in DB ", c.fetchall())
        c.execute("PRAGMA table_info(",seriesID,");")
        print("Table ", tbl_name, "has columns: ", c.fetchall())
        
    conn.close()
    
    
def check_duplicate_data(tbl_name, db_name):
    """
    check if the data to be entered already exists in the table
    """
    conn = create_connection(db_name)
    c = conn.cursor()
    
    df_not_dup_data = pd.DataFrame(columns = list(df.columns))
    df_dup_data = pd.DataFrame(columns = list(df.columns))
    df_inconsistent_data = pd.DataFrame(columns = list(df.columns))
    
    # Check if data already exists in DB
    for i, row in df.iterrows():
        c.execute("SELECT date, value FROM " + seriesID + " WHERE date = ?;", (df.iloc[i]["date"],))
        queryOut = c.fetchall()
        if len(queryOut) == 0:
            df_not_dup_data = pd.concat([df_not_dup_data, df[i:i+1]])
        elif queryOut[0][1] == df.iloc[i]["value"]:
            df_dup_data = pd.concat([df_dup_data, df[i:i+1]])
            #print(seriesID, " data for ", df.iloc[i]["date"], " already exists with same value as API:", queryOut[0][1])
        elif queryOut[0][1] != df.ioc[i]["value"]:
            df_inconsistent_data = pd.concat([df_inconsistent_data, df[i:i+1]])
            #print(seriesID, " ", df.ioc[i]["date"], "DB DATA: ", queryOut[0][1], " DOES NOT MATCH INPUT DATA: ", )
    conn.close()
    
    df_not_dup_data = df_not_dup_data.sort_values(by=['date'])
    
    return df_not_dup_data, df_dup_data, df_inconsistent_data


def add_to_database(df_EntryData, tbl_name, db_name):    
    conn = create_connection(db_name)
    c = conn.cursor()    
    try:
        df_EntryData.to_sql(seriesID, con=conn, if_exists='append')
        print("Data added to DB: \n", df_EntryData)
    except ValueError as e:
        print(e)
        print("Data was not added to the DB")
    conn.close()

### Create MacroData DB if it does not exist

In [10]:
create_database(r"MacroData.sqlite3")

2.6.0


### Create table if it does not already exist

In [35]:
create_table(seriesID, 'MacroData.sqlite3')

Table 'CUUR0000SA0' already exists.
Table was not added to the DB
Tables already in DB  [('CUUR0000SA0',)]
Table  CUUR0000SA0 has columns:  [(0, 'index', 'INTEGER', 0, None, 0), (1, 'year', 'TEXT', 0, None, 0), (2, 'period', 'TEXT', 0, None, 0), (3, 'periodName', 'TEXT', 0, None, 0), (4, 'value', 'REAL', 0, None, 0), (5, 'date', 'TEXT', 0, None, 0), (6, 'description', 'TEXT', 0, None, 0)]


### Check if data already exists in series

In [58]:

df_AddDB, df_dup_data, df_inconsistent_data = check_duplicate_data(seriesID, 'MacroData.sqlite3')

#df_AddDB.to_sql(seriesID, con=conn, if_exists='append')

print("Duplicate Data: \n", df_dup_data)
if not df_inconsistent_data.empty:
    print("DATA FROM DATAFRAME DOES INCONSISTENT WITH DATA IN DATABASE: \n", df_inconsistent_data)
else:
    del df_inconsistent_data

add_to_database(df_AddDB, seriesID, 'MacroData.sqlite3')
#df_AddDB

Duplicate Data: 
 Empty DataFrame
Columns: [year, period, periodName, value, date, description]
Index: []
Data added to DB: 
      year period periodName value     date  \
239  1959    M01    January  29.0  1959-01   
238  1959    M02   February  28.9  1959-02   
237  1959    M03      March  28.9  1959-03   
236  1959    M04      April  29.0  1959-04   
235  1959    M05        May  29.0  1959-05   
..    ...    ...        ...   ...      ...   
4    1978    M08     August  66.0  1978-08   
3    1978    M09  September  66.5  1978-09   
2    1978    M10    October  67.1  1978-10   
1    1978    M11   November  67.4  1978-11   
0    1978    M12   December  67.7  1978-12   

                                           description  
239  Consumer Price Index - All Urban Consumers - N...  
238  Consumer Price Index - All Urban Consumers - N...  
237  Consumer Price Index - All Urban Consumers - N...  
236  Consumer Price Index - All Urban Consumers - N...  
235  Consumer Price Index - All Urba

In [None]:
#.execute("SELECT date, value FROM " + seriesID + " WHERE date = 1800-01;")
#print(len(c.fetchall()))

In [39]:
conn = db.connect('MacroData.sqlite3')
df_fromDB = pd.read_sql_query("SELECT * FROM " + seriesID + ";", conn)
conn.close()
df_fromDB

Unnamed: 0,index,year,period,periodName,value,date,description
0,239,1919,M01,January,16.5,1919-01,Consumer Price Index - All Urban Consumers - N...
1,238,1919,M02,February,16.2,1919-02,Consumer Price Index - All Urban Consumers - N...
2,237,1919,M03,March,16.4,1919-03,Consumer Price Index - All Urban Consumers - N...
3,236,1919,M04,April,16.7,1919-04,Consumer Price Index - All Urban Consumers - N...
4,235,1919,M05,May,16.9,1919-05,Consumer Price Index - All Urban Consumers - N...
...,...,...,...,...,...,...,...
475,4,1958,M08,August,28.9,1958-08,Consumer Price Index - All Urban Consumers - N...
476,3,1958,M09,September,28.9,1958-09,Consumer Price Index - All Urban Consumers - N...
477,2,1958,M10,October,28.9,1958-10,Consumer Price Index - All Urban Consumers - N...
478,1,1958,M11,November,29.0,1958-11,Consumer Price Index - All Urban Consumers - N...


In [22]:
#con = db.connect('MacroData.sqlite3')
#c = con.cursor()
#c.execute("DROP TABLE " + seriesID)
#con.commit()
#con.close()

### Insert Dataframe into MacroData DB

In [None]:
con = db.connect('MacroData')
df.to_sql(name = seriesID, con)


con.close()