In [17]:
import json
import pandas as pd
import requests
import sqlite3 as db
from sqlite3 import Error

%run C:\Users\Joshua\Jupyter_Notebook_Folders\APIkeys.py

### List of Bureau of Labor Statistics Surveys

In [3]:
surveys_url = "https://api.bls.gov/publicAPI/v2/surveys"
s = requests.get(surveys_url)
s_json = s.json()                          # Turn request into JSON 
surveys = s_json["Results"]["survey"]      # Get list of surveys from the JSON
surveys

[{'survey_abbreviation': 'AP',
  'survey_name': 'Consumer Price Index - Average Price Data'},
 {'survey_abbreviation': 'BD', 'survey_name': 'Business Employment Dynamics'},
 {'survey_abbreviation': 'BG',
  'survey_name': 'Collective Bargaining Agreements-State and Local Government'},
 {'survey_abbreviation': 'BP',
  'survey_name': 'Collective Bargaining Agreements-Private Sector'},
 {'survey_abbreviation': 'CC',
  'survey_name': 'Employer Costs for Employee Compensation'},
 {'survey_abbreviation': 'CD',
  'survey_name': 'Nonfatal cases involving days away from work: selected characteristics'},
 {'survey_abbreviation': 'CE',
  'survey_name': 'Employment, Hours, and Earnings from the Current Employment Statistics survey (National)'},
 {'survey_abbreviation': 'CF',
  'survey_name': 'Census of Fatal Occupational Injuries'},
 {'survey_abbreviation': 'CH',
  'survey_name': 'Nonfatal cases involving days away from work: selected characteristics (2003 - 2010)'},
 {'survey_abbreviation': 'CI', 

### Functions for working with SQLite

In [33]:
def create_database(db_name):
    """ 
    create an SQLite database
    """
    conn = None
    try:
        conn = db.connect(db_name)
        print(db.version)
    except Error as e:
        print(e)
    finally:
        conn.close()
            
def create_connection(db_name):
    """ 
    create a database connection to the SQLite database
        specified by db_name
    return: Connection object or None
    """
    conn = None
    try:
        conn = db.connect(db_name)
        return conn
    except Error as e:
        print(e)

    return conn


def check_table_exists(tbl_name, db_name): 
    """
    Check if table exists
    """
    conn = create_connection(db_name)
    c = conn.cursor()
    exists = 0
    c.execute("SELECT count(name) FROM sqlite_master WHERE type='table'")
    if c.fetchone()[0] == 0:
        print("No tables in DB.")
    else:
        c.execute("SELECT count(name) FROM sqlite_master WHERE type='table' AND name=" + tbl_name)
        if c.fetchone()[0] == 1:
            exists = 1
            
        else:
            print("Table does not exist")        
    conn.close()
    
    return exists 
    
def create_table(tbl_name, db_name):
    """
    create table
    """
    conn = create_connection(db_name)
    c = conn.cursor()
    tbl_exists = check_table_exists(tbl_name ,db_name)
    if tbl_exists:
        print("Table ", seriesID, " already exists")
    else:
        try: 
            df.to_sql(tbl_name, conn)
            print("Created ", tbl_name, " table.")
        except ValueError as e:
            print(e)
            #print("Table was not added to the DB")
            #c.execute("SELECT name FROM sqlite_master WHERE type='table';")
            #print("Tables already in DB ", c.fetchall())
            #c.execute("PRAGMA table_info(" + seriesID +");")
            #print("Table ", tbl_name, "has columns: ", c.fetchall())        
    conn.close()
    
    
def check_duplicate_data(tbl_name, db_name):
    """
    check if the data to be entered already exists in the table
    """
    conn = create_connection(db_name)
    c = conn.cursor()
    
    df_not_dup_data = pd.DataFrame(columns = list(df.columns))
    df_dup_data = pd.DataFrame(columns = list(df.columns))
    df_inconsistent_data = pd.DataFrame(columns = list(df.columns))
    
    # Check if data already exists in DB
    for i, row in df.iterrows():
        c.execute("SELECT date, value FROM " + seriesID + " WHERE date = ?;", (df.loc[i]["date"],))
        queryOut = c.fetchall()
        if len(queryOut) == 0:
            df_not_dup_data = pd.concat([df_not_dup_data, df[i:i+1]])
        elif queryOut[0][1] == df.loc[i]["value"]:
            df_dup_data = pd.concat([df_dup_data, df[i:i+1]])
            #print(seriesID, " data for ", df.iloc[i]["date"], " already exists with same value as API:", queryOut[0][1])
        elif queryOut[0][1] != df.loc[i]["value"]:
            df_inconsistent_data = pd.concat([df_inconsistent_data, df[i:i+1]])
            #print(seriesID, " ", df.ioc[i]["date"], "DB DATA: ", queryOut[0][1], " DOES NOT MATCH INPUT DATA: ", )
    conn.close()
    
    df_not_dup_data = df_not_dup_data.sort_values(by=['date'])
    
    return df_not_dup_data, df_dup_data, df_inconsistent_data


def add_to_database(df_EntryData, tbl_name, db_name):    
    conn = create_connection(db_name)    
    try:
        df_EntryData.to_sql(seriesID, con=conn, if_exists='append')
        print("Data added to DB: \n", df_EntryData)
    except ValueError as e:
        print(e)
        print("Data was not added to the DB")
    conn.close()
    
def get_max_index(tbl_name, db_name):
    conn = create_connection(db_name)
    c = conn.cursor()
    try:
        c.execute("SELECT MAX(index) FROM " + seriesID)
        max_index = c.fetchone()[0]
    except Error as e:
        print(e)
    conn.close()
    return max_index

In [31]:
print(check_table_exists(seriesID, "MacroData.sqlite3"))

No tables in DB.
0


### Get data from BLS

In [21]:
base_url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'  #this will not change
headers = {'Content-type': 'application/json'}  #This will not changed !

# For the key seriesid enter a list of series names you wish to download
# For the key startyear enter the start year inside ""
# For the key endyear enter the end year inside ""

seriesID = "CUUR0000SA0"
parameters = {
    "seriesid":[seriesID], 
    "startyear":"1919", 
    "endyear":"1938",
    "catalog":True, 
    "calculations":False, 
    "annualaverage":False,
    "aspects":False,
    "registrationkey":os.environ['BLS_API_key'] 
 }

data = json.dumps(parameters) # Converts the Python dictionary to JSON

p = requests.post(base_url, data=data, headers=headers)
json_data = json.loads(p.text)
json_data

{'status': 'REQUEST_SUCCEEDED',
 'responseTime': 270,
 'message': [],
 'Results': {'series': [{'seriesID': 'CUUR0000SA0',
    'catalog': {'series_title': 'All items in U.S. city average, all urban consumers, not seasonally adjusted',
     'series_id': 'CUUR0000SA0',
     'seasonality': 'Not Seasonally Adjusted',
     'survey_name': 'CPI for All Urban Consumers (CPI-U)',
     'survey_abbreviation': 'CU',
     'measure_data_type': 'All items',
     'area': 'U.S. city average',
     'item': 'All items'},
    'data': [{'year': '1938',
      'period': 'M12',
      'periodName': 'December',
      'value': '14.0',
      'footnotes': [{}]},
     {'year': '1938',
      'period': 'M11',
      'periodName': 'November',
      'value': '14.0',
      'footnotes': [{}]},
     {'year': '1938',
      'period': 'M10',
      'periodName': 'October',
      'value': '14.0',
      'footnotes': [{}]},
     {'year': '1938',
      'period': 'M09',
      'periodName': 'September',
      'value': '14.1',
      '

### Create Panda Dataframe

In [27]:
if check_table_exists(seriesID, "MacroData.sqlite3") != 0:
    index_start = get_max_index(tbl_name, "MacroData.sqlite3") + 1

df = pd.DataFrame(json_data['Results']['series'][0]['data'])
df['date'] = df['year'] + df['period']
df['date'] = df['date'].str.replace('M', '-')
df['date'] = df['date'].astype('string')
df = df.sort_values(by=['date'], ignore_index=True)
df = df.drop(columns='footnotes')
df['value'] = df['value'].astype(float)
df['description'] = 'Consumer Price Index - All Urban Consumers - Not Seasonally Adjusted'
df

Unnamed: 0,year,period,periodName,value,date,description
0,1939,M01,January,14.0,1939-01,Consumer Price Index - All Urban Consumers - N...
1,1939,M02,February,13.9,1939-02,Consumer Price Index - All Urban Consumers - N...
2,1939,M03,March,13.9,1939-03,Consumer Price Index - All Urban Consumers - N...
3,1939,M04,April,13.8,1939-04,Consumer Price Index - All Urban Consumers - N...
4,1939,M05,May,13.8,1939-05,Consumer Price Index - All Urban Consumers - N...
...,...,...,...,...,...,...
235,1958,M08,August,28.9,1958-08,Consumer Price Index - All Urban Consumers - N...
236,1958,M09,September,28.9,1958-09,Consumer Price Index - All Urban Consumers - N...
237,1958,M10,October,28.9,1958-10,Consumer Price Index - All Urban Consumers - N...
238,1958,M11,November,29.0,1958-11,Consumer Price Index - All Urban Consumers - N...


In [28]:
print(df.dtypes)

year            object
period          object
periodName      object
value          float64
date            string
description     object
dtype: object


### Create MacroData DB if it does not exist

In [4]:
create_database(r"MacroData.sqlite3")

2.6.0


### Create table if it does not already exist

In [32]:
create_table(seriesID, 'MacroData.sqlite3')

No tables in DB.


NameError: name 'df' is not defined

### Check if data already exists in Database

In [29]:
df_AddDB, df_dup_data, df_inconsistent_data = check_duplicate_data(seriesID, 'MacroData.sqlite3')
print("Duplicate Data: \n", df_dup_data)
if not df_inconsistent_data.empty:
    print("DATA FROM DATAFRAME DOES INCONSISTENT WITH DATA IN DATABASE: \n", df_inconsistent_data)
else:
    del df_inconsistent_data


df_AddDB

Duplicate Data: 
 Empty DataFrame
Columns: [year, period, periodName, value, date, description]
Index: []


Unnamed: 0,year,period,periodName,value,date,description
0,1939,M01,January,14.0,1939-01,Consumer Price Index - All Urban Consumers - N...
1,1939,M02,February,13.9,1939-02,Consumer Price Index - All Urban Consumers - N...
2,1939,M03,March,13.9,1939-03,Consumer Price Index - All Urban Consumers - N...
3,1939,M04,April,13.8,1939-04,Consumer Price Index - All Urban Consumers - N...
4,1939,M05,May,13.8,1939-05,Consumer Price Index - All Urban Consumers - N...
...,...,...,...,...,...,...
235,1958,M08,August,28.9,1958-08,Consumer Price Index - All Urban Consumers - N...
236,1958,M09,September,28.9,1958-09,Consumer Price Index - All Urban Consumers - N...
237,1958,M10,October,28.9,1958-10,Consumer Price Index - All Urban Consumers - N...
238,1958,M11,November,29.0,1958-11,Consumer Price Index - All Urban Consumers - N...


### Add data to Database if it does not already exist

In [30]:
add_to_database(df_AddDB, seriesID, 'MacroData.sqlite3')

Data added to DB: 
      year period periodName value     date  \
0    1939    M01    January  14.0  1939-01   
1    1939    M02   February  13.9  1939-02   
2    1939    M03      March  13.9  1939-03   
3    1939    M04      April  13.8  1939-04   
4    1939    M05        May  13.8  1939-05   
..    ...    ...        ...   ...      ...   
235  1958    M08     August  28.9  1958-08   
236  1958    M09  September  28.9  1958-09   
237  1958    M10    October  28.9  1958-10   
238  1958    M11   November  29.0  1958-11   
239  1958    M12   December  28.9  1958-12   

                                           description  
0    Consumer Price Index - All Urban Consumers - N...  
1    Consumer Price Index - All Urban Consumers - N...  
2    Consumer Price Index - All Urban Consumers - N...  
3    Consumer Price Index - All Urban Consumers - N...  
4    Consumer Price Index - All Urban Consumers - N...  
..                                                 ...  
235  Consumer Price Index - 

In [None]:
#.execute("SELECT date, value FROM " + seriesID + " WHERE date = 1800-01;")
#print(len(c.fetchall()))

In [20]:
conn = db.connect('MacroData.sqlite3')
try:
    df_fromDB = pd.read_sql_query("SELECT * FROM " + seriesID + ";", conn)
except Error as e:
    print(e)
conn.close()
df_fromDB = df_fromDB.set_index("date")
#df_fromDB.drop
df_fromDB

DatabaseError: Execution failed on sql 'SELECT * FROM CUUR0000SA0;': no such table: CUUR0000SA0

In [8]:
#con = db.connect('MacroData.sqlite3')
#c = con.cursor()
#c.execute("DROP TABLE " + seriesID)
#con.commit()
#con.close()

In [27]:
#Get list of tables in DB
#c.execute("SELECT name FROM sqlite_master WHERE type='table';")
#tables = c.fetchall()

No tables in DB.
