In [1]:
import json
import os
import sqlite3 as db
from sqlite3 import Error

import pandas as pd
import requests

%run C:\Users\Joshua\Jupyter_Notebook_Folders\APIkeys.py

### Functions for working with SQLite

In [2]:
def create_database(db_name):
    """
    Create an SQLite database.

    Opens a connection to db_name (sqlite3 creates the file when it does
    not exist yet) and closes it again straight away.

    db_name: path of the SQLite database file
    return: None; sqlite3 errors are printed instead of raised
    """
    conn = None
    try:
        conn = db.connect(db_name)
    except Error as e:
        print(e)
    finally:
        # conn is still None when connect() failed; calling close() on it
        # would replace the printed error with an AttributeError.
        if conn is not None:
            conn.close()
            
def create_connection(db_name):
    """
    Open a connection to the SQLite database stored at db_name.

    A sqlite3 error raised while connecting is printed, not propagated.

    return: the open Connection object, or None when connecting failed
    """
    try:
        connection = db.connect(db_name)
    except Error as err:
        print(err)
        connection = None

    return connection


def check_table_exists(tbl_name, db_name):
    """
    Check if table exists

    tbl_name: name of the table to look for
    db_name: path of the SQLite database file
    return: 1 when a table called tbl_name exists in db_name, otherwise 0
    """
    conn = create_connection(db_name)
    c = conn.cursor()
    exists = 0
    try:
        c.execute("SELECT count(name) FROM sqlite_master WHERE type='table'")
        if c.fetchone()[0] == 0:
            print("No tables in DB.")
            return exists

        try:
            # Look up the requested table (not a notebook global) and bind
            # the name as a parameter instead of splicing it into the SQL.
            c.execute(
                "SELECT COUNT(name) FROM sqlite_master WHERE type = 'table' AND name = ?;",
                (tbl_name,),
            )
        except Error as e:
            # The query failed, so there is no result row to inspect.
            print(e)
            return exists

        if c.fetchone()[0] == 1:
            exists = 1
        else:
            print("Table does not exist")
    finally:
        # Release the connection on every path, including errors.
        conn.close()

    return exists
    
    
def create_table(input_df, tbl_name, db_name):
    """
    Create table tbl_name in db_name from input_df unless it already exists.

    input_df: DataFrame whose columns and rows become the new table
    tbl_name: name of the table to create
    db_name: path of the SQLite database file
    return: None; the outcome is reported with print()
    """
    # Check first so no connection is opened when there is nothing to do.
    if check_table_exists(tbl_name, db_name):
        print("Table ", tbl_name, " already exists")
        return

    conn = create_connection(db_name)
    try:
        input_df.to_sql(tbl_name, conn)
        print("Created ", tbl_name, " table.")
    except ValueError as e:
        print(e)
        print("Table was not added to the DB")
    finally:
        # Close the connection even when to_sql raises something unexpected.
        conn.close()
    
    
def check_duplicate_data(df, tbl_name, db_name):
    """
    check if the data to be entered already exists in the table

    Every row of df is looked up in tbl_name by its "date" value and put
    into one of three groups:
      * no row with that date in the table        -> not duplicate
      * first matching row has the same "value"   -> duplicate
      * first matching row has another "value"    -> inconsistent

    df: DataFrame with at least the columns "date" and "value"
    tbl_name: table to compare against (must have date and value columns)
    db_name: path of the SQLite database file
    return: (df_not_dup_data, df_dup_data, df_inconsistent_data), each a
        subset of df's rows; df_not_dup_data is sorted by "date"
    """
    conn = create_connection(db_name)
    query = "SELECT date, value FROM " + tbl_name + " WHERE date = ?;"

    # Collect row positions and slice once at the end: concatenating one row
    # per iteration is quadratic, and positions stay unambiguous even when
    # df's index contains repeated labels.
    not_dup_pos, dup_pos, inconsistent_pos = [], [], []
    try:
        c = conn.cursor()
        for pos, (date, value) in enumerate(zip(df["date"], df["value"])):
            c.execute(query, (date,))
            queryOut = c.fetchall()
            if len(queryOut) == 0:
                not_dup_pos.append(pos)
            elif queryOut[0][1] == value:
                dup_pos.append(pos)
            else:
                inconsistent_pos.append(pos)
    finally:
        # Release the connection even when a query fails.
        conn.close()

    df_not_dup_data = df.iloc[not_dup_pos].sort_values(by=['date'])
    df_dup_data = df.iloc[dup_pos]
    df_inconsistent_data = df.iloc[inconsistent_pos]

    return df_not_dup_data, df_dup_data, df_inconsistent_data


def add_to_database(df_EntryData, tbl_name, db_name):
    """
    Add dataframe to
    database: db_name
    table: tbl_name

    Rows are appended to the table. A ValueError raised by to_sql is
    printed instead of propagated.
    """
    conn = create_connection(db_name)
    try:
        # Write to the table that was asked for, not to a notebook global.
        df_EntryData.to_sql(tbl_name, con=conn, if_exists='append')
        print("Data added to DB: \n", df_EntryData)
    except ValueError as e:
        print(e)
        print("Data was not added to the DB")
    finally:
        # Close the connection even when to_sql raises something unexpected.
        conn.close()
    
    
def get_max_index(tbl_name, db_name):
    '''
    Get the maximum index value of the specified tbl_name

    tbl_name: table to query; it must have a column named "index"
    db_name: path of the SQLite database file
    return: the largest value in the index column, or None when the table
        has no rows or the query fails (the error is printed)
    '''
    conn = create_connection(db_name)
    c = conn.cursor()
    try:
        # Query the table that was passed in, not a notebook global.
        c.execute("SELECT MAX([index]) FROM " + tbl_name)
        return c.fetchone()[0]
    except Error as e:
        print(e)
        return None
    finally:
        conn.close()
    

def table_to_df(tbl_name, db_name):
    """
    Read every row of tbl_name from db_name into a DataFrame.

    tbl_name: table to read
    db_name: path of the SQLite database file
    return: DataFrame with the table contents, or None when the query
        fails (the error is printed)
    """
    # Bound up front so the return below is valid when the query fails.
    df_fromDB = None
    conn = db.connect(db_name)
    try:
        df_fromDB = pd.read_sql_query("SELECT * FROM " + tbl_name + ";", conn)
    except (Error, pd.errors.DatabaseError) as e:
        # pandas wraps driver errors in its own DatabaseError, which is not
        # a subclass of sqlite3.Error, so both have to be caught.
        print(e)
    finally:
        conn.close()

    return df_fromDB

### Get data from BLS

In [3]:
def get_BLS_data(seriesID, startyear, endyear):
    """
    Download one time series from the BLS public API (v2).

    seriesID: BLS series identifier, e.g. "CUUR0000SA0"
    startyear: first year to request (converted with str())
    endyear: last year to request (converted with str())
    return: the decoded JSON response as Python objects
    raises: KeyError when the BLS_API_key environment variable is not set;
        requests exceptions on timeout, connection failure or a non-2xx
        HTTP status
    """
    base_url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'  # fixed API endpoint
    headers = {'Content-type': 'application/json'}  # the payload is sent as JSON

    # The series requested is the one the caller passed in; it is no longer
    # overwritten with a hard-coded ID.
    parameters = {
        "seriesid": [seriesID],
        "startyear": str(startyear),
        "endyear": str(endyear),
        "catalog": True,
        "calculations": False,
        "annualaverage": False,
        "aspects": False,
        "registrationkey": os.environ['BLS_API_key']
    }

    data = json.dumps(parameters)  # Converts the Python dictionary to JSON

    # A timeout keeps the notebook from hanging forever on a stalled request.
    response = requests.post(base_url, data=data, headers=headers, timeout=30)
    # Fail on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()

    return response.json()

### Create and Format Dataframe

In [7]:
def create_df_from_json_for_sqlite(seriesID, db_name, bls_json=None):
    """
    Build a DataFrame from a BLS API response, ready to be stored in SQLite.

    seriesID: name of the table in db_name that holds this series
    db_name: path of the SQLite database file
    bls_json: decoded BLS API response; when omitted, the notebook-level
        variable json_data is used (the previous behaviour)
    return: DataFrame of the first series in the response, sorted by date,
        with the columns year, period, periodName, value, date and
        description. Its index is named "index" and continues after the
        largest index already stored in the table (it starts at 0 when the
        table is missing or empty).
    """
    payload = json_data if bls_json is None else bls_json
    series = payload['Results']['series'][0]

    # If the table exists, continue numbering after its current max index.
    index_start = 0
    if check_table_exists(seriesID, db_name) != 0:
        max_index = get_max_index(seriesID, db_name)
        # get_max_index returns None for an empty table or a failed query.
        if max_index is not None:
            index_start = max_index + 1

    # Create dataframe from JSON
    df = pd.DataFrame(series['data'])

    # Format date column ("1959" + "M01" -> "1959-01") and sort by date
    df['date'] = df['year'] + df['period']
    df['date'] = df['date'].str.replace('M', '-')
    df['date'] = df['date'].astype('string')
    df = df.sort_values(by=['date'], ignore_index=True)

    # Set the index using the value from the max index in the db table
    df["index"] = range(index_start, index_start + len(df.index))
    df = df.set_index(df["index"])

    df = df.drop(columns=['footnotes', 'index'])
    df['value'] = df['value'].astype(float)
    df['description'] = series['catalog']['survey_name'] + ', ' + series['catalog']['series_title']

    return df

Unnamed: 0_level_0,year,period,periodName,value,date,description
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
480,1959,M01,January,29.0,1959-01,"CPI for All Urban Consumers (CPI-U), All items..."
481,1959,M02,February,28.9,1959-02,"CPI for All Urban Consumers (CPI-U), All items..."
482,1959,M03,March,28.9,1959-03,"CPI for All Urban Consumers (CPI-U), All items..."
483,1959,M04,April,29.0,1959-04,"CPI for All Urban Consumers (CPI-U), All items..."
484,1959,M05,May,29.0,1959-05,"CPI for All Urban Consumers (CPI-U), All items..."
...,...,...,...,...,...,...
715,1978,M08,August,66.0,1978-08,"CPI for All Urban Consumers (CPI-U), All items..."
716,1978,M09,September,66.5,1978-09,"CPI for All Urban Consumers (CPI-U), All items..."
717,1978,M10,October,67.1,1978-10,"CPI for All Urban Consumers (CPI-U), All items..."
718,1978,M11,November,67.4,1978-11,"CPI for All Urban Consumers (CPI-U), All items..."


### Create MacroData DB if it does not exist

In [4]:
#create_database(r"MacroData.sqlite3")

2.6.0


### Create table if it does not already exist

In [8]:
# If the table does not already exist, it is created with the columns and values in df
#create_table(df, seriesID, 'MacroData.sqlite3')

Table  CUUR0000SA0  already exists


### Check if data already exists in Database

In [9]:
'''
df_AddDB, df_dup_data, df_inconsistent_data = check_duplicate_data(df, seriesID, 'MacroData.sqlite3')

# Deal with duplicate data
if not df_dup_data.empty:
    print("Duplicate Data: \n", df_dup_data)
else:
    del df_dup_data
    
# Deal with inconsistent data
if not df_inconsistent_data.empty:
    print("DATA FROM DATAFRAME DOES INCONSISTENT WITH DATA IN DATABASE: \n", df_inconsistent_data)
else:
    del df_inconsistent_data

df_AddDB
'''

Unnamed: 0,year,period,periodName,value,date,description
480,1959,M01,January,29.0,1959-01,"CPI for All Urban Consumers (CPI-U), All items..."
481,1959,M02,February,28.9,1959-02,"CPI for All Urban Consumers (CPI-U), All items..."
482,1959,M03,March,28.9,1959-03,"CPI for All Urban Consumers (CPI-U), All items..."
483,1959,M04,April,29.0,1959-04,"CPI for All Urban Consumers (CPI-U), All items..."
484,1959,M05,May,29.0,1959-05,"CPI for All Urban Consumers (CPI-U), All items..."
...,...,...,...,...,...,...
715,1978,M08,August,66.0,1978-08,"CPI for All Urban Consumers (CPI-U), All items..."
716,1978,M09,September,66.5,1978-09,"CPI for All Urban Consumers (CPI-U), All items..."
717,1978,M10,October,67.1,1978-10,"CPI for All Urban Consumers (CPI-U), All items..."
718,1978,M11,November,67.4,1978-11,"CPI for All Urban Consumers (CPI-U), All items..."


### Add data to Database if it does not already exist

In [10]:
#add_to_database(df_AddDB, seriesID, 'MacroData.sqlite3')

Data added to DB: 
      year period periodName value     date  \
480  1959    M01    January  29.0  1959-01   
481  1959    M02   February  28.9  1959-02   
482  1959    M03      March  28.9  1959-03   
483  1959    M04      April  29.0  1959-04   
484  1959    M05        May  29.0  1959-05   
..    ...    ...        ...   ...      ...   
715  1978    M08     August  66.0  1978-08   
716  1978    M09  September  66.5  1978-09   
717  1978    M10    October  67.1  1978-10   
718  1978    M11   November  67.4  1978-11   
719  1978    M12   December  67.7  1978-12   

                                           description  
480  CPI for All Urban Consumers (CPI-U), All items...  
481  CPI for All Urban Consumers (CPI-U), All items...  
482  CPI for All Urban Consumers (CPI-U), All items...  
483  CPI for All Urban Consumers (CPI-U), All items...  
484  CPI for All Urban Consumers (CPI-U), All items...  
..                                                 ...  
715  CPI for All Urban Consu

In [11]:
#df_cpi_not_seasonal = table_to_df(seriesID, 'MacroData.sqlite3')
#df_cpi_not_seasonal

Unnamed: 0,index,year,period,periodName,value,date,description
0,0,1919,M01,January,16.5,1919-01,"CPI for All Urban Consumers (CPI-U), All items..."
1,1,1919,M02,February,16.2,1919-02,"CPI for All Urban Consumers (CPI-U), All items..."
2,2,1919,M03,March,16.4,1919-03,"CPI for All Urban Consumers (CPI-U), All items..."
3,3,1919,M04,April,16.7,1919-04,"CPI for All Urban Consumers (CPI-U), All items..."
4,4,1919,M05,May,16.9,1919-05,"CPI for All Urban Consumers (CPI-U), All items..."
...,...,...,...,...,...,...,...
715,715,1978,M08,August,66.0,1978-08,"CPI for All Urban Consumers (CPI-U), All items..."
716,716,1978,M09,September,66.5,1978-09,"CPI for All Urban Consumers (CPI-U), All items..."
717,717,1978,M10,October,67.1,1978-10,"CPI for All Urban Consumers (CPI-U), All items..."
718,718,1978,M11,November,67.4,1978-11,"CPI for All Urban Consumers (CPI-U), All items..."


In [33]:
#con = db.connect('MacroData.sqlite3')
#c = con.cursor()
#c.execute("DROP TABLE " + seriesID)
#con.commit()
#con.close()