DSC540 Term Project
Kurt Stoneburner

California COVID019 Ethnicity Data
https://data.ca.gov/dataset/covid-19-cases/resource/7e477adb-d7ab-4d4b-a198-dc4c6dc634c9

API Example: https://data.ca.gov/api/3/action/datastore_search?resource_id=7e477adb-d7ab-4d4b-a198-dc4c6dc634c9&limit=5

Requests Documentation: https://www.w3schools.com/python/ref_requests_response.asp


In [1]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
import sqlite3
import os
import numpy as np
import datetime

In [2]:
#//*** Build Dictionary to hold Global values.
#//*** Placing Globals in a dictionary, keeps things tidy and helps with scope.
g = {
    #//*** Values for the API call
    "api" : {
        "url" : "https://data.ca.gov",
        "ethnic" : {
            "url" : "/api/3/action/datastore_search?resource_id=7e477adb-d7ab-4d4b-a198-dc4c6dc634c9",
            "colnames" : [], #//*** Column names
            #//*** Additional Column name attributes. Probably not needed. But ingesting anyway.
            #//*** Key - colname, value is attributes
            "attrib" : {}
        },#//*** end Ethnic
        "cases" : {
            "url" : "/api/3/action/datastore_search?resource_id=926fd08f-cc91-4828-af38-bd45de97f8c3",
            "colnames" : [], #//*** Column names
            #//*** Additional Column name attributes. Probably not needed. But ingesting anyway.
            #//*** Key - colname, value is attributes
            "attrib" : {}, 
        }#//*** end Cases
            
        
        
    }, #//*** CLOSE api
    "weburl" : "https://covidtracking.com/data/state/california/cases",

} #//***CLOSE g

In [3]:
#//*** Get the whole database 100 records at a time.
#//*** This request gets the first 100 records. Future calls are handled in a loop
#response = requests.get(g['api']['url']+g["api"]["ethnic"]["url"])

#print(response)
#print(cases_url)


In [4]:
#//*** Build a data frame returning all values from a California Data Source API
def build_df_from_CA_API(url):
    
    #//*** Build the attributes for the API. This includes column names and column attributes which includes column
    #//*** Type and other details

    #//*** Request the URL
    response = requests.get(url)
    
    #//*** Check for valid response
    if response.ok == False:
        #//*** Trouble with API, so some error handling here.
        print("Trouble fetching API data")
        print(response)
    else:
        #//*** Valid Response
        #//*** Convert response.content to a dictionary using JSON
        rawOBJ=json.loads(response.content)
        
        #//*** Peek at the results
        #print(rawOBJ["result"].keys())
        #for key, value in rawOBJ.ites():
        #    print(f"{key} : {value}")
        
        #//*** Initialize list of column names
        colnames = []
        
        #//*** Attrib_dict contains the attributes of each column
        #//*** key = Column name
        #//*** value = dictionary of attributes
        attrib_dict = {}
        
        #//*** Parse the [results][fields] key for data
        rawFields = rawOBJ["result"]['fields']
        
        #//*** Loop through the rawfields dictionary.
        #//*** each LoopOBJ contains a column name and column attributes
        for loopOBJ in rawFields:
            
            #//*** Build temporary attributes for each loop instance
            loopAttrib = {}

            #//*** All Columns have an info field except _id.
            if 'info' in loopOBJ.keys():
                loopAttrib = loopOBJ["info"]

            #//*** Add Type to loopAttrib
            loopAttrib['type'] = loopOBJ['type']

            #//*** The column name is the ID field. Append the id field to the colnames list
            colnames.append(loopOBJ['id'])

            #//*** Assign the attributes dictionary based on column name
            attrib_dict[ loopOBJ['id'] ] = loopAttrib
        
        """
        #//*** Display column names and attributes
        print(f"Column Names: {colnames}")
        
        print("Attrib_dict")
        for x in colnames:
            print(attrib_dict[x])
        """
        
        #//*************************************
        #//*** Process the row an column data 
        #//*************************************

        #//*** Build dictionary to hold raw data (rd)
        rd = {}

        #//*** Use each column as a key, create and empty list for each column
        for x in colnames:
            rd[x] = []

        #//################################################################################################
        #//*** While rawOBJ['success'] is true. Which implies we've successfully retrieved and API request
        #//*** And is our loop mechanism to keep requesting records in 100 record incremenets.
        #//################################################################################################
        while rawOBJ["success"]:

            #//*** Get Records as a List for each entry
            rawRecords = rawOBJ['result']["records"]

            #//*** Print a visual note for each loop iteraction / API call
            print(f"Processing {len(rawRecords)}")

            #//*** Parse Each Record.
            for record in rawRecords:
                #//*** Each Record is an object.
                #//*** Each key is a column name.
                #//*** Loop through the Column names and append the value to the column stored in rd

                #//*** This is the sauce to that converts the object values into lists based on columns
                #//*** It's kind of cool that the sausage is essentially made with two lines of code
                #//*** The rest is just setup and control code.
                for col in colnames:
                    #//*** Assign each element to the appropriate column
                    rd[col].append(record[col])

            #//################################
            #//*** Check if loop needs to end.
            #//################################
            #//*** If the number of records returned is less than the limit, we are done
            if len(rawOBJ['result']['records']) < rawOBJ['result']['limit']:
                print("Quitting Loop")
                break

            #//*** Check if there are more records to grab
            #//*** next contains the URL of the next request
            #//*** The API is limited to 100 records per API request.
            if 'next' in rawOBJ['result']['_links'].keys():
                ##//***API CODE HERE
                nextCall = rawOBJ['result']['_links']['next']

                #//*** Add the Next value to the base API call
                response = requests.get(g['api']['url']+nextCall)
                rawOBJ=json.loads(response.content)
                if rawOBJ["success"] == False:
                    #//*** Break if Success returns False
                    break

            else:
                #//*** Quit Here
                break
        ########################################################
        #//*** END while rawOBJ['success'] == True
        #//*** Data successfully gathered to the rd dictionary
        ########################################################

        #//*** Build the dataframe
        df = pd.DataFrame()

        #//*** Create a column based on the values gathered in rd[column name]
        for col in colnames:
            df[col] = rd[col]

        #//*** return the dataframe, column names, attribute dictionary
        return df,colnames,attrib_dict

#//*** END build_df_from_CA_API
        


In [5]:
####################################################################################################
#//*** Build ethnic_df from the API
#//*** This is broken out as a function to keep the code cleaner
####################################################################################################

covid_ethnic_df = pd.DataFrame

ethnic_url = g['api']['url']+g["api"]["ethnic"]["url"]
covid_ethnic_df = build_df_from_CA_API(ethnic_url)[0]
#covid_ethnic_df, g['api']['ethnic']['colnames'], g['api']['ethnic']['attrib'] = buil_df_from_CA_API(ethnic_url)



Trouble fetching API data
<Response [404]>


TypeError: 'NoneType' object is not subscriptable

In [None]:
####################################################################################################
#//*** Build the covid_cases_df from the API. These are the county COVID numbers by date
#//*** This is broken out as a function to keep the code cleaner
####################################################################################################
cases_url = g['api']['url']+g["api"]["cases"]["url"]
covid_cases_df = build_df_from_CA_API(cases_url)[0]


In [None]:
covid_cases_df.head(20)

In [None]:
print(covid_ethnic_df.columns)
print(covid_ethnic_df['race_ethnicity'].unique())
print(covid_ethnic_df['_id'].unique())
print(covid_ethnic_df.head(10))


In [None]:
print(covid_ethnic_df[covid_ethnic_df['race_ethnicity'] == 'Multi-Race'].iloc[0]['percent_ca_population'])
print(covid_ethnic_df[covid_ethnic_df['race_ethnicity'] == 'Multiracial'].iloc[0]['percent_ca_population'])
print(covid_ethnic_df[covid_ethnic_df['race_ethnicity'] == 'Other'].iloc[0]['percent_ca_population'])
print(covid_ethnic_df[covid_ethnic_df['race_ethnicity'] == 'Other'].iloc[0]['deaths'])
print(covid_ethnic_df[covid_ethnic_df['race_ethnicity'] == 'Other'].tail(5))



In [None]:
#//*** Process Website using Beautiful Soup
response = requests.get(g['weburl'])

if response.ok == True:
    #//*** Make soup...Beautiful Soup
    soup = BeautifulSoup(response.content,'html.parser')

In [None]:
table = soup.find('table')

#//*** Get the Table Headers. These will be our data frame Columns.
ths = table.find_all("th")

#//*** initialize a list to hold the column names
colnames = []

#//*** Columnnames are the first value contained in contents
for th in ths:
    if th.contents[0] == 'Cases (confirmed plus probable)':
        colnames.append('confirmed')    
    else: 
        colnames.append(th.contents[0])

print(colnames)

#//**********************************
#//*** Initialize tableDict.
#//**********************************
#//*** tableDict is a dictionary container to hold row data.
#//*** The tableDict will hold each of the row lists. The keys will be each colname
tableDict = {}

#//*** Initialize tableDict
for name in colnames:
    tableDict[name] = []

#//***********************************************
#//*** Process each tablerow
#//*** The sausage is primarily made here
#//***********************************************

#//*** Get a BS list of table rows 
trs = table.find_all("tr")

#//*** For each table row in tablerows
for tr in trs:
    #//*** Skip the table header
    if len(tr.find_all("th")) == 0:
        #//*** Loop through the colnames Index
        #//*** The gets the key value to store the TD data
        #//*** Get a TD with a corresponding index value and extract the text
        for x in range(0,len(colnames)):
            #//*** Append the text to the appropriate colname list.
            #//*** Using index values keeps everthing aligned.
            #print(tr.find_all('td')[x].find_all('span')[1].contents)
            tableDict[colnames[x]].append(tr.find_all('td')[x].find_all('span')[1].contents[0])

#print(tableDict)

#//*** Remove the Probable Cases Column. These are all N/A
colnames.remove('Probable Cases')
colnames.remove('Confirmed cases')
print(colnames)

#//*** Build the initial dataframe
covid_project_df = pd.DataFrame()

#//*** Convert the Date Object to a datetime object
tableDict['Date'] = [datetime.datetime.strptime(tableDict['Date'][x],'%B %d, %Y') for x in range(0,len(tableDict['Date'])) ]
print(tableDict.keys())
for x in range(0,len(tableDict['confirmed'])):
    tableDict['confirmed'][x] = tableDict['confirmed'][x].replace(",","")
    tableDict['New cases'][x] = tableDict['New cases'][x].replace(",","")

#//*** Loop through each column name in colnames.
#//*** Each col is a key in tableDict. Add each key / list to the dataframe
for col in colnames:
    if col in ['confirmed','New cases']:
        covid_project_df[col] = pd.Series(tableDict[col])
    else:
        covid_project_df[col] = pd.Series(tableDict[col])

covid_project_df.rename(columns = {"New cases":"confirmed_new","Date" : "date"},inplace=True)
print(covid_project_df.head(5))



In [None]:
#//*** Process Flat File: California Ethnicity demographics - cc-est2019-alldata-06.csv
raw_ethnic_pop_df = pd.read_csv("cc-est2019-alldata-06.csv")

#//*** Data includes values for last twelve years. We only want data for the last year.

#//*** Rebuild raw_ethnic_pop_df using only the last year (most recent) data
raw_ethnic_pop_df = raw_ethnic_pop_df[raw_ethnic_pop_df['YEAR']==raw_ethnic_pop_df['YEAR'].max()]

#//*** Ethnic data is broken down by age. At this stage we will only use the totals of all ages
#//*** Only use AGEGRP == 0
raw_ethnic_pop_df = raw_ethnic_pop_df[raw_ethnic_pop_df['AGEGRP']==raw_ethnic_pop_df['AGEGRP'].min()]


raw_ethnic_pop_df.head(20)

#//*** More processing below. 

In [None]:
#//*** Import data from the community resillance estimate
raw_res_df = pd.read_csv('cre-2018-a11.csv')

#//*** Only collect California Fields.
#//*** State Code is 6. Reference the value stored in the raw_ethnic_pop_df to keep everything linked dynamically.
#//*** You never know when data sources get moved around. At least the federal definitions of things should remain
#//*** constant if they ever have a reason for change.

#//*** California Only results
raw_res_df = raw_res_df[raw_res_df['state'] == raw_ethnic_pop_df['STATE'].iloc[0]]

#//*** Risk Factor Groups are presummed by county in tract 0
#//*** Gather all values of tract 0
raw_res_df = raw_res_df[raw_res_df['tract'] == 0 ]

#//*** This generates a table reflecting three sets of risk factors by county, 0, 1-2, 3 or more risk factors.
#//*** Each of these rows needs to be converted to a column that can be added as an attribute for each county in 
#//*** pop_attrib_df

#//*** Use rf (risk factor) as a container for lists that will be turned back into Series.
#//*** Adding to existing Series and DataFrames is expensive (whatever that means). 
#//*** Therefore data should be built into lists and converted back into dataframes once it's all assembled. 
#//*** Keeping the county fibs value (rf_fibs) since it's the key to keeping the data linked
rf = {"rf_fibs" : [],
      "0rf_num" : [],
      "0rf_rate" : [],
      "0rf_err" : [],
      "1-2rf_num" : [],
      "1-2rf_rate" : [],
      "1-2rf_err" : [],
      "3plrf_num" : [],
      "3plrf_rate" : [],
      "3plrf_err" : [],
}

#################################################################################################
#//*** rf_key_dict associates the rfgrp value with it's corresponding dictionary list.
#//*** The goal is to hardcode these tables, then handle the work with a generic loop thing
#//*** If we need to adjust data collection at later time we should only have to adjust these
#//*** Structures
#################################################################################################
rf_key_dict = {
    "0RF" : {"prednum" : "0rf_num", "predrt" : "0rf_rate", "predrt_moe" : "0rf_err"},
    "1-2RF" : {"prednum" : "1-2rf_num", "predrt" : "1-2rf_rate", "predrt_moe" : "1-2rf_err"},
    "3PLRF" : {"prednum" : "3plrf_num", "predrt" : "3plrf_rate", "predrt_moe" : "3plrf_err"}
} 

####################################################################################################
#//*** My preference is to get a list of unique counties extract the data with a loop of counties. 
#//*** Let's do right by DSC530 and use the groupby command
############################################################################

#//*** BEGIN - raw_res_df.groupby('county')
for county_tuple in raw_res_df.groupby('county'):
    #//*** The county is the first value of the group tuple, since it's the groupby field.
    rf["rf_fibs"].append(county_tuple[0])
    
    #//*** Get the resulting dataframe containing just the county values
    loop_df = county_tuple[1]
    
    ####################################################################################################
    #//*** Loop through the 3 different Risk Factor Groups, by using the key values in rf_key_dict
    ####################################################################################################
    
    #//*** BEGIN - rf_key in rf_key_dict.keys():
    for rf_key in rf_key_dict.keys():
        
        #//*** Loop through the sub dictionary to associate the column/row value with the correct list
        #//*** if rf.
        #//*** rf_key - Risk Factor group 0rf, 1-2rf, 3plrf. There is one of each value per row
        #//*** column - Data frame Column
        #//*** key_dict_list - is the key value in rf. Each item is stored in a list.
        
        #//*** BEGIN - column, key_dict_list in rf_key_dict[rf_key].items():
        for column, key_dict_list in rf_key_dict[rf_key].items():
            #print(f"{rf_key} - {column} : {key_dict_list}")
            
            #//*** There's a lot going on here, let's spread it out to be more readable
            #//*** Grab each risk factor grop value (rfgrp) by column (prednum,predrt,predrt_moe) 
            loop2_value = loop_df[loop_df['rfgrp'] == rf_key][column].iloc[0]
            
            #//*** Assign the loop2_value to the appropriate list which is defined by the value: key_dict_list
            rf[key_dict_list].append(loop2_value)
        
        #//*** END - column, key_dict_list in rf_key_dict[rf_key].items():    
        
    #//*** END - rf_key in rf_key_dict.keys():
    
#//*** END - raw_res_df.groupby('county')


In [None]:
#//*** Build a dataframe to check our work
risk_factors_df = pd.DataFrame()

#//*** Add columns to the DataFrame by looping through rf
for key,value in rf.items():
        risk_factors_df[key] = pd.Series(value)

#//*** Reindex to county fibs designation
risk_factors_df = risk_factors_df.set_index('rf_fibs')

#//*** Print the dataframes and check the columns match
print(risk_factors_df.head(10))
print(raw_res_df.head(30))

In [None]:
#//*** Convert Applicable federal based census codes to California Census Codes.
#//*** Description of Federal Column Values
#//*** https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2010-2019/cc-est2019-alldata.pdf

#//*** Notably, Federal census regards Hispanic as an ethnicity not a race. For Example: People can be Hispanic White,
#//*** Hispanic Black, or Hispanic Asian.
#//*** California treats all hispanics as Latino
#//*** Latino = H_MALE, H_FEMALE Hispanic
#//*** White - NHWA_MALE, NHWA_FEMALE (Not Hispanic White)
#//*** Asian - NHAA_MALE, NHAA_FEMALE (Not Hispanic Asian) 
#//*** Black - NHBA_MALE, NHBA_FEMALE (Not Hispanic Black) 

#//*** Amer Indian - NHIA_MALE, NHIA_FEMALE (Not Hispanic, American Indian) 

#//*** Hawaiian - NHNA_MALE, NHNA_FEMALE (Not Hispanic, Hawaiian) 

#//*** California has the following columns: Multiracial, Other, Multirace. I could not find a good definition of these
#//*** These represent less than 5% of the population. Small but not too small to be ignored. These will combined into
#//*** Single attribute Other and combined with NHTOM_MALE, NHTOM_FEMALE - Not Hispanic Two or more races

#//*** Build a new data frame to hold the sanitized values.
pop_attrib_df = pd.DataFrame()

#//*** The County Fibs code is shared between the federal census data and the Community Resilliance Estimate
pop_attrib_df['cty_fibs'] = raw_ethnic_pop_df['COUNTY']

#//*** County Name will be the Common attribute to link to the timeseries Data.
#//*** Standardize the County name. Remove County from the column name 
pop_attrib_df['county'] = raw_ethnic_pop_df['CTYNAME'].str.replace(" County","")
pop_attrib_df['population'] = raw_ethnic_pop_df['TOT_POP']

clean_cols = { 'Latino' : ['H_MALE', 'H_FEMALE'], 
              'White' : ['NHWA_MALE', 'NHWA_FEMALE'],
              'Asian' : ['NHAA_MALE', 'NHAA_FEMALE'],
              'Black' : ['NHBA_MALE', 'NHBA_FEMALE'],
              'American Indian or Alaska Native' : ['NHIA_MALE','NHIA_FEMALE'],
              'Hawaiian' : ['NHNA_MALE', 'NHNA_FEMALE'],
              'Multiracial' : ['NHTOM_MALE', 'NHTOM_FEMALE']
            
            }

#//*** Combine male and female columns and store to column with same name as California Data
#//*** Loop through the clean_cols dictionary, key is California name, value is Federal columns to combine
#//*** These are the easy 1:1 columns
#//*** Hawaiian and Other will need adjustment in the Califnornia Side of the Dataset.


#//*** California Column name = Federal category male + Federal Category female
for ca_name,fed_names in clean_cols.items():
    pop_attrib_df[ca_name] = raw_ethnic_pop_df[fed_names[0]] + raw_ethnic_pop_df[fed_names[1]] 

#              'Native Hawaiian or Pacific Islander' :
#              'Native Hawaiian and other Pacific Islander'
#            'Other'

#//*** Assign the index to the county fibs number
pop_attrib_df = pop_attrib_df.set_index('cty_fibs')

#//*** Join risk factors with pop attrib
pop_attrib_df = pop_attrib_df.join(risk_factors_df)

#//*** We've successfully combined ethnic/racial population data with estimated COVID risk factors.
#//*** I should run some correlations just for fun. I suspect there is something interesting to find

print(pop_attrib_df.head(20))



In [None]:
#//*** Export all Data frames to CSVs

#//*** Export API DataFrames to File
covid_ethnic_df.to_csv("z_covid_ethnic_df.csv")
covid_cases_df.to_csv("z_covid_cases_df.csv")

#//*** Export pop_attrib_df to CSV
pop_attrib_df.to_csv("z_pop_attrib_df.csv")

#//*** Export Covid Tracking Project Dataframe to CSV
covid_project_df.to_csv("z_covid_project_df.csv")

In [None]:
#//*** Columns to remove from imported CSVs. We should be able to kill these on import if we were cool.
#//*** But we're not, so we'll use an expedient column delete list.

del_cols = ['Unnamed: 0', '_id']
#//*** Load datframes from file, because we mess them up
covid_ethnic_df = pd.read_csv("z_covid_ethnic_df.csv")
covid_cases_df = pd.read_csv("z_covid_cases_df.csv")
pop_attrib_df = pd.read_csv("z_pop_attrib_df.csv")
covid_project_df = pd.read_csv("z_covid_project_df.csv")

#//***********************************************************************************
#//*** Remove excess columns from read_csv
#//*** Use the loop in case we need to delete columns that are not exclusive to all
#//***********************************************************************************
for x in del_cols:
    if x in covid_cases_df.columns:
        covid_cases_df.drop([x], axis=1, inplace=True)

    if x in covid_ethnic_df.columns:
        covid_ethnic_df.drop([x], axis=1, inplace=True)
    
    if x in pop_attrib_df.columns:
        pop_attrib_df.drop([x], axis=1, inplace=True)
    
    if x in covid_project_df.columns:
        covid_project_df.drop([x], axis=1, inplace=True)
        
#print(covid_cases_df.head())
#print(covid_ethnic_df.head())
#print(pop_attrib_df.head())
#print(covid_project_df.head())



In [None]:
races = covid_ethnic_df['race_ethnicity'].unique()

for x in races:
    print(f"{x} {covid_ethnic_df[ covid_ethnic_df['race_ethnicity'] == x ]['race_ethnicity'].count()}")

In [None]:
#//*** Sort Time Series by date and reset index
covid_cases_df = covid_cases_df.sort_values(by='date')
covid_ethnic_df = covid_ethnic_df.sort_values(by='date')

#//*** Reset the index
#covid_ethnic_df.reset_index(inplace=True)





#//*** Get first Ethnic_df date
ethnic_start_date = covid_ethnic_df['date'].iloc[0]
print(f"Ethinic State: {ethnic_start_date}")

#//*************************************************************************************************
#//*** Get the iloc (index #) of the first covid_case_df entry to match the date in covid_ethic_df
#//*** Compound code
#//*** 1. Get the entries where the date matches ethnic start date
#//*** 2. Get the first value from the list
#//*** 3. Get the Index (name) of that entry
#//*** 4. Get the iloc value of the name entry. This is the value to slice from covid_cases_df
#//*************************************************************************************************
#//*** I hate these, but I see the appeal
#//*************************************************************************************************
covid_start_iloc = covid_cases_df.index.get_loc(covid_cases_df[ covid_cases_df['date'] == ethnic_start_date].iloc[0].name)

#print(covid_cases_df.iloc[covid_start_iloc])
#//*** Merge Time Series covid_ethnic_df - covid_cases_df


#//*** Start the Bg Table DF with a subset of
bt_df = covid_cases_df.iloc[covid_start_iloc:]
print
print(covid_ethnic_df.head())
print(bt_df.head())
    
    


In [None]:
races = covid_ethnic_df['race_ethnicity'].unique()

for x in races:
    print(f"{x} {covid_ethnic_df[ covid_ethnic_df['race_ethnicity'] == x ]['race_ethnicity'].count()}")
    
#//*** Reokace Native hawaiian and/or Paciic islander with Hawaiian
#//*** It makes sense to combine these categories. Calling them all Hawaiian is a tad insensitive. I should change it.

#covid_ethnic_df['race_ethnicity']=covid_ethnic_df['race_ethnicity'].str.replace('Native Hawaiian or Pacific Islander','Hawaiian')
#covid_ethnic_df['race_ethnicity']=covid_ethnic_df['race_ethnicity'].str.replace('Native Hawaiian and other Pacific Islander','Hawaiian')
#covid_ethnic_df['race_ethnicity']=covid_ethnic_df['race_ethnicity'].str.replace('Multi-Race','Multiracial' )

In [None]:

races = covid_ethnic_df['race_ethnicity'].unique()

for x in races:
    print(f"{x} {covid_ethnic_df[ covid_ethnic_df['race_ethnicity'] == x ]['race_ethnicity'].count()}")

#//*** Convert daily ethnic covid numbers to attributes
ethnic_dates = covid_ethnic_df.groupby('date')

for date_df in ethnic_dates:
   # print(date_df[1].stack())
    print(date_df[1])
    break

bt_dates = bt_df.groupby('date')

for date_df in bt_dates:
    print(date_df[1].sort_values('county'))
    break



In [None]:
#['Native Hawaiian or Pacific Islander','Native Hawaiian and other Pacific Islander']
#'Hawaiian'

print(f"{covid_ethnic_df=}".split('=')[0])

In [None]:
#//**** Dump all data to a database
#//*** Build a new database
#//*** Start from scratch each run. Therefore delete any previous version
#//*** Databases DON'T run this way. This is for expedience. The alternative is to create a living database and 
#//*** Update the tables as needed. That's a bit 
db_filename = 'covid_data.sqldb'

#//*** Delete the previous db instance if it exists.
if os.path.exists(db_filename):
    os.remove(db_filename)
    
#//*** Start a database instance
con = sqlite3.connect(db_filename)

#//*** Send each Dataframe to the database.
#//*** We'll use a string list to name the dataframes, then convert the string name to the loop_df (loop dataframe)
#//*** Which will do the individual database loading process
for df_name in ['covid_ethnic_df','covid_cases_df','pop_attrib_df','covid_project_df']:

    #//******************************************    
    #//*** Build loop_df based on string name
    #//******************************************
    if df_name == 'covid_ethnic_df':
        loop_df = covid_ethnic_df
    elif df_name == 'covid_cases_df':
        loop_df = covid_cases_df
    elif df_name == 'pop_attrib_df':
        loop_df = pop_attrib_df
    elif df_name == 'covid_project_df':
        loop_df = covid_project_df
    else:
        #//*** Display and error message for items missed. 
        print(f"Failed to process: {df_name}")
        continue
    loop_df.to_sql(df_name, con=con)


#//*** Close and Exit the Database. For 
con.close()
con.__exit__

#print(covid_ethnic_df.dtypes)

#for x in range(0,len(covid_ethnic_df.columns)):
#    print(f"{covid_ethnic_df.columns[x]} {covid_ethnic_df.dtypes[x]}")
