In [1]:
import requests
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import gamma
from sklearn import linear_model
import numpy as np

In [2]:
import c3aidatalake

In [3]:
# Returns datetime object of the given date string
# Compare dates with >, <, or ==
def timeFormat(s):
  """Parse a timestamp string like "2020-09-12T02:52:54Z" into a datetime.

  Args:
    s: String in the exact layout "%Y-%m-%dT%H:%M:%SZ".

  Returns:
    A naive datetime.datetime (no tzinfo attached; the trailing "Z" is
    matched as a literal character, not interpreted as a time zone).

  Raises:
    ValueError: If s does not match the expected layout.
  """
  # Imported here because the notebook's import cell never brings
  # `datetime` into scope; without this the call fails with NameError.
  from datetime import datetime
  return datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ")

# Returns float timestamp (seconds since 1 Jan 1970 UTC)
# Compare timestamps with >, <, or ==
def timestampFormat(s):
  """Convert a "%Y-%m-%dT%H:%M:%SZ" string to POSIX seconds.

  Args:
    s: Timestamp string accepted by timeFormat.

  Returns:
    Float number of seconds since the Unix epoch.

  Raises:
    ValueError: If s does not match the layout timeFormat expects.
  """
  from datetime import timezone
  # timeFormat returns a naive datetime; calling .timestamp() on it directly
  # would interpret the value in the machine's local time zone. The input
  # ends in "Z" (UTC), so pin the zone before converting.
  return timeFormat(s).replace(tzinfo=timezone.utc).timestamp()

# Converting a whole column of date strings to datetime objects:
# DFtimeFormat(dataframe name, string of column header which contains the time strings)
def DFtimeFormat(df, column_of_timestrings):
  """Replace a column of timestamp strings with parsed datetimes, in place.

  Args:
    df: DataFrame to modify; the named column is overwritten.
    column_of_timestrings: Label of the column whose values are passed
      one by one to timeFormat.

  Returns:
    None. The result is visible only through the mutated df.
  """
  parsed = df[column_of_timestrings].apply(timeFormat)
  df[column_of_timestrings] = parsed
  return None

### Todo (by 11/3):
- Quantify government policy categories (LocationPolicySummary)
    - Bigger values for more restrictive policies
- Convert economic data to usable format
- Produce/visualize distributions and general statistics for data
    - Visualize dates of latest policies for states


In [4]:
# Pull the "locationpolicysummary" table from the C3.ai data lake.
# NOTE(review): limit = -1 presumably disables the row cap -- confirm against the API docs.
policies = c3aidatalake.fetch("locationpolicysummary", {"spec": {"limit": -1}})

Generating numerical columns from statePolicies:

Sora's Dictionaries:
'mandatoryQuarantine',
       'nonEssentialBusiness', 'largeGatherings', 'schoolClosure',
       'restaurantLimit', 'barClosures', 'faceCoveringRequirement',
       'PrimaryElectionPostponement', 'emergencyDeclaration',


In [5]:
{
    "tags": [
        "hide-input",
    ]
}
quantifyDicts = {}
quantifyDicts["easingOrder"] = {
    "Reopened" : 0, 
    "Proceeding with Reopening" : 1,
    "Paused" : 2, 
    "New Restrictions Imposed" : 3
}
quantifyDicts["stayAtHome"] = {
    "No Action" : 0,
    "Lifted" : 0,
    "Rolled Back to High Risk Groups" : 1,
    "New Stay at Home Order" : 2,
    "Statewide" : 2
}
quantifyDicts["mandatoryQuarantine"]  = {
    "No Action" : 0,
    "Lifted" : 0,
    "From Certain States (New)" : 1,
    "Rolled Back to Certain States" : 1,
    "From Certain States" : 1,
    "Rolled Back to International Travel" : 2,
    "All Travelers" : 3
}
quantifyDicts["nonEssentialBusiness"] = {
    "No Action" : 0,
    "All Non-Essential Businesses Permitted to Reopen" : 0,
    "Some Non-Essential Businesses Permitted to Reopen" : 1,
    "All Non-Essential Businesses Permitted to Reopen with Reduced Capacity" : 1,
    "Some Non-Essential Businesses Permitted to Reopen with Reduced Capacity" : 2,
    "New Business Closures or Limits" : 3
}
quantifyDicts["largeGatherings"] = {
    "Lifted" : 0,
    "No Action" : 0,
    "Expanded to New Limit Above 25" : 1,
    "New Limit on Large Gatherings in Place" : 1,
    "Expanded to New Limit of 25 or Fewer" : 2,
    ">10 People Prohibited" : 3,
    "All Gatherings Prohibited" : 4
}
quantifyDicts["schoolClosure"] = {
    "Rescinded" : 0,
    "Recommended Closure for School Year" : 1,
    "Recommended Closure" : 2,
    "Closed for School Year" : 3,
    "Closed" : 4
}
quantifyDicts["restaurantLimit"] = {
    "No Action" : 0,
    "Reopened to Dine-in Service" : 1,
    "Reopened to Dine-in Service with Capacity Limits" : 2,
    "New Service Limits" : 3,
    "Newly Closed to Dine-in Service" : 3
}
quantifyDicts["barClosures"] = {
    "Reopened" : 0,
    "New Service Limits" : 1,
    "Closed" : 2,
    "Newly Closed" : 2
}
quantifyDicts["faceCoveringRequirement"] = {
    "No" : 0,
    "Required for Certain Employees" : 1,
    "Allows Local Officals to Require for General Public" : 1,
    "Required for Certain Employees; Allows Local Officials to Require for General Public" : 1,
    "Required for General Public" : 2
}
quantifyDicts["PrimaryElectionPostponement"] = {
    "No" : 0,
    "Postponed" : 1,
    "Canceled" : 2
}
quantifyDicts["emergencyDeclaration"] = {
    "Yes" : 0
}
quantifyDicts["waiveTreatmentCost"]  = {
    "No Action" : 0,
    "State-Insurer Agreement" : 1,
    "State Requires" : 2
}  
quantifyDicts["freeVaccine"] = {
    "No Action" : 0,
    "State-Insurer Agreement" : 1,
    "State Requires" : 2
}
quantifyDicts["waiverOfPriorAuthorizationRequirements"] = {
    "No Action" : 0,
    "For COVID-19 Testing" : 1,
    "For COVID-19 Testing and Treatment" : 2
}
quantifyDicts["prescriptionRefill"] = {
    "No Action" : 0,
    "Expired" : 1,
    "State Requires" : 2
}
quantifyDicts["premiumPaymentGracePeriod"] = {
    "No Action" : 0,
    "Expired" : 1,
    "COVID-19 Diagnosis/Impacts Only" : 2,
    "Grace Period Extended for All Individual Policies" : 3,
    "All Policies" : 4
}
quantifyDicts["marketplaceSpecialEnrollmentPeriod"] = {
    "No" : 0,
    "Ended" : 1,
    "Active" : 2
}
quantifyDicts["section1135Waiver"] = {
    "Unapproved" : 0,
    "Approved" : 1
}
quantifyDicts["paidSickLeaves"] = {
    "No Action" : 0,
    "Proposed - March 2020" : 1,
    "Enacted" : 2
}
quantifyDicts["expandsAccesstoTelehealthServices"] = {
    "No" : 0,
    "Yes" : 1
}

In [6]:
# Remove the U.S. national policy row so only state-level rows remain.
# NOTE(review): 44 is a hard-coded index label; it silently targets a different
# row if the fetch ordering ever changes -- confirm it is still the national row.
statePolicies = policies.drop(index=44)

In [7]:
def mapperGenerator(colName, dicts=None):
    """Build the per-value conversion function for one policy column.

    Args:
        colName: Column label to build a mapper for.
        dicts: Optional {column: {category: code}} table. Defaults to the
            notebook-level quantifyDicts when omitted.

    Returns:
        A one-argument function. For a column present in the table it
        returns the numeric code of a category; for any other column it
        returns its argument unchanged.

    Raises:
        KeyError: Raised by the returned mapper (not by this function) when a
            value has no entry in the column's table. The message names the
            column and the offending value.
    """
    lookup = quantifyDicts if dicts is None else dicts

    if colName not in lookup:
        # Columns without a table (ids, timestamps, ...) pass through as-is.
        def mapper(val):
            return val
        return mapper

    table = lookup[colName]

    def mapper(val):
        try:
            return table[val]
        except KeyError:
            # Same exception type as a plain dict miss, but say which column
            # failed so an unmapped category is easy to locate.
            raise KeyError(
                "no numeric code for value {!r} in column {!r}".format(val, colName)
            ) from None
    return mapper

In [8]:
# quantify policy
# Every column is rewritten in place: columns that have an entry in
# quantifyDicts get their category strings replaced by numeric codes, all
# other columns are passed through unchanged by the identity mapper.
# NOTE: not re-runnable -- after one pass the mapped columns hold ints, which
# are not keys of the string-keyed tables, so a second run raises KeyError.
for col in statePolicies.columns:
    mper = mapperGenerator(col)
    statePolicies[col] = statePolicies[col].apply(mper)

In [9]:
# read in stategdp info
# adding quarter 2 gdp change as a column to statepolicies
import re

# First spreadsheet column becomes the row index (looked up by state name
# below); the second row of the sheet supplies the column headers.
stategdp = pd.read_excel(io = "qgdpstate1020_0.xlsx", index_col = 0, header = 1)

Q2GDPChange = []
# `policy_id` rather than `id`, so the builtin id() is not shadowed for the
# rest of the notebook.
for policy_id in statePolicies["id"]:
    # Ids look like "<State>_UnitedStates_Policy"; group 1 is the state name.
    m = re.match(r"(.+)_UnitedStates_Policy$", policy_id)
    if m is None:
        # Fail with the offending id instead of an opaque
        # "'NoneType' object is not subscriptable".
        raise ValueError("unexpected policy id format: {!r}".format(policy_id))
    Q2GDPChange.append(stategdp.loc[m[1], "2020Q2"])

# Plain assignment appends the column on the first run and overwrites it on a
# re-run; DataFrame.insert would raise because the column already exists.
statePolicies["Q2GDPChange"] = Q2GDPChange

Unnamed: 0,easingOrder,stayAtHome,mandatoryQuarantine,nonEssentialBusiness,largeGatherings,schoolClosure,restaurantLimit,barClosures,faceCoveringRequirement,PrimaryElectionPostponement,...,marketplaceSpecialEnrollmentPeriod,section1135Waiver,paidSickLeaves,expandsAccesstoTelehealthServices,id,lastSavedTimestamp,numSavedVersions,savedVersion,location.id,Q2GDPChange
0,2,0,0,1,0,3,1,0,2,0,...,0,1,0,0,Alabama_UnitedStates_Policy,2020-09-12T02:52:54Z,6,-1,Alabama_USA,-29.6
1,1,0,3,0,0,3,1,0,1,0,...,0,1,0,1,Alaska_UnitedStates_Policy,2020-09-12T02:52:54Z,5,-1,Alaska_UnitedStates,-33.8
2,3,0,0,3,1,3,3,2,1,0,...,0,1,2,1,Arizona_UnitedStates_Policy,2020-09-12T02:52:54Z,5,-1,Arizona_UnitedStates,-25.3
3,2,0,0,0,0,3,2,0,2,0,...,0,1,0,1,Arkansas_UnitedStates_Policy,2020-09-12T02:52:54Z,4,-1,Arkansas_UnitedStates,-27.9
4,3,2,0,3,4,1,3,2,2,0,...,2,1,2,1,California_UnitedStates_Policy,2020-09-12T02:52:54Z,5,-1,California_UnitedStates,-31.5
5,3,0,0,2,2,3,2,2,2,0,...,1,1,2,1,Colorado_UnitedStates_Policy,2020-09-12T02:52:54Z,5,-1,Colorado_UnitedStates,-28.1
6,1,0,1,1,2,3,2,2,2,1,...,1,1,2,1,Connecticut_UnitedStates_Policy,2020-09-12T02:52:54Z,5,-1,Connecticut_UnitedStates,-31.1
7,3,0,0,2,1,3,2,1,2,1,...,0,1,0,1,Delaware_UnitedStates_Policy,2020-09-12T02:52:54Z,5,-1,Delaware_UnitedStates,-21.9
8,1,0,1,2,1,3,2,2,2,0,...,2,1,2,1,District of Columbia_UnitedStates_Policy,2020-09-12T02:52:54Z,5,-1,DistrictofColumbia_UnitedStates,-20.4
9,3,0,0,1,0,1,2,2,1,0,...,0,1,0,0,Florida_UnitedStates_Policy,2020-09-12T02:52:54Z,5,-1,Florida_UnitedStates,-30.1


In [10]:
# Display the column labels of statePolicies as the cell output.
statePolicies.columns

Index(['easingOrder', 'stayAtHome', 'mandatoryQuarantine',
       'nonEssentialBusiness', 'largeGatherings', 'schoolClosure',
       'restaurantLimit', 'barClosures', 'faceCoveringRequirement',
       'PrimaryElectionPostponement', 'emergencyDeclaration',
       'waiveTreatmentCost', 'freeVaccine',
       'waiverOfPriorAuthorizationRequirements', 'prescriptionRefill',
       'premiumPaymentGracePeriod', 'marketplaceSpecialEnrollmentPeriod',
       'section1135Waiver', 'paidSickLeaves',
       'expandsAccesstoTelehealthServices', 'id', 'lastSavedTimestamp',
       'numSavedVersions', 'savedVersion', 'location.id', 'Q2GDPChange'],
      dtype='object')

In [11]:
# Predictors: the quantified policy columns selected for the regression.
X = statePolicies[['easingOrder', 'stayAtHome', 'mandatoryQuarantine', 'nonEssentialBusiness', 
                   'largeGatherings', 'schoolClosure', 'restaurantLimit', 'barClosures', 
                   'faceCoveringRequirement', 'paidSickLeaves']]
# Response: the Q2GDPChange column added to statePolicies earlier.
Y = statePolicies["Q2GDPChange"]

# The following cells read regr.coef_, regr.intercept_ and regr.score, but no
# cell in the notebook defines `regr`; create and fit it here so a fresh
# kernel can run the notebook top to bottom.
regr = linear_model.LinearRegression()
regr.fit(X, Y)

LinearRegression()

In [13]:
# Predictor names, listed in the same order as the columns of X, so that
# position i in regr.coef_ belongs to xVars[i].
xVars = [
    'easingOrder',
    'stayAtHome',
    'mandatoryQuarantine',
    'nonEssentialBusiness',
    'largeGatherings',
    'schoolClosure',
    'restaurantLimit',
    'barClosures',
    'faceCoveringRequirement',
    'paidSickLeaves',
]
# One line per predictor: its name followed by its fitted coefficient.
for i, name in enumerate(xVars):
    print("Coefficient of ", name, ": ", regr.coef_[i])

Coefficient of  easingOrder :  -0.9262141815009662
Coefficient of  stayAtHome :  -1.05923749331456
Coefficient of  mandatoryQuarantine :  -1.4291948106393213
Coefficient of  nonEssentialBusiness :  -0.35188468850207033
Coefficient of  largeGatherings :  -0.16549603747075345
Coefficient of  schoolClosure :  0.628377215294127
Coefficient of  restaurantLimit :  -0.5827684084902
Coefficient of  barClosures :  1.8856612498699614
Coefficient of  faceCoveringRequirement :  -1.0095592159160618
Coefficient of  paidSickLeaves :  1.1788408282659348


In [14]:
# Print the intercept term of the fitted model.
print(regr.intercept_)

-29.553278063763223


In [15]:
# Print the model's score evaluated on X and Y as defined in the earlier cell.
# NOTE(review): presumably R^2, since regr appears to be a LinearRegression;
# if X, Y are also the fitting data this is an in-sample figure -- confirm.
print(regr.score(X, Y))

0.25676549737962395
