In [11]:
!pip install plotly

import pandas as pd
from urllib.request import urlopen
import json
import plotly.express as px
import seaborn as sb
import numpy as np
import sklearn
from sklearn import linear_model
from sklearn.utils import shuffle

df = pd.read_csv("Vaccine_Hesitancy_for_COVID-19__County_and_local_estimates.csv")

# Shape
print("Number of instances and attributes: {}".format(df.shape))

# Dropping useless columns
df.drop(['County Boundary', 'State Boundary'], axis = 1, inplace = True)

# Sort by State
df.sort_values( 'State' )

# Make sure very state is visible on the maps
df['FIPS Code'] = df['FIPS Code'].apply( lambda x:str(x) if x >= 10000 else '0' + str( x ) )

df['Hesitancy'] = df['Estimated hesitant'] + df['Estimated strongly hesitant']

df.head()

df = df[[
    'Hesitancy',
    'Social Vulnerability Index (SVI)',
    'CVAC level of concern for vaccination rollout',
    ]]

df.fillna(df.mean(), inplace=True)

target = "Hesitancy"

X = np.array(df.drop([target], 1))
y = np.array(df[target])

x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size = 0.25)

linear = linear_model.LinearRegression()

linear.fit(x_train, y_train)
acc = linear.score(x_test, y_test)
print("acc: {}".format(acc))

predictions = linear.predict(x_test)

for x in range( len( predictions ) ) :
    print( predictions[x], x_test[x], y_test[x])
    

# Categories for SVI, CVAC
# Very Low (0.0 - 0.19)
# Low (0.20 - 0.39)
# Moderate (0.40 - 0.59)
# High (0.60 - 0.79)
# Very High (0.80 - 1.0) Concern

# https://plotly.com/python/mapbox-county-choropleth/

with urlopen( 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json' ) as response:
    counties = json.load( response )
    
colors_blue  = ["#132C33", "#264D58", '#17869E', '#51C4D3', '#B4DBE9']
colors_dark  = ["#1F1F1F", "#313131", '#636363', '#AEAEAE', '#DADADA']
colors_red   = ["#331313", "#582626", '#9E1717', '#D35151', '#E9B4B4']
colors_green = ['#01411C','#4B6F44','#4F7942','#74C365','#D0F0C0']
#sb.palplot( colors_blue )
#sb.palplot( colors_red )
#sb.palplot( colors_green )
#sb.palplot( colors_dark )

colors = [colors_blue[2], colors_blue[3], colors_blue[4], colors_red[4], colors_red[3], colors_red[2]]

# MAPBOX SVI START
fig = px.choropleth_mapbox(
    df,
    geojson = counties,
    locations = 'FIPS Code',
    color='Social Vulnerability Index (SVI)',
    color_continuous_scale = colors,
    range_color = (0, 1),
    hover_name = df['County Name'],
    hover_data = ['Social Vulnerability Index (SVI)'],
    mapbox_style ="open-street-map",
    zoom = 3,
    center = {"lat": 37.0902, "lon": -95.7129},
    opacity = 0.5,
    title = 'Social Vulnerability Index (SVI)'
)

fig.update_layout(
    margin = {
        'b': 0,
        'l': 0,
        'r':0,
    },
    coloraxis_colorbar = {
        'thickness': 20,
        'outlinecolor': colors_dark[2],
        'outlinewidth': 1,
        'title':''
    },
    title = {
        'font':{
            'family': 'Arial',
            'size': 22,
            'color': colors_dark[2]
        },
        'x': 0.45,
        'y': 0.9
    }
)

fig.show()
# MABOX SVI END

Number of instances and attributes: (3142, 20)
acc: 0.08637472030866644
0.32660573702418627 [0.94 0.99] 0.25
0.2864165650677736 [0.08 0.64] 0.24000000000000002
0.25233668258249 [0.22 0.03] 0.25
0.2867662709412744 [0.54 0.48] 0.14
0.263151743932281 [0.13 0.24] 0.39
0.25810917463217553 [0.15 0.15] 0.22
0.3052619505062796 [0.8  0.69] 0.30000000000000004
0.2489464142688162 [0.01 0.05] 0.41
0.25209024560032073 [0.07 0.08] 0.51
0.3006089862791811 [0.81 0.61] 0.18
0.30667720270746973 [0.92 0.67] 0.35
0.29032200399477087 [0.48 0.56] 0.3
0.2832774278347329 [0.27 0.52] 0.27
0.2793813771046633 [0.37 0.42] 0.28
0.3204659365504787 [0.91 0.9 ] 0.29
0.28837397862973607 [0.53 0.51] 0.29
0.2726865068181262 [0.62 0.22] 0.16999999999999998
0.30962388445323885 [0.86 0.74] 0.38
0.28659611210298785 [0.56 0.47] 0.24000000000000002
0.2746345321831609 [0.57 0.27] 0.28
0.2562550299615446 [0.01 0.17] 0.22
0.3135962133271912 [0.93 0.78] 0.39
0.27506403645567407 [0.09 0.45] 0.43999999999999995
0.29729499610939003 

0.26508216674679463 [0.44 0.16] 0.23
0.26363992379815565 [0.18 0.23] 0.29000000000000004
0.3126691409918757 [0.86 0.79] 0.36
0.298481413878932 [0.38 0.73] 0.39
0.28257918993106557 [0.71 0.35] 0.28
0.2650011945044481 [0.02 0.31] 0.25
0.3211735626510738 [0.97 0.89] 0.38
0.24804633268094958 [0.08 0.01] 0.18
0.2617857791275247 [0.04 0.25] 0.27
0.26903219897176195 [0.62 0.16] 0.27
0.29241789154910725 [0.52 0.58] 0.27
0.2756601793113441 [0.7  0.24] 0.3
0.3150513647278875 [0.58 0.93] 0.27
0.3238022233692548 [0.84 0.98] 0.37
0.2756108919149103 [0.67 0.25] 0.28
0.29875484160855015 [0.67 0.63] 0.33
0.27684659708088616 [0.31 0.4 ] 0.25
0.3130763486154038 [0.49 0.93] 0.25
0.29887571305040284 [0.62 0.65] 0.24000000000000002
0.26436984654773577 [0.13 0.26] 0.26
0.2881099390970457 [0.74 0.43] 0.26
0.2834299841224983 [0.61 0.4 ] 0.21
0.27587023734913685 [0.21 0.42] 0.21000000000000002
0.25913482176035874 [0.28 0.12] 0.37
0.2648803230625954 [0.07 0.29] 0.36
0.3189298129074359 [0.84 0.9 ] 0.28
0.3177340

KeyError: 'County Name'