In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy.stats import linregress
from config import gkey
import gmaps
from ipywidgets.embed import embed_minimal_html
import seaborn as sns
import plotly.express as px
import re
import gmaps.geojson_geometries
from matplotlib.cm import viridis
from matplotlib.cm import Spectral
from matplotlib.colors import to_hex
import country_converter as coco

ModuleNotFoundError: No module named 'config'

# Data Pull

In [None]:
# Load ultimate_no_null.csv into the DataFrame that the later line-plot and regression cells filter by country.
ultimate_no_null_df = pd.read_csv("ultimate_no_null.csv")
# NOTE(review): head(-1) returns every row except the last one rather than a short preview — confirm this is intended.
ultimate_no_null_df.head(-1)

In [None]:
# Spot check: display the rows whose standard_names value is "Vanuatu".
ultimate_no_null_df.loc[ultimate_no_null_df["standard_names"]=="Vanuatu"]

In [None]:
# Load covid_stats_2020.csv; later cells read its lat, lng, Confirmed, ISO_3 and standard_names columns.
covid_stats_df =  pd.read_csv("covid_stats_2020.csv")
covid_stats_df.head()

In [None]:
# Load samples.csv; its rows drive the map markers and the per-country plots further down.
sample = pd.read_csv("samples.csv")
# Display the whole frame (no head()), so every loaded row is shown.
sample

In [None]:
# Load covid_stats_sorted.csv and preview the first rows.
covid_stats_sorted_df = pd.read_csv("covid_stats_sorted.csv")
covid_stats_sorted_df.head()

# Heatmap by gmaps

In [None]:
# Configure gmaps
# gkey is imported from the local config module, so the API key is not written in the notebook itself.
gmaps.configure(api_key=gkey)

# Store latitude and longitude in locations
locations = covid_stats_df[["lat", "lng"]]

# Store Confirmed in Confirmed series
# Cast to float so the values can be passed as heatmap weights.
confirmed = covid_stats_df["Confirmed"].astype(float)

# Calculating the max Confirmed for heatmap
# NOTE(review): max_infection is not referenced by the gmaps.heatmap_layer call in the plotting cell,
# which passes a literal max_intensity instead — confirm which value is intended.
max_infection = confirmed.max()

In [None]:
# Heatmap figure: confirmed counts as a weighted heat layer, with one
# clickable marker per row of `sample`.
fig = gmaps.figure()

# HTML for each marker pop-up; the placeholders are column names of `sample`.
info_box_template = """
<dl>
<dt>Name</dt><dd>{standard_names}</dd>
<dt>Confirmed</dt><dd>{Confirmed}</dd>
<dt>Population</dt><dd>{Population (2020)}</dd>
<dt>Infection rate</dt><dd>{Infection_rate_f}</dd>
</dl>
"""

# Marker positions and their filled-in info boxes, in the same row order.
sample_locations = sample[["lat", "lng"]]
sample_info = [
    info_box_template.format(**record)
    for _position, record in sample.iterrows()
]
markers = gmaps.marker_layer(sample_locations, info_box_content=sample_info)

# Heat layer weighted by the confirmed counts prepared in the configuration cell.
heatmap_layer = gmaps.heatmap_layer(
    locations,
    weights=confirmed,
    max_intensity=1000000,
    point_radius=5.0,
    dissipating=False,
)

# Heat layer first, markers second (same order as layers are added to the figure).
fig.add_layer(heatmap_layer)
fig.add_layer(markers)

# Last expression of the cell: render the figure.
fig

In [2]:
# Export the map in an interactive HTML file
# embed_minimal_html("Images/Confirmed.html", views=[fig])

# Plotly world map

In [3]:
# fig = px.colors.sequential.swatches_continuous()
# fig.show()

In [4]:
# World choropleth coloured by confirmed count; countries are located via the ISO_3 column.
fig = px.choropleth(
    data_frame=covid_stats_df[["standard_names", "ISO_3", "Confirmed"]],
    locations="ISO_3",
    color="Confirmed",
    hover_name="standard_names",  # shown as the hover title
    color_continuous_scale=px.colors.sequential.Rainbow,
)
# Write a standalone interactive copy next to the notebook, then render inline.
fig.write_html("covid_world.html")
fig.show()

NameError: name 'px' is not defined

# GeoJSON layer

In [5]:
# Nested lookup of the form {"Confirmed": {standard_name: confirmed_count}}.
covid_dict = (
    covid_stats_df[["standard_names", "Confirmed"]]
    .set_index("standard_names")
    .to_dict()
)
# Country boundary features loaded through gmaps.geojson_geometries.
countries_geojson = gmaps.geojson_geometries.load_geometry("countries")

# Names of the countries with the smallest and largest confirmed counts,
# and the distance between those two counts (used to normalise colours).
min_con = min(covid_dict["Confirmed"], key=covid_dict["Confirmed"].get)
max_con = max(covid_dict["Confirmed"], key=covid_dict["Confirmed"].get)
con_range = covid_dict["Confirmed"][max_con] - covid_dict["Confirmed"][min_con]

def calculate_color(con):
    """
    Convert a confirmed-case count to a CSS hex color.

    The count is min-max scaled using the module-level ``covid_dict``,
    ``min_con`` and ``con_range``, inverted, and looked up in the
    ``Spectral`` colormap.

    Parameters
    ----------
    con : number
        Confirmed count for one country.

    Returns
    -------
    str
        Color string produced by ``to_hex`` with the alpha channel dropped.
    """
    # scale the count to a number between 0 and 1; if every country has the
    # same count the range is zero, so use 0 rather than dividing by zero
    if con_range == 0:
        normalized_con = 0.0
    else:
        normalized_con = (con - covid_dict["Confirmed"][min_con]) / con_range

    # invert so that the highest counts map to the start of the colormap
    inverse_con = 1.0 - normalized_con

    # look the value up in the matplotlib colormap
    mpl_color = Spectral(inverse_con)

    # transform from a matplotlib color to a valid CSS color
    gmaps_color = to_hex(mpl_color, keep_alpha=False)

    return gmaps_color

NameError: name 'covid_stats_df' is not defined

In [6]:
# Build one fill color per GeoJSON feature, in feature order.
colors = []
for feature in countries_geojson['features']:
    # Convert the feature's name to coco's short name before looking it up
    # (presumably this matches the standard_names keys — verify against the data).
    country_name = coco.convert(names=feature['properties']['name'], to='name_short')
    if country_name in covid_dict["Confirmed"]:
        con = covid_dict["Confirmed"][country_name]
        color = calculate_color(con)
    else:
        # no confirmed count for that country: use the default translucent black
        color = (0, 0, 0, 0.3)
    colors.append(color)

NameError: name 'countries_geojson' is not defined

In [7]:
# GeoJSON figure: country polygons filled from the per-feature `colors` list,
# overlaid with one info-box marker per row of `sample`.

# HTML for each marker pop-up; the placeholders are column names of `sample`.
info_box_template = """
<dl>
<dt>Name</dt><dd>{standard_names}</dd>
<dt>Confirmed</dt><dd>{Confirmed}</dd>
<dt>Population</dt><dd>{Population (2020)}</dd>
<dt>Infection rate</dt><dd>{Infection_rate_f}</dd>
</dl>
"""

# Marker positions and their filled-in info boxes, in the same row order.
sample_locations = sample[["lat", "lng"]]
sample_info = [
    info_box_template.format(**record)
    for _position, record in sample.iterrows()
]
markers = gmaps.marker_layer(sample_locations, info_box_content=sample_info)

# The same color list is used for both fill and outline of every feature.
borders = gmaps.geojson_layer(
    countries_geojson,
    fill_color=colors,
    stroke_color=colors,
    fill_opacity=0.8,
)

fig = gmaps.figure(layout={'border': '1px solid black'})
fig.add_layer(borders)
fig.add_layer(markers)

# Last expression of the cell: render the figure.
fig

NameError: name 'sample' is not defined

In [8]:
# Export the map in an interactive HTML file
# embed_minimal_html("Images/Confirmed.html", views=[fig])

# Bar plot

In [9]:
# # Generate a bar plot showing the confirmed case count for each sampled country using pyplot.
# x_axis = np.arange(len(sample))
# tick_locations = [value for value in x_axis]

# plt.figure(figsize=(30,40))
# plt.bar(x_axis, sample["Confirmed"], color='blue', align="center", width = 0.52, zorder = 3)
# plt.xticks(tick_locations, list(sample["standard_names"]), rotation="vertical", fontsize= 16)

# # Set limits for x & y axes
# plt.xlim(-0.55, len(x_axis)-0.45)
# plt.ylim(0, max(sample["Confirmed"])*1.05)

# # Set labels for axes & title for the chart
# plt.title("Count of cases for each Country")
# plt.xlabel("Countries")
# plt.ylabel("Confirmed")

# # Creating and displaying legend
# column_name = ["Confirmed"]
# plt.legend(column_name,loc="best")
# plt.grid(zorder=0)

# # Display the plot
# # plt.savefig("Images/bar_chart.png")

# plt.show()

In [10]:
# plt.rcdefaults()
# fig, ax = plt.subplots(figsize=(40,40))

# countries = sample["standard_names"]
# y_pos = np.arange(len(countries))
# count = sample["Confirmed"]
# ax.barh(y_pos, count, align='center', zorder = 3)
# ax.set_yticks(y_pos)
# ax.set_yticklabels(countries)
# ax.invert_yaxis()  # labels read top-to-bottom
# ax.set_xlabel('Confirmed', fontsize=50)
# ax.set_title('Count of cases for each Country (10m)', fontsize=50)
# ax.tick_params(axis='both', which='major', labelsize=20)
# ax.grid(alpha = .5 , zorder = 0)
# plt.show()

In [11]:
# Horizontal bar chart: confirmed count per row of `sample`, one bar per standard_names value.
plt.rcdefaults()
plt.figure(figsize=(30, 15))
ax = sns.barplot(
    data=sample,
    x="Confirmed",
    y="standard_names",
    palette="muted",
)
ax.set_title('Count of cases for each Country (10m)', fontsize=20)
ax.set_xlabel('Confirmed', fontsize=20)
ax.set_ylabel("Countries", fontsize=20)
# Enlarge the tick labels on both axes to match the axis labels.
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.show()

NameError: name 'sns' is not defined

<Figure size 3000x1500 with 0 Axes>

# Line plots and regressions

In [12]:
# Scatter plot with fitted regression line; also reports the Pearson correlation
def linear_regression(x,y,country):
    """Print the Pearson r for ``x``/``y``, draw scatter + fit line, return the line equation.

    Parameters
    ----------
    x, y : array-like supporting arithmetic with scalars
        Values plotted on the "Stringency Index" and "Infection Rate" axes.
    country : str
        Name used only in the printed message.

    Returns
    -------
    str
        Equation text of the form ``"y = <slope>x + <intercept>"`` with both
        numbers rounded to two decimals.
    """
    pearson_r = st.pearsonr(x, y)[0]
    print(f"The correlation coefficient of {country} is : {np.nan_to_num(round(pearson_r,4))}")

    fit = linregress(x, y)
    slope = fit.slope
    intercept = fit.intercept
    regress_values = x * slope + intercept

    slope_txt = str(round(slope, 2))
    intercept_txt = str(round(intercept, 2))
    line_eq = f"y = {slope_txt}x + {intercept_txt}"

    # Draw on the current pyplot axes; the caller decides when to show the figure.
    plt.scatter(x, y, c="lightblue", edgecolor="black")
    plt.plot(x, regress_values, "r-")
    plt.ylabel("Infection Rate")
    plt.xlabel("Stringency Index")
    plt.grid(alpha=.2)
    return line_eq

# Define a function for annotating
def plot_annotate(line_eq, a, b):
    """Write ``line_eq`` on the current pyplot axes at data position ``(a, b)``."""
    plt.annotate(line_eq, xy=(a, b), fontsize=15, color="black")

In [13]:
def ln_rg_country(country):
    """Show the stringency-vs-infection-rate regression plot for one country.

    Filters the module-level ``ultimate_no_null_df`` to rows whose
    ``standard_names`` equals ``country``, strips the trailing ``%`` from
    ``Infection_rate`` and converts it to float, then delegates drawing to
    ``linear_regression`` and ``plot_annotate`` before showing the figure.
    """
    rows = ultimate_no_null_df.loc[ultimate_no_null_df["standard_names"] == country]
    x = rows["stringency_index"]
    y = rows["Infection_rate"].str.rstrip('%').astype('float')

    line_eq = linear_regression(x, y, country)

    # Anchor the equation text at the smallest x and the largest y.
    plot_annotate(line_eq, x.min(), y.max())
    plt.show()

In [17]:
def line_plots(country):
    """Plot confirmed cases and stringency index for one country on twin y-axes.

    Confirmed counts go on the left axis (orange) and the stringency index on
    a twin right axis (blue); both share one combined legend. The figure is
    shown before returning.

    Parameters
    ----------
    country : str
        Value matched against the ``standard_names`` column of the
        module-level ``ultimate_no_null_df``.

    Notes
    -----
    Records are plotted in stored order at x positions 1..n and labelled
    "Jan", "Feb", ... — this assumes one row per month starting in January
    (TODO confirm against the data).
    """
    months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    data = ultimate_no_null_df.loc[ultimate_no_null_df["standard_names"] == country]
    num_rcd = len(data)
    x_axis = range(1, num_rcd + 1)

    # Label only as many ticks as there are month names, so more than twelve
    # records cannot make the tick and label counts disagree.
    tick_labels = months[:num_rcd]
    tick_locations = list(x_axis)[:len(tick_labels)]

    y1 = data["Confirmed"]
    y2 = data["stringency_index"]

    # When the axis is labelled "in millions", scale the values to match the label.
    in_millions = y1.max() >= 1000000
    if in_millions:
        y1 = y1 / 1000000

    plt.rcdefaults()
    fig, ax1 = plt.subplots()
    ax1.grid(alpha=.5, zorder=0)

    # Left axis: confirmed cases.
    ax1.plot(x_axis, y1, color='orange', linewidth=2, markersize=12, label="Confirmed")
    ax1.set_xlim(0, 13)
    ax1.set_xlabel("2020", fontsize=16)
    ax1.set_ylabel("Confirmed in millions" if in_millions else "Confirmed", fontsize=16)
    ax1.set_title(f"{country} Covid confirmed trend for 2020")
    ax1.set_xticks(tick_locations)
    ax1.set_xticklabels(tick_labels, rotation="vertical")

    # Right axis: stringency index, sharing the x axis with the left plot.
    ax2 = ax1.twinx()
    ax2.plot(x_axis, y2, color='blue', linewidth=2, markersize=12, label="Stringency")
    ax2.set_ylabel("Stringency Index", fontsize=16)

    # One legend listing both series, drawn on the top-most axes.
    handles1, labels1 = ax1.get_legend_handles_labels()
    handles2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(handles1 + handles2, labels1 + labels2, loc='best')

    plt.show()

In [18]:
# Example: draw the confirmed / stringency trend figure for Spain.
line_plots("Spain")

NameError: name 'ultimate_no_null_df' is not defined

In [19]:
# Example: draw the stringency-vs-infection-rate regression plot for Spain.
ln_rg_country("Spain")

NameError: name 'ultimate_no_null_df' is not defined

# Correlation coefficients

In [20]:
# Per-country statistics relating stringency_index to Infection_rate:
# rounded Pearson r, plus the p-value and r-value reported by linregress.
country = sorted(set(ultimate_no_null_df["standard_names"]))
corrcoef, p_value, r_value = [], [], []

for name in country:
    rows = ultimate_no_null_df.loc[ultimate_no_null_df["standard_names"] == name]
    x = rows["stringency_index"]
    # Infection_rate is stored as text with a trailing "%"; strip it and convert to float.
    y = rows["Infection_rate"].str.rstrip('%').astype('float')

    corrcoef.append(round(st.pearsonr(x, y)[0], 4))
    fit = linregress(x, y)
    p_value.append(fit.pvalue)
    r_value.append(fit.rvalue)

# One row per country, in the same sorted order as `country`.
country_r_value_df = pd.DataFrame(
    {
        "standard_names": country,
        "corrcoef": corrcoef,
        "p_value": p_value,
        "r_value": r_value,
    }
)
country_r_value_df.head(-5)

NameError: name 'ultimate_no_null_df' is not defined

In [21]:
# Cross-check for India: Pearson r, its p-value, and numpy's correlation coefficient.
india_rows = ultimate_no_null_df.loc[ultimate_no_null_df["standard_names"] == "India"]
x = india_rows["stringency_index"]
y = india_rows["Infection_rate"].str.rstrip('%').astype('float')

pearson_result = st.pearsonr(x, y)
print(round(pearson_result[0], 2))
print(round(pearson_result[1], 2))
print(round(np.corrcoef(x, y)[0, 1], 2))

NameError: name 'ultimate_no_null_df' is not defined

In [None]:
# Bar plot of each country's correlation coefficient between infection rate and stringency index.
x_axis = np.arange(len(country_r_value_df))
tick_locations = [value for value in x_axis]

plt.figure(figsize=(55,30))

sns.barplot(x="standard_names", y="corrcoef", data=country_r_value_df, palette="muted", zorder=3)

# Take the tick labels from the plotted frame itself, so they cannot drift
# from the bars if the global `country` name is rebound elsewhere in the notebook.
plt.xticks(tick_locations, country_r_value_df["standard_names"].tolist(), rotation="vertical", fontsize=20)

# Set labels for axes & title for the chart
plt.title("Correlation coefficient between infection and stringency for each country", fontsize=40)
plt.xlabel("Country", fontsize=40)
plt.ylabel("Corrcoef value", fontsize=40)

# Creating and displaying legend
column_name = ["Correlation coefficient value"]
plt.legend(column_name, loc="best")
plt.grid(zorder=0)
plt.yticks(fontsize=40)

# Display the plot
plt.show()

In [None]:
def colors_from_values(values, palette_name):
    """Pick one palette color per value according to the value's relative size.

    Parameters
    ----------
    values : array-like of numbers
        Values to color (for example a pandas Series).
    palette_name : str
        Name passed to ``sns.color_palette``; the palette is built with
        ``len(values)`` colors.

    Returns
    -------
    numpy.ndarray
        One row per input value, taken from the palette. The smallest value
        gets the first palette color and the largest gets the last. An empty
        input yields an empty ``(0, 3)`` array.
    """
    arr = np.asarray(values, dtype=float)
    if arr.size == 0:
        return np.empty((0, 3))

    lowest = arr.min()
    span = arr.max() - lowest
    # normalize the values to range [0, 1]; when every value is equal (or there
    # is a single value) the span is zero, so map everything to 0 instead of
    # producing NaN indices
    if span == 0:
        normalized = np.zeros_like(arr)
    else:
        normalized = (arr - lowest) / span

    # convert to indices into the palette
    indices = np.round(normalized * (len(arr) - 1)).astype(np.int32)
    # use the indices to get the colors
    palette = sns.color_palette(palette_name, len(arr))
    return np.array(palette).take(indices, axis=0)

In [22]:
# For every sampled country: monthly confirmed cases as bars on the left axis
# and the stringency index as a line on a twin right axis.
# The loop variable is deliberately not called `country`, which would overwrite
# the sorted country list built in the correlation-coefficient cell.
# NOTE(review): assumes the first column of `sample` holds the country name — confirm.
for country_name in sample.iloc[:, 0]:
    data = ultimate_no_null_df.loc[ultimate_no_null_df["standard_names"] == country_name]
    max_count = data["Confirmed"].max()

    plt.rcdefaults()
    fig, ax1 = plt.subplots()
    ax1.grid(alpha=.5, zorder=0)

    # Bars are shaded by their own height using the YlOrRd palette.
    sns.barplot(x="month", y="Confirmed", data=data,
                palette=colors_from_values(data["Confirmed"], "YlOrRd"),
                zorder=3, ax=ax1)
    # Set the axis labels after seaborn has drawn, so they are not replaced by the column names.
    ax1.set_xlabel("Month", fontsize=16)
    if max_count >= 1000000:
        ax1.set_ylabel("Confirmed in millions", fontsize=16)

    # Keep the twin-axes handle in ax2 and draw the line on it explicitly.
    ax2 = ax1.twinx()
    sns.lineplot(x="month", y="stringency_index", data=data, zorder=3, ax=ax2)
    ax2.set_title(f"Confirmed vs. Stringency Index for {country_name}")

    plt.show()

NameError: name 'sample' is not defined

In [None]:
# Draw the trend figure and the regression figure for every sampled country.
# The loop variable is deliberately not called `country`, which would overwrite
# the sorted country list built in the correlation-coefficient cell.
# NOTE(review): assumes the first column of `sample` holds the country name — confirm.
for country_name in sample.iloc[:, 0]:
    line_plots(country_name)
    ln_rg_country(country_name)