# <strong> Import libraries</strong>

In [83]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import seaborn as sns
import folium
import branca.colormap as cm

# <strong> Read data</strong>

In [84]:
covid_df = pd.read_csv('../data/final_data.csv')
covid_df.head()

Unnamed: 0,Country,Date,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",TotalTests,Population,Continent,1 Caseevery X ppl,lat,lon
0,Afghanistan,2023-03-02,209359,19,7896.0,0.0,191166.0,10,10297.0,45,1201475.0,40754388,Asia,195,33.768006,66.238514
1,Afghanistan,2023-03-03,209362,3,7896.0,0.0,191170.0,4,10296.0,45,1201744.0,40754388,Asia,195,33.768006,66.238514
2,Afghanistan,2023-03-04,209370,4,7896.0,0.0,191181.0,7,10293.0,45,1202018.0,40754388,Asia,195,33.768006,66.238514
3,Afghanistan,2023-03-05,209390,20,7896.0,0.0,191212.0,31,10282.0,45,1202290.0,40754388,Asia,195,33.768006,66.238514
4,Afghanistan,2023-03-06,209394,24,7896.0,0.0,191233.0,52,10265.0,45,1202290.0,40754388,Asia,195,33.768006,66.238514


# <strong> Analytics for data visualization</strong>
### &#9889; **Question 5: Is there any country that has not recorded a new infection in the last 7 days??**
There are many steps to answer this question:
- First, we need to find the last 7 days of the data.
- Second, we need to find the countries that have not recorded a new infection in the last 7 days.
- Last, we need to visualize the result.

#### **Step 1. Find the last 7 days of the data.**

In [85]:
covid_df = covid_df.loc[(covid_df['Date'] >= '2023-03-03') & (covid_df['Date'] <= '2023-03-10')]
covid_df.head(10)

Unnamed: 0,Country,Date,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",TotalTests,Population,Continent,1 Caseevery X ppl,lat,lon
1,Afghanistan,2023-03-03,209362,3,7896.0,0.0,191170.0,4,10296.0,45,1201744.0,40754388,Asia,195,33.768006,66.238514
2,Afghanistan,2023-03-04,209370,4,7896.0,0.0,191181.0,7,10293.0,45,1202018.0,40754388,Asia,195,33.768006,66.238514
3,Afghanistan,2023-03-05,209390,20,7896.0,0.0,191212.0,31,10282.0,45,1202290.0,40754388,Asia,195,33.768006,66.238514
4,Afghanistan,2023-03-06,209394,24,7896.0,0.0,191233.0,52,10265.0,45,1202290.0,40754388,Asia,195,33.768006,66.238514
5,Afghanistan,2023-03-07,209415,21,7896.0,0.0,191243.0,10,10276.0,45,1202868.0,40754388,Asia,195,33.768006,66.238514
6,Afghanistan,2023-03-08,209441,26,7896.0,0.0,191262.0,19,10283.0,45,1203414.0,40754388,Asia,195,33.768006,66.238514
7,Afghanistan,2023-03-09,209451,10,7896.0,0.0,191272.0,10,10283.0,45,1203807.0,40754388,Asia,195,33.768006,66.238514
8,Afghanistan,2023-03-10,209484,2,7896.0,0.0,191284.0,4,10304.0,45,1204573.0,40754388,Asia,195,33.768006,66.238514
10,Albania,2023-03-03,334427,19,3597.0,1.0,329169.0,17,1661.0,0,1941032.0,2866374,Europe,9,41.000028,19.999962
11,Albania,2023-03-04,334427,0,3597.0,0.0,329169.0,0,1661.0,0,1941032.0,2866374,Europe,9,41.000028,19.999962


In [119]:
# Calculate the number of countries with and without new infections in the last 14 days
# has_new_cases = covid_df[covid_df["NewCases"] > 0]["Country"].nunique()
# no_new_cases = covid_df[covid_df["NewCases"] == 0]["Country"].nunique()

covid_group_df = covid_df.groupby(["Country"])[["NewCases"]].sum().reset_index()
covid_group_df["Infection"] = covid_group_df["NewCases"].apply(lambda x: "New infection" if x > 0 else "No new infections")
covid_group_df = covid_group_df.groupby(["Infection"])[["NewCases"]].count().reset_index()
# Create a pie chart
# labels = ["New infection", "No new infections"]
# sizes = [has_new_cases, no_new_cases]
# colors = ["#DF2E38", "#5D9C59"]

# plt.figure(figsize=(8, 8))
# plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90, textprops={'fontsize': 18})
# plt.axis('equal')
# plt.title("Percentage of countries with/without new infections in the last 14 days", fontsize=20)
# plt.show()

import plotly.express as px
# This dataframe has 244 lines, but 4 distinct values for `day`
fig = px.pie(covid_group_df, values='NewCases', names='Infection')
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

# covid_group_df

&#9889; <font color="yellow">**Comment on the results.**</font>
- The pie chart shows the percentage of countries with no new infections in the last 7 days and it accounts for 54.1% of the total number of countries. This shows that there are many countries that have not had a new infection in the last 7 days.
- This shows that COVID-19 is well under control in a lot of countries. And now prevention and vaccination is essential to prevent the spread of COVID-19 and countries have done a great job in this regard.

In [87]:
covid_df["IsNewCases"] = covid_df["NewCases"].apply(lambda x: 1 if x > 0 else 0)

In [88]:
covid_df[covid_df["IsNewCases"] == 0]

Unnamed: 0,Country,Date,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",TotalTests,Population,Continent,1 Caseevery X ppl,lat,lon,IsNewCases
11,Albania,2023-03-04,334427,0,3597.0,0.0,329169.0,0,1661.0,0,1941032.0,2866374,Europe,9,41.000028,19.999962,0
12,Albania,2023-03-05,334427,0,3597.0,0.0,329169.0,0,1661.0,0,1941032.0,2866374,Europe,9,41.000028,19.999962,0
13,Albania,2023-03-06,334427,0,3597.0,0.0,329169.0,0,1661.0,0,1941032.0,2866374,Europe,9,41.000028,19.999962,0
14,Albania,2023-03-07,334427,0,3597.0,0.0,329169.0,0,1661.0,0,1941032.0,2866374,Europe,9,41.000028,19.999962,0
15,Albania,2023-03-08,334427,0,3597.0,0.0,329169.0,0,1661.0,0,1941032.0,2866374,Europe,9,41.000028,19.999962,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1416,Zimbabwe,2023-03-05,264127,0,5668.0,0.0,257839.0,0,620.0,12,2525756.0,15331428,Africa,58,-18.455496,29.746841,0
1417,Zimbabwe,2023-03-06,264127,0,5668.0,0.0,257839.0,0,620.0,12,2525756.0,15331428,Africa,58,-18.455496,29.746841,0
1418,Zimbabwe,2023-03-07,264127,0,5668.0,0.0,257839.0,0,620.0,12,2525756.0,15331428,Africa,58,-18.455496,29.746841,0
1419,Zimbabwe,2023-03-08,264127,0,5668.0,0.0,257839.0,0,620.0,12,2525756.0,15331428,Africa,58,-18.455496,29.746841,0


In [89]:
m = folium.Map(location=[30, 0], zoom_start=2)
for i, row in covid_df.iterrows():

    cases = row['TotalCases']
    isNewCases = row['IsNewCases']

    folium.Marker(
        location=[row["lat"],row["lon"]],
        radius=5,
        icon=folium.Icon(color='red' if isNewCases == 1 else 'green'),
        fill_opacity=0.7,
        tooltip=row['Country'] + ' - Total case: ' + str(cases),
    ).add_to(m)

m

>As can be observed that there are several new cases are recorded at the Africa region. It also point out that the Africa controls the disease inefficiently. The reason may be that the Africa has a low level of medical resources and the poor living conditions. 

In [108]:
covid_group_df = covid_df.groupby(["Country", "Continent"])[["NewCases"]].sum().reset_index()
covid_group_df = covid_group_df[covid_group_df["NewCases"] > 0]
covid_group_df = covid_group_df.groupby("Continent").agg({"Country": "count"}).reset_index()
covid_group_df

Unnamed: 0,Continent,Country
0,Africa,29
1,Asia,33
2,Australia/Oceania,3
3,Europe,27
4,North America,12
5,South America,8


In [109]:
import plotly.graph_objects as go

# pull is given as a fraction of the pie radius
fig = go.Figure(data=[go.Pie(labels=covid_group_df["Continent"], values=covid_group_df["Country"], pull=[0.1, 0, 0, 0, 0 ,0 ])])
fig.update_layout(title_text="Num of new infection country in each continent")
# update figure size
fig.update_layout(
    autosize=False,
    width=500,
    height=500,
)
fig.show()