# Import libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
import json
import seaborn as sns
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2
from geopy.geocoders import Nominatim


In [6]:
def get_continent(col):
    try:
        cn_a2_code =  country_name_to_country_alpha2(col)
    except:
        cn_a2_code = 'Unknown' 
    try:
        cn_continent = country_alpha2_to_continent_code(cn_a2_code)
    except:
        cn_continent = 'Unknown' 
    return (cn_a2_code, cn_continent)

geolocator = Nominatim(user_agent="Google Chrome")
def get_location(country):
    try:
        location = geolocator.geocode(country)
        return location.latitude, location.longitude
    except:
        return None

# Read data

In [7]:
covid_df = pd.read_csv('../data/final_data.csv')
covid_df.head()

Unnamed: 0,Country,Date,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",TotalTests,Population,Continent,1 Caseevery X ppl
0,Afghanistan,2023-03-02,209359,19,7896.0,0.0,191166.0,10,10297.0,45,1201475.0,40754388,Asia,195
1,Afghanistan,2023-03-03,209362,3,7896.0,0.0,191170.0,4,10296.0,45,1201744.0,40754388,Asia,195
2,Afghanistan,2023-03-04,209370,4,7896.0,0.0,191181.0,7,10293.0,45,1202018.0,40754388,Asia,195
3,Afghanistan,2023-03-05,209390,20,7896.0,0.0,191212.0,31,10282.0,45,1202290.0,40754388,Asia,195
4,Afghanistan,2023-03-06,209394,24,7896.0,0.0,191233.0,52,10265.0,45,1202290.0,40754388,Asia,195


In [8]:
covid_df['continent'] = covid_df['Country'].apply(get_continent)
covid_df.head()

Unnamed: 0,Country,Date,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",TotalTests,Population,Continent,1 Caseevery X ppl,continent
0,Afghanistan,2023-03-02,209359,19,7896.0,0.0,191166.0,10,10297.0,45,1201475.0,40754388,Asia,195,"(AF, AS)"
1,Afghanistan,2023-03-03,209362,3,7896.0,0.0,191170.0,4,10296.0,45,1201744.0,40754388,Asia,195,"(AF, AS)"
2,Afghanistan,2023-03-04,209370,4,7896.0,0.0,191181.0,7,10293.0,45,1202018.0,40754388,Asia,195,"(AF, AS)"
3,Afghanistan,2023-03-05,209390,20,7896.0,0.0,191212.0,31,10282.0,45,1202290.0,40754388,Asia,195,"(AF, AS)"
4,Afghanistan,2023-03-06,209394,24,7896.0,0.0,191233.0,52,10265.0,45,1202290.0,40754388,Asia,195,"(AF, AS)"


In [None]:
covid_df['location'] = covid_df['Country'].apply(get_location)
covid_df['latitude'] = covid_df['location'].apply(lambda x: x[0] if x is not None else None)
covid_df['longitude'] = covid_df['location'].apply(lambda x: x[1] if x is not None else None)


# Analytics for data visualization

### **Question 8: Is there any country that has not recorded a new infection in the last 7 days??**
- There are many steps to answer this question:
    - First, we need to find the last 7 days of the data.
    - Second, we need to find the countries that have not recorded a new infection in the last 7 days.
    - Last, we need to visualize the result.

#### **Step 1. Find the last 7 days of the data.**

In [None]:
covid_df = covid_df.loc[(covid_df['Date'] >= '2023-03-03') & (covid_df['Date'] <= '2023-03-10')]
covid_df.head(10)

Unnamed: 0,Country,Date,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",TotalTests,Population,Continent,1 Caseevery X ppl
1,Afghanistan,2023-03-03,209362,3,7896.0,0.0,191170.0,4,10296.0,45,1201744.0,40754388,Asia,195
2,Afghanistan,2023-03-04,209370,4,7896.0,0.0,191181.0,7,10293.0,45,1202018.0,40754388,Asia,195
3,Afghanistan,2023-03-05,209390,20,7896.0,0.0,191212.0,31,10282.0,45,1202290.0,40754388,Asia,195
4,Afghanistan,2023-03-06,209394,24,7896.0,0.0,191233.0,52,10265.0,45,1202290.0,40754388,Asia,195
5,Afghanistan,2023-03-07,209415,21,7896.0,0.0,191243.0,10,10276.0,45,1202868.0,40754388,Asia,195
6,Afghanistan,2023-03-08,209441,26,7896.0,0.0,191262.0,19,10283.0,45,1203414.0,40754388,Asia,195
7,Afghanistan,2023-03-09,209451,10,7896.0,0.0,191272.0,10,10283.0,45,1203807.0,40754388,Asia,195
8,Afghanistan,2023-03-10,209484,2,7896.0,0.0,191284.0,4,10304.0,45,1204573.0,40754388,Asia,195
10,Albania,2023-03-03,334427,19,3597.0,1.0,329169.0,17,1661.0,0,1941032.0,2866374,Europe,9
11,Albania,2023-03-04,334427,0,3597.0,0.0,329169.0,0,1661.0,0,1941032.0,2866374,Europe,9


In [None]:
# # Tính tổng số ca nhiễm mới với từng quốc gia
# covid_df = covid_df.groupby(['Country']).sum().reset_index()

# #Sắp xếp theo thứ tự tăng dần với số ca nhiễm mới với từng quốc gia
# covid_df = covid_df.sort_values(by='NewCases', ascending=True)
# covid_df.head(10)


In [None]:
# tính tổng số ca nhiễm mới theo quốc gia
new_cases_by_country = covid_df.groupby('Country')['NewCases'].sum().reset_index()

# hiển thị dataframe mới
print(new_cases_by_country.head())

       Country  NewCases
0  Afghanistan       110
1      Albania        52
2      Algeria        42
3       Angola        11
4    Argentina       832


In [None]:
world_geo = '../data/world-countries.json'

In [None]:
# tạo bản đồ thế giới
world_map = folium.Map(location=[0, 0], zoom_start=2)

# thêm marker cho từng quốc gia
for country in new_cases_by_country.iterrows():
    folium.Marker(location=[new_cases_by_country.loc[new_cases_by_country['Country'] == country[1]['Country'], 'Lat'].values[0], 
                             new_cases_by_country.loc[new_cases_by_country['Country'] == country[1]['Country'], 'Long'].values[0]],
                  tooltip=f"{country[1]['Country']}: {country[1]['NewCases']} new cases",
                  icon=folium.Icon(color='red', icon='info-sign')).add_to(world_map)

# hiển thị bản đồ
world_map

KeyError: 'Lat'