# Global Covid-19 Data Analysis

In [1]:
# Key Questions to ask when analysing this data set
# Questions to ask for Q1
# Population density vs Total infections? (linear regression) 
# GDP/Poverty vs death rate, # Poverty vs infection rate 
# Random sample of HDI vs total cases and death rate



In [2]:
# Importing the relevant dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from citipy import citipy
import scipy.stats as st
import matplotlib
from datetime import datetime


In [3]:
# Opening the file and creating a DataFrame
raw_covid_19_data = pd.read_csv('..\Resources\owid-covid-data.csv')
raw_covid_19_data

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,ABW,North America,Aruba,2020-03-13,2.0,2.0,,0.0,0.0,,...,35973.781,,,11.62,,,,,76.29,
1,ABW,North America,Aruba,2020-03-19,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,
2,ABW,North America,Aruba,2020-03-20,4.0,2.0,0.286,0.0,0.0,0.0,...,35973.781,,,11.62,,,,,76.29,
3,ABW,North America,Aruba,2020-03-21,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,
4,ABW,North America,Aruba,2020-03-22,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47111,,,International,2020-09-26,696.0,,,7.0,,,...,,,,,,,,,,
47112,,,International,2020-09-27,696.0,,,7.0,,,...,,,,,,,,,,
47113,,,International,2020-09-28,696.0,,,7.0,,,...,,,,,,,,,,
47114,,,International,2020-09-29,696.0,,,7.0,,,...,,,,,,,,,,


In [4]:
# Substituting NaN data with a zero integer
raw_covid_19_data = raw_covid_19_data.fillna(0)
raw_covid_19_data

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,ABW,North America,Aruba,2020-03-13,2.0,2.0,0.000,0.0,0.0,0.0,...,35973.781,0.0,0.0,11.62,0.0,0.0,0.0,0.0,76.29,0.0
1,ABW,North America,Aruba,2020-03-19,0.0,0.0,0.286,0.0,0.0,0.0,...,35973.781,0.0,0.0,11.62,0.0,0.0,0.0,0.0,76.29,0.0
2,ABW,North America,Aruba,2020-03-20,4.0,2.0,0.286,0.0,0.0,0.0,...,35973.781,0.0,0.0,11.62,0.0,0.0,0.0,0.0,76.29,0.0
3,ABW,North America,Aruba,2020-03-21,0.0,0.0,0.286,0.0,0.0,0.0,...,35973.781,0.0,0.0,11.62,0.0,0.0,0.0,0.0,76.29,0.0
4,ABW,North America,Aruba,2020-03-22,0.0,0.0,0.286,0.0,0.0,0.0,...,35973.781,0.0,0.0,11.62,0.0,0.0,0.0,0.0,76.29,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47111,0,0,International,2020-09-26,696.0,0.0,0.000,7.0,0.0,0.0,...,0.000,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0
47112,0,0,International,2020-09-27,696.0,0.0,0.000,7.0,0.0,0.0,...,0.000,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0
47113,0,0,International,2020-09-28,696.0,0.0,0.000,7.0,0.0,0.0,...,0.000,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0
47114,0,0,International,2020-09-29,696.0,0.0,0.000,7.0,0.0,0.0,...,0.000,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0


# Corrolation between population density and total cases

In [5]:
# Determining the global correlation between population density and total cases


In [6]:
# Data Cleanup to Answer first question
# Determining columns
raw_covid_19_data.columns

Index([&#39;iso_code&#39;, &#39;continent&#39;, &#39;location&#39;, &#39;date&#39;, &#39;total_cases&#39;, &#39;new_cases&#39;,
       &#39;new_cases_smoothed&#39;, &#39;total_deaths&#39;, &#39;new_deaths&#39;,
       &#39;new_deaths_smoothed&#39;, &#39;total_cases_per_million&#39;,
       &#39;new_cases_per_million&#39;, &#39;new_cases_smoothed_per_million&#39;,
       &#39;total_deaths_per_million&#39;, &#39;new_deaths_per_million&#39;,
       &#39;new_deaths_smoothed_per_million&#39;, &#39;new_tests&#39;, &#39;total_tests&#39;,
       &#39;total_tests_per_thousand&#39;, &#39;new_tests_per_thousand&#39;,
       &#39;new_tests_smoothed&#39;, &#39;new_tests_smoothed_per_thousand&#39;,
       &#39;tests_per_case&#39;, &#39;positive_rate&#39;, &#39;tests_units&#39;, &#39;stringency_index&#39;,
       &#39;population&#39;, &#39;population_density&#39;, &#39;median_age&#39;, &#39;aged_65_older&#39;,
       &#39;aged_70_older&#39;, &#39;gdp_per_capita&#39;, &#39;extreme_poverty&#39;,
      

In [7]:
# ## Removing unnecessary columns
pop_density_cases = raw_covid_19_data.drop(columns=['iso_code','new_cases','new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'tests_per_case', 'positive_rate', 'tests_units', 'stringency_index',
       'population', 'median_age', 'aged_65_older',
       'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy', 'human_development_index','continent'])



In [8]:
# Determining the number of Unique Countries
pop_density_cases['location'].unique()

array([&#39;Aruba&#39;, &#39;Afghanistan&#39;, &#39;Angola&#39;, &#39;Anguilla&#39;, &#39;Albania&#39;, &#39;Andorra&#39;,
       &#39;United Arab Emirates&#39;, &#39;Argentina&#39;, &#39;Armenia&#39;,
       &#39;Antigua and Barbuda&#39;, &#39;Australia&#39;, &#39;Austria&#39;, &#39;Azerbaijan&#39;,
       &#39;Burundi&#39;, &#39;Belgium&#39;, &#39;Benin&#39;, &#39;Bonaire Sint Eustatius and Saba&#39;,
       &#39;Burkina Faso&#39;, &#39;Bangladesh&#39;, &#39;Bulgaria&#39;, &#39;Bahrain&#39;, &#39;Bahamas&#39;,
       &#39;Bosnia and Herzegovina&#39;, &#39;Belarus&#39;, &#39;Belize&#39;, &#39;Bermuda&#39;,
       &#39;Bolivia&#39;, &#39;Brazil&#39;, &#39;Barbados&#39;, &#39;Brunei&#39;, &#39;Bhutan&#39;, &#39;Botswana&#39;,
       &#39;Central African Republic&#39;, &#39;Canada&#39;, &#39;Switzerland&#39;, &#39;Chile&#39;,
       &#39;China&#39;, &quot;Cote d&#39;Ivoire&quot;, &#39;Cameroon&#39;,
       &#39;Democratic Republic of Congo&#39;, &#39;Congo&#39;, &#39;Colombia&#39;, &#39;

In [9]:
# Dropping World and International Data from this frame
pop_density_cases = pop_density_cases[(pop_density_cases['location']!='International') & (pop_density_cases['location']!='World')]
pop_density_cases

Unnamed: 0,location,date,total_cases,population_density
0,Aruba,2020-03-13,2.0,584.800
1,Aruba,2020-03-19,0.0,584.800
2,Aruba,2020-03-20,4.0,584.800
3,Aruba,2020-03-21,0.0,584.800
4,Aruba,2020-03-22,0.0,584.800
...,...,...,...,...
46561,Zimbabwe,2020-09-26,7787.0,42.729
46562,Zimbabwe,2020-09-27,7803.0,42.729
46563,Zimbabwe,2020-09-28,7812.0,42.729
46564,Zimbabwe,2020-09-29,7816.0,42.729


In [10]:
# Filtering out the data to determine the latest reported number of cases in each country
pop_density_cases = pop_density_cases.drop_duplicates('location',keep='last')
pop_density_cases

Unnamed: 0,location,date,total_cases,population_density
196,Aruba,2020-09-30,3934.0,584.800
471,Afghanistan,2020-09-30,39254.0,54.422
664,Angola,2020-09-30,4905.0,23.890
852,Anguilla,2020-09-30,3.0,0.000
1058,Albania,2020-09-30,13518.0,104.871
...,...,...,...,...
45764,Kosovo,2020-09-30,15574.0,168.155
45938,Yemen,2020-09-30,2035.0,53.508
46175,South Africa,2020-09-30,672572.0,46.754
46371,Zambia,2020-09-30,14715.0,22.995


In [15]:
# Sorting Values based on population density
pop_density_cases = pop_density_cases.sort_values('population_density',ascending=True).reset_index
pop_density_cases.index.name = None
pop_density_cases

AttributeError: &#39;function&#39; object has no attribute &#39;index&#39;