# Overview
We will analyze two datasets related to Covid-19: Covid-19 World Vaccination Progress by country and Covid-19 death/cases data by country.
We will perform a hypothesis test to infer whether increased Covid-19 vaccination results in lower Covid-19 case counts.

In [None]:
# Run the data exploration and clean-up notebook in this session.
# NOTE(review): presumably this is where plt, merge_df and large_countries_df
# used below are defined — confirm against Data_Exploration_and_Clean_Up.ipynb.
%run Data_Exploration_and_Clean_Up.ipynb

# Relationship Between Total Covid-19 Vaccinations vs Covid-19 cases for the entire dataset

In [None]:
# Stackplots layer the y-axis values, which suits data with multiple y features; in this case, we have 233.
# Adjusting the opacity (alpha) can help visualize how this layering occurs. The y-axis is scaled by 1e7, i.e. in units of ten million.

In [None]:
# Worldwide daily vaccinations (blue) drawn on the same axes as new reported
# cases (red). The vaccination layer uses a lower alpha so the cases layer
# drawn on top of it remains distinguishable.
plt.stackplot(merge_df['date'],
              merge_df['daily_vaccinations'],
              color='b', alpha=0.2)

plt.stackplot(merge_df['date'],
              merge_df['new_cases'],
              color='r', alpha=0.5)

# Clamp the x-axis to the span of dates present in the data.
plt.xlim(merge_df['date'].min(), merge_df['date'].max())
plt.xticks(rotation=45, fontweight='light', fontsize='small')
plt.xlabel('Jan 1,2020 - Mar 29,2022')
plt.ylabel('Daily Vaccinations vs New Reported Cases')
# The title previously described cases plotted against cases; this figure
# compares daily vaccinations with new cases.
plt.title('Worldwide Daily Vaccinations Plotted Alongside New Reported Cases', pad=20)
# Labels are matched to the artists in draw order: vaccinations, then cases.
plt.legend(['Daily Vaccinations', 'New Cases'], loc='upper left')
plt.show()

In [None]:
# I set alpha to 0.2 to better visualize the peaks and slopes that were potentially being obscured by
# countries that reported cases poorly, resulting in vertical lines between data points.
# An important distinction: this plot's y-axis is scaled by 1e6 (millions), while the previous plot is scaled by 1e7 (ten millions).

In [None]:
# Worldwide new reported cases on their own, drawn with a low alpha so
# overlapping layers stay visible.
plt.stackplot(merge_df['date'], merge_df['new_cases'], color='r', alpha=0.2)
# Clamp the x-axis to the span of dates present in the data.
plt.xlim(merge_df['date'].min(), merge_df['date'].max())
plt.xticks(rotation=45, fontweight='light', fontsize='small')
plt.xlabel('Jan 1,2020 - Mar 29,2022')
plt.ylabel('New Reported Cases')
plt.title('Worldwide Reported Covid Cases', pad=20)
# Only one series is drawn here, so the legend lists a single entry; passing
# two labels would attach 'Daily Vaccinations' to the red new-cases area.
plt.legend(['New Cases'], loc='upper left')
plt.show()

Overwhelmingly, on a global scale, the number of people vaccinated far exceeded the number of recorded cases. Overall, the population of the world far exceeds the number of those vaccinated. Notably, this all relies on trusting the data reported by each country; evidence of poor record keeping may even be visible in these plots.

# Daily Vaccinations vs New Cases Over Time for Country

In [None]:
# Collapse the large-countries dataframe to one row per country (column-wise
# maximum), then order the rows by 'people_vaccinated_per_hundred' from
# highest to lowest so the top and bottom of the table can be inspected.
sorted_large_df = (
    large_countries_df
    .groupby(['country'], as_index=False)
    .max()
    .sort_values(by=['people_vaccinated_per_hundred'], ascending=False)
)

In [None]:
# First three rows of the descending sort: the three countries with the
# highest 'people_vaccinated_per_hundred'.
sorted_large_df.head(n=3)

In [None]:
# Last three rows of the descending sort: the three countries at the bottom
# of the 'people_vaccinated_per_hundred' ranking.
sorted_large_df.tail(n=3)

In [None]:
# Build one dataframe per country of interest by filtering on ISO code.

# Top 3 by 'people_vaccinated_per_hundred': China, South Korea, Italy.
country1_df = large_countries_df[large_countries_df['iso_code'].eq('CHN')]
country2_df = large_countries_df[large_countries_df['iso_code'].eq('KOR')]
country3_df = large_countries_df[large_countries_df['iso_code'].eq('ITA')]

# Bottom 3: Nigeria, Tanzania, Democratic Republic of Congo.
country4_df = large_countries_df[large_countries_df['iso_code'].eq('NGA')]
country5_df = large_countries_df[large_countries_df['iso_code'].eq('TZA')]
country6_df = large_countries_df[large_countries_df['iso_code'].eq('COD')]

# Additional countries examined separately: USA and Brazil.
usa_df = large_countries_df[large_countries_df['iso_code'].eq('USA')]
BRA_df = large_countries_df[large_countries_df['iso_code'].eq('BRA')]

# Top 3 Countries by 'people_vaccinated_per_hundred'

In [None]:
# Bar chart for China: daily vaccinations (blue) overlaid with new cases (red).
# Both series are drawn on the same axes, so each gets a label and a legend
# is shown to tell the colours apart.
print(f"People Vaccinated Per Hundred: {country1_df['people_vaccinated_per_hundred'].max()}")
plt.bar(country1_df['date'], country1_df['daily_vaccinations'],
        color='b', alpha=0.5, label='Daily Vaccinations')
plt.bar(country1_df['date'], country1_df['new_cases'],
        color='r', alpha=0.5, label='New Cases')
# Clamp the x-axis to the span of dates present for this country.
plt.xlim(country1_df['date'].min(), country1_df['date'].max())
plt.xticks(rotation=90, fontweight='light', fontsize='small')
plt.title("Daily Vaccinations vs New Cases: China")
plt.xlabel('Date')
plt.ylabel('Daily Vaccinations and New Cases')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Bar chart for South Korea: daily vaccinations (blue) overlaid with new
# cases (red). Both series are drawn on the same axes, so each gets a label
# and a legend is shown to tell the colours apart.
print(f"People Vaccinated Per Hundred: {country2_df['people_vaccinated_per_hundred'].max()}")
plt.bar(country2_df['date'], country2_df['daily_vaccinations'],
        color='b', alpha=0.5, label='Daily Vaccinations')
plt.bar(country2_df['date'], country2_df['new_cases'],
        color='r', alpha=0.5, label='New Cases')
# Clamp the x-axis to the span of dates present for this country.
plt.xlim(country2_df['date'].min(), country2_df['date'].max())
plt.xticks(rotation=90, fontweight='light', fontsize='small')
plt.title("Daily Vaccinations vs New Cases: South Korea")
plt.xlabel('Date')
plt.ylabel('Daily Vaccinations and New Cases')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Bar chart for Italy: daily vaccinations (blue) overlaid with new cases (red).
# Both series are drawn on the same axes, so each gets a label and a legend
# is shown to tell the colours apart.
print(f"People Vaccinated Per Hundred: {country3_df['people_vaccinated_per_hundred'].max()}")
plt.bar(country3_df['date'], country3_df['daily_vaccinations'],
        color='b', alpha=0.5, label='Daily Vaccinations')
plt.bar(country3_df['date'], country3_df['new_cases'],
        color='r', alpha=0.5, label='New Cases')
# Clamp the x-axis to the span of dates present for this country.
plt.xlim(country3_df['date'].min(), country3_df['date'].max())
plt.xticks(rotation=90, fontweight='light', fontsize='small')
plt.title("Daily Vaccinations vs New Cases: Italy")
plt.xlabel('Date')
plt.ylabel('Daily Vaccinations and New Cases')
plt.legend(loc='upper left')
plt.show()

# Bottom 3 Countries by 'people_vaccinated_per_hundred'

In [None]:
# Bar chart for Nigeria: daily vaccinations (blue) overlaid with new cases
# (red). Both series are drawn on the same axes, so each gets a label and a
# legend is shown to tell the colours apart.
print(f"People Vaccinated Per Hundred: {country4_df['people_vaccinated_per_hundred'].max()}")
plt.bar(country4_df['date'], country4_df['daily_vaccinations'],
        color='b', alpha=0.5, label='Daily Vaccinations')
plt.bar(country4_df['date'], country4_df['new_cases'],
        color='r', alpha=0.5, label='New Cases')
# Clamp the x-axis to the span of dates present for this country.
plt.xlim(country4_df['date'].min(), country4_df['date'].max())
plt.xticks(rotation=90, fontweight='light', fontsize='small')
plt.title("Daily Vaccinations vs New Cases: Nigeria")
plt.xlabel('Date')
plt.ylabel('Daily Vaccinations and New Cases')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Bar chart for Tanzania: daily vaccinations (blue) overlaid with new cases
# (red). Both series are drawn on the same axes, so each gets a label and a
# legend is shown to tell the colours apart.
print(f"People Vaccinated Per Hundred: {country5_df['people_vaccinated_per_hundred'].max()}")
plt.bar(country5_df['date'], country5_df['daily_vaccinations'],
        color='b', alpha=0.5, label='Daily Vaccinations')
plt.bar(country5_df['date'], country5_df['new_cases'],
        color='r', alpha=0.5, label='New Cases')
# Clamp the x-axis to the span of dates present for this country.
plt.xlim(country5_df['date'].min(), country5_df['date'].max())
plt.xticks(rotation=90, fontweight='light', fontsize='small')
plt.title("Daily Vaccinations vs New Cases: Tanzania")
plt.xlabel('Date')
plt.ylabel('Daily Vaccinations and New Cases')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Bar chart for the Democratic Republic of Congo: daily vaccinations (blue)
# overlaid with new cases (red). Both series are drawn on the same axes, so
# each gets a label and a legend is shown to tell the colours apart.
print(f"People Vaccinated Per Hundred: {country6_df['people_vaccinated_per_hundred'].max()}")
plt.bar(country6_df['date'], country6_df['daily_vaccinations'],
        color='b', alpha=0.5, label='Daily Vaccinations')
plt.bar(country6_df['date'], country6_df['new_cases'],
        color='r', alpha=0.5, label='New Cases')
# Clamp the x-axis to the span of dates present for this country.
plt.xlim(country6_df['date'].min(), country6_df['date'].max())
plt.xticks(rotation=90, fontweight='light', fontsize='small')
plt.title("Daily Vaccinations vs New Cases: Democratic Republic of Congo")
plt.xlabel('Date')
plt.ylabel('Daily Vaccinations and New Cases')
plt.legend(loc='upper left')
plt.show()

# USA and Brazil

In [None]:
# Bar chart for the USA: daily vaccinations (blue) overlaid with new cases
# (red). Both series are drawn on the same axes, so each gets a label and a
# legend is shown to tell the colours apart.
print(f"People Vaccinated Per Hundred: {usa_df['people_vaccinated_per_hundred'].max()}")
plt.bar(usa_df['date'], usa_df['daily_vaccinations'],
        color='b', alpha=0.5, label='Daily Vaccinations')
plt.bar(usa_df['date'], usa_df['new_cases'],
        color='r', alpha=0.5, label='New Cases')
# Clamp the x-axis to the span of dates present for this country.
plt.xlim(usa_df['date'].min(), usa_df['date'].max())
plt.xticks(rotation=90, fontweight='light', fontsize='small')
plt.title("Daily Vaccinations vs New Cases: USA")
plt.xlabel('Date')
plt.ylabel('Daily Vaccinations and New Cases')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Bar chart for Brazil: daily vaccinations (blue) overlaid with new cases
# (red). Both series are drawn on the same axes, so each gets a label and a
# legend is shown to tell the colours apart.
print(f"People Vaccinated Per Hundred: {BRA_df['people_vaccinated_per_hundred'].max()}")
plt.bar(BRA_df['date'], BRA_df['daily_vaccinations'],
        color='b', alpha=0.5, label='Daily Vaccinations')
plt.bar(BRA_df['date'], BRA_df['new_cases'],
        color='r', alpha=0.5, label='New Cases')
# Clamp the x-axis to the span of dates present for this country.
plt.xlim(BRA_df['date'].min(), BRA_df['date'].max())
plt.xticks(rotation=90, fontweight='light', fontsize='small')
plt.title("Daily Vaccinations vs New Cases: Brazil")
plt.xlabel('Date')
plt.ylabel('Daily Vaccinations and New Cases')
plt.legend(loc='upper left')
plt.show()

# Linear Regression of Daily Vaccinations vs Daily New Cases for World Data