# -.-.-.-.-. Angela's code BEGINS Here -.-.-.-.-.

In [None]:
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import json
from pprint import pprint
import warnings
import gmaps
import scipy.stats as sts
warnings.filterwarnings('ignore')
from matplotlib.pyplot import figure

In [None]:
# Load every raw CSV input from the data/ directory (same files, same read order).
(
    mort_data_by_region,
    mort_data_by_country,
    country_centroids,
    gdp_data,
    lifex_data,
    gdpmort,
) = (
    pd.read_csv(f"data/{file_name}")
    for file_name in (
        "adult_mort_by_region.csv",
        "adult_mort_by_country.csv",
        "country_centroids.csv",
        "GDP last 50.csv",
        "Life_Expectancy.csv",
        "xyz.csv",
    )
)

## The adult mortality rate is the probability of dying between 15 and 60 years of age, expressed per 1,000 population.

## Mortality Data by Region

In [None]:
# Give the region table readable column names, drop incomplete rows, preview it.
# The three rate columns share one long source header; pandas suffixes the
# repeats with ".1" and ".2".
_who_rate_col = "Adult mortality rate (probability of dying between 15 and 60 years per 1000 population)"
region_column_names = {
    "Unnamed: 0": "Region",
    "Unnamed: 1": "Year",
    _who_rate_col: "Adult Mortality Rate, Both Sexes",
    f"{_who_rate_col}.1": "Adult Mortality Rate, Male",
    f"{_who_rate_col}.2": "Adult Mortality Rate, Female",
}
mort_data_by_region = (
    mort_data_by_region
    .rename(columns=region_column_names)
    .dropna(axis=0)
)
mort_data_by_region.head()

In [None]:
# Keep the identifying columns plus the 2000-2016 yearly columns, rename
# "Country Code" to "CountryCode", and write the result next to the raw data.
gdp_df = pd.DataFrame(gdp_data)
id_columns = ["Series Name", "Country Name", "Country Code"]
year_columns = [f"{yr} [YR{yr}]" for yr in range(2000, 2017)]
reduced_gdp_df = gdp_df[id_columns + year_columns]
reduced_gdp_df = reduced_gdp_df.rename(columns={"Country Code": "CountryCode"})
reduced_gdp_df.to_csv("data/reduced_gdp_df.csv")

In [None]:
# Parse the both-sexes regional rate as numbers; unparseable cells become NaN.
region_rate_col = "Adult Mortality Rate, Both Sexes"
mort_data_by_region[region_rate_col] = pd.to_numeric(
    mort_data_by_region[region_rate_col],
    errors='coerce',
)

In [None]:
# Average mortality per region across all years.
# numeric_only=True is explicit because the table still holds text columns;
# recent pandas versions raise instead of silently skipping them.
xy = mort_data_by_region.groupby(["Region"]).mean(numeric_only=True)
xy_df = pd.DataFrame(xy)
region_avgs = xy_df.rename(columns=
                           {"Adult Mortality Rate, Both Sexes": "Average Mortality Rate"})
region_avgs = region_avgs.round(1)
# Remove the row labelled "WHO region" (presumably a repeated header row in
# the source CSV - verify against the file).
region_avgs = region_avgs.drop("WHO region")
region_avgs

In [None]:
# Bar chart of the multi-year average mortality rate for each region.
regions = region_avgs.index
x_axis = regions
reg_avg_rts = region_avgs["Average Mortality Rate"]
f, ax = plt.subplots(figsize=(9,9), facecolor=(1, 1, 1))
plt.bar(x_axis, reg_avg_rts, color="darkblue", edgecolor="yellowgreen", linewidth=3, align="center")
plt.xticks(rotation=45)
plt.ylabel("Average Mortality Rates", fontsize=15)
plt.title("17-year Average Mortality Rates by Region", fontsize=15)
plt.margins(0.2)
# Save before show(): depending on the backend, the figure may no longer be
# current once show() returns, which would leave an empty image on disk.
plt.savefig("Images/17YrAvgRegion.png")
plt.show()

## Mortality Data by COUNTRY

In [None]:
# Give the country table readable column names, drop incomplete rows, preview it.
# The three rate columns share one long source header; pandas suffixes the
# repeats with ".1" and ".2".
_country_rate_col = "Adult mortality rate (probability of dying between 15 and 60 years per 1000 population)"
country_column_names = {
    "Unnamed: 0": "Country",
    "Unnamed: 1": "Year",
    _country_rate_col: "Adult Mortality Rate, Both Sexes",
    f"{_country_rate_col}.1": "Adult Mortality Rate, Male",
    f"{_country_rate_col}.2": "Adult Mortality Rate, Female",
}
mort_data_by_country = (
    mort_data_by_country
    .rename(columns=country_column_names)
    .dropna(axis=0)
)
mort_data_by_country.head()

In [None]:
# Parse the both-sexes country rate as numbers; unparseable cells become NaN.
country_rate_col = "Adult Mortality Rate, Both Sexes"
mort_data_by_country[country_rate_col] = pd.to_numeric(
    mort_data_by_country[country_rate_col],
    errors='coerce',
)

In [None]:
# Find the overall (17 year) low and the country/countries that recorded it.
low_mort_value = mort_data_by_country["Adult Mortality Rate, Both Sexes"].min()
# Filter on the computed minimum rather than a hard-coded number, so the
# lookup stays correct if the data changes.
low_country = mort_data_by_country.loc[
    mort_data_by_country["Adult Mortality Rate, Both Sexes"] == low_mort_value,
    "Country",
]
# Join the names so the sentence shows countries, not a pandas Series repr.
low_country_names = ", ".join(low_country.astype(str).unique())
print(f"The lowest mortality rate recorded over 17 years was {low_mort_value} in {low_country_names}")

In [None]:
# Find the overall (17 year) high and the country/countries that recorded it.
hi_mort_value = mort_data_by_country["Adult Mortality Rate, Both Sexes"].max()
# Filter on the computed maximum rather than a hard-coded number, so the
# lookup stays correct if the data changes.
hi_country = mort_data_by_country.loc[
    mort_data_by_country["Adult Mortality Rate, Both Sexes"] == hi_mort_value,
    "Country",
]
# Join the names so the sentence shows countries, not a pandas Series repr.
hi_country_names = ", ".join(hi_country.astype(str).unique())
print(f"The highest mortality rate recorded over 17 years was {hi_mort_value} in {hi_country_names}")

In [None]:
# Country centroid coordinates for the heat map, read from a local CSV
# because the Google API lookup was not working.
centroids_df = pd.DataFrame(data=country_centroids)
centroids_df.head(n=5)

In [None]:
# Per-country mean of the numeric columns across all years, rounded to 2 dp.
# numeric_only=True is explicit because the table still holds text columns;
# recent pandas versions raise instead of silently skipping them.
rate_over17yrs = mort_data_by_country.groupby(["Country"]).mean(numeric_only=True)
avg_rate_over17yrs = rate_over17yrs.rename(columns= {"Adult Mortality Rate, Both Sexes": "Average Mortality Rate 2000-2016"})
avg_rate_over17yrs["Average Mortality Rate 2000-2016"] = avg_rate_over17yrs["Average Mortality Rate 2000-2016"].round(2)
avg_rate_df = pd.DataFrame(avg_rate_over17yrs)
avg_rate_df.head()

In [None]:
# Attach centroid coordinates to each country's average rate, discard rows
# with any missing value, and persist the result.
merged_table = (
    avg_rate_df
    .merge(centroids_df, on="Country", how="left")
    .dropna()
)
merged_table.to_csv("output_data/merged_table.csv")
merged_table.head(25)

In [None]:
# Weighted heat map of the average mortality rate, one point per country centroid.
from config import google_api_key
gmaps.configure(api_key = google_api_key)
locations = merged_table[["latitude","longitude"]]
rates = merged_table["Average Mortality Rate 2000-2016"]
fig = gmaps.figure(map_type="HYBRID", center=[0,0], zoom_level = 2)
heat_layer = gmaps.heatmap_layer(locations, weights=rates, dissipating=False, max_intensity=50, point_radius=3)
fig.add_layer(heat_layer)
# NOTE: the previous plt.savefig("Images/GlobalMortalityHeatMap.png") call was
# removed. `fig` is a gmaps widget, not a matplotlib figure, so pyplot saved
# whatever matplotlib figure happened to be current under the heat-map name.
# Export the map from the widget itself instead (see the gmaps export docs).
fig

### Heat map of average adult mortality rates around the world based on a 17-year average.

In [None]:
# Mean rate per (country, year), then one filtered table per country of interest.
# numeric_only=True is explicit because the table still holds text columns;
# recent pandas versions raise instead of silently skipping them.
country_rate_by_year = mort_data_by_country.groupby(["Country", "Year"]).mean(numeric_only=True)
yrly_avg = country_rate_by_year.rename(columns = {"Adult Mortality Rate, Both Sexes" : "Annual Average"})
# Move "Country" back to a column so it can be filtered on; "Year" stays as the index.
yrly_avg = yrly_avg.reset_index("Country")


def _annual_rates_for(country_name):
    """Return the rows of yrly_avg whose Country equals *country_name*."""
    return yrly_avg.loc[yrly_avg["Country"] == country_name]


us_yrly_avg = _annual_rates_for("United States of America")
af_yrly_avg = _annual_rates_for("Afghanistan")
ukr_yrly_avg = _annual_rates_for("Ukraine")
prc_yrly_avg = _annual_rates_for("China")
gu_yrly_avg = _annual_rates_for("Guatemala")
lib_yrly_avg = _annual_rates_for("Liberia")
ind_yrly_avg = _annual_rates_for("India")
sm_yrly_avg = _annual_rates_for("Somalia")
br_yrly_avg = _annual_rates_for("Brazil")
ger_yrly_avg = _annual_rates_for("Germany")
ger_yrly_avg

In [None]:
def _rates_for_year(year_label):
    """Return the rows for *year_label* with the rate column prefixed by that year.

    The Year column is compared against the label as a string - this assumes
    the column was read as text; TODO confirm against the CSV.
    """
    in_year = mort_data_by_country["Year"] == year_label
    year_column = f"{year_label} Adult Mort. Rate, Both Sexes"
    return mort_data_by_country.loc[in_year].rename(
        columns={"Adult Mortality Rate, Both Sexes": year_column}
    )


# One table for every second year, newest first.
data_2016 = _rates_for_year("2016")
data_2014 = _rates_for_year("2014")
data_2012 = _rates_for_year("2012")
data_2010 = _rates_for_year("2010")
data_2008 = _rates_for_year("2008")
data_2006 = _rates_for_year("2006")
data_2004 = _rates_for_year("2004")
data_2002 = _rates_for_year("2002")
data_2000 = _rates_for_year("2000")

# Only the last expression of a cell is displayed: summary statistics for 2000.
round(data_2000.describe(), 2)

In [None]:
# Line chart comparing the annual mortality rate of eight countries.
years = list(range(2000, 2017))
us_values = us_yrly_avg["Annual Average"]
prc_values = prc_yrly_avg["Annual Average"]
ind_values = ind_yrly_avg["Annual Average"]
af_values = af_yrly_avg["Annual Average"]
br_values = br_yrly_avg["Annual Average"]
sm_values = sm_yrly_avg["Annual Average"]
lib_values = lib_yrly_avg["Annual Average"]
ger_values = ger_yrly_avg["Annual Average"]

# (values, line style) in drawing order; China and Liberia have no marker.
series_specs = [
    (us_values, {"marker": "^", "color": "darkblue", "label": "USA"}),
    (prc_values, {"color": "red", "label": "China"}),
    (ind_values, {"marker": "o", "color": "brown", "label": "India"}),
    (af_values, {"marker": "*", "color": "gray", "label": "Afghanistan"}),
    (br_values, {"marker": "+", "color": "green", "label": "Brazil"}),
    (sm_values, {"marker": ".", "color": "yellowgreen", "label": "Somalia"}),
    (lib_values, {"color": "purple", "label": "Liberia"}),
    (ger_values, {"marker": "x", "color": "black", "label": "Germany"}),
]
# plt.plot returns a one-element list; [0] makes every handle a Line2D
# (previously only us_handle was unpacked, the others were lists).
(us_handle, prc_handle, ind_handle, af_handle,
 br_handle, sm_handle, lib_handle, ger_handle) = [
    plt.plot(years, values, **style)[0] for values, style in series_specs
]

plt.legend(loc=(1.1,0.4))
plt.xlabel("Year", fontsize=14)
plt.ylabel("Average Mortality Rate", fontsize=14)
plt.grid()
plt.ylim(25, 600)
plt.title("Mortality Rate Timeseries by Country", fontsize=14)
plt.savefig("Images/TimeseriesByCountry.png", bbox_inches = 'tight', pad_inches=1.0)
plt.show()

In [None]:
# Minimum, maximum and mean rate for the first and last year of the study.
rate_2000 = data_2000["2000 Adult Mort. Rate, Both Sexes"]
rate_2016 = data_2016["2016 Adult Mort. Rate, Both Sexes"]
data_2000_low = rate_2000.min()
data_2000_hi = rate_2000.max()
data_2000_avg = rate_2000.mean()
data_2016_low = rate_2016.min()
data_2016_hi = rate_2016.max()
data_2016_avg = rate_2016.mean()
# Rates are per 1000 people, so dividing by 10 gives a percentage. The
# percentages are derived from the data instead of being typed in by hand.
print(f"In 2000 the mean mortality rate worldwide was {round(data_2000_avg,1)}/1000 people, or ~{data_2000_avg / 10:.0f}%")
print(f"In 2000 the lowest mortality rate on record was {data_2000_low} and the highest mortality rate on record was {data_2000_hi}")
print(f"In 2016 the mean mortality rate worldwide was {round(data_2016_avg,1)}/1000 people, or ~{data_2016_avg / 10:.0f}%")
print(f"In 2016 the lowest mortality rate on record was {data_2016_low} and the highest mortality rate on record was {data_2016_hi}")

In [None]:
# Summarise average GDP and test whether it looks normally distributed.
from scipy.stats import linregress
gdpmort_df = pd.DataFrame(gdpmort)
gdpmort_df = gdpmort_df.dropna()
mean_gdp = gdpmort_df["Avg_GDP"].mean()
median_gdp = gdpmort_df["Avg_GDP"].median()
print(f"Mean GDP over the 17 yr period=  {mean_gdp*10000000}.")
print(f"Median GDP over the 17 yr period=  {median_gdp*10000000}.")
# Draw at most 50 rows (sample() raises when asked for more rows than exist)
# with a fixed seed so the test result is the same on every run.
gdp_sample = gdpmort_df["Avg_GDP"].sample(min(50, len(gdpmort_df)), random_state=0)
normality_result = sts.normaltest(gdp_sample)
# State the conclusion only when the test actually supports it.
if normality_result.pvalue < 0.05:
    print("GDP is not normally distributed; outliers are skewing the mean GDP.")
else:
    print("The normality test does not reject a normal distribution for GDP.")
print(normality_result)

### GDP is not normally distributed.

In [None]:
%matplotlib inline
# Scatter of average mortality against average GDP with a least-squares line.
x = gdpmort_df["Avg_GDP"]
y = gdpmort_df["Avg_Mort"]

# Fit on every row; the axis limits below only restrict what is displayed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

plt.scatter(x, y, marker="o", color="b")
plt.xlim(0, 50000)
plt.ylim(0, 750)
plt.xlabel("GDP (x10,000,000)", fontsize=15)
plt.ylabel("Mortality Rate (per 1000)", fontsize=15)
plt.title("Avg Mortality vs. Avg GDP, 2000-2016", fontsize=15)
plt.margins(0.2)
plt.plot(x, regress_values, "r-")
plt.annotate(line_eq, (25000, 610), fontsize=15, color="red")
plt.savefig("Images/MortVsGDP.png")

# Population (ddof=0) spread of the GDP column.
var_gdpmort = np.var(gdpmort_df["Avg_GDP"], ddof=0)
sd_gdpmort = np.std(gdpmort_df["Avg_GDP"], ddof=0)

print(f"r = {rvalue}")
print(f"The population variance is {var_gdpmort}, and the standard deviation is {sd_gdpmort}")
print("Overall there is almost no correlation between Mortality Rate and GDP over the same 17-year period. Because outlying values are skewing the GDP high this plot is displaying mortality rates for only those GDP values within a range that is equidistant from the median GDP.")
plt.show()

In [None]:
# Box plots of average GDP and average mortality, side by side.
# Both axes live in ONE figure so the saved image contains both plots; with
# two separate figures only the last one ended up in box_plots.png.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

ax1.set_title('GDP BoxPlot', fontsize=14)
ax1.boxplot(gdpmort_df["Avg_GDP"], showmeans=True, notch=True)
ax1.margins(0.2)

ax2.set_title('Mortality BoxPlot', fontsize=14)
ax2.boxplot(gdpmort_df["Avg_Mort"], showmeans=True, notch=True)
ax2.margins(0.2)

fig.tight_layout()
fig.savefig("Images/box_plots.png")
plt.show()
#-----------------Angela's code ENDS-----------------------

## -.-.-.-.-. Angela's code ENDS Here -.-.-.-.-.

# -.-.-.-.-. Katie's code BEGINS Here -.-.-.-.-.

### NOTE: Life expectancy at birth indicates the number of years a newborn infant would live if prevailing patterns of mortality at the time of its birth were to stay the same throughout its life.

In [None]:
#Dependencies

import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import linregress
import seaborn as sns
from scipy import stats

In [None]:
# Life expectancy for all countries, 2000-2015:
# https://www.kaggle.com/kumarajarshi/life-expectancy-who/version/1
csv_path = "Resources/Life_Expectancy.csv"
life_exp = pd.read_csv(csv_path)

# GDP table (the file name indicates 1970 to 2018).
csv_path_2 = "Resources/GDP_1970_to_2018.csv"
gdp_data = pd.read_csv(csv_path_2)

# Life expectancy since 1970 (44 countries).
csv_path_3 = "Resources/Life_1970.csv"
life_exp_1970 = pd.read_csv(csv_path_3)

In [None]:
# Keep only the columns used below and tidy their names (the source header
# "Life expectancy " carries a trailing space).
wanted_columns = ["Country", "Year", "Status", "Life expectancy "]
reduced_life = life_exp.loc[:, wanted_columns]
cleaned_life = reduced_life.rename(
    columns={
        "Life expectancy ": "Life Expectancy",
        "Country": "Country Name",
    }
)
cleaned_life.head()

In [None]:
# Mean life expectancy across all countries for each year, 2000-2015.
yearly_life_expectancy = cleaned_life.groupby('Year')['Life Expectancy'].mean()
yearly_life_expectancy.plot(kind='line')

plt.title("Life Expectancy Between 2000-2015")
plt.xlabel("Year")
plt.ylabel("Life Expectancy (Age in Years)")

#plt.savefig("LifeExp_overtime.png", bbox_inches = 'tight', pad_inches = 1.0)
plt.show()

In [None]:
# Mean life expectancy across all countries for each year.
# numeric_only=True is explicit because the table also holds text columns;
# recent pandas versions raise instead of silently skipping them.
cleaned_life.groupby(["Year"]).mean(numeric_only=True)

In [None]:
# Per-column summary statistics of the GDP table, transposed so that each
# source column becomes a row labelled by "Year".
gdp_summary_df = gdp_data.describe()
gdp_summary_df = gdp_summary_df.T
gdp_summary_df = gdp_summary_df.reset_index()
gdp_summary_df = gdp_summary_df.rename(columns={'index':'Year'})
# dropna() returns a new frame; the result was previously discarded, so no
# rows were ever removed. Keep it so incomplete summary rows are dropped.
gdp_summary_df = gdp_summary_df.dropna()
gdp_summary_df.head()

In [None]:
# Yearly mean life expectancy with Year as an ordinary column, so it can be
# merged with the GDP summary.
# numeric_only=True is explicit because the table also holds text columns;
# recent pandas versions raise instead of silently skipping them.
new_life = cleaned_life.groupby(["Year"]).mean(numeric_only=True).reset_index()
new_life.head()

In [None]:
# Cast the Year labels to integers so they match the life expectancy table's
# Year values in the merge.
gdp_summary_df = gdp_summary_df.astype({'Year': int})

In [None]:
# Join yearly life expectancy with the GDP summary on their shared Year column.
life_gdp_merge = new_life.merge(gdp_summary_df, on='Year')
life_gdp_merge.head()

In [None]:
# Scatter of yearly mean GDP against yearly mean life expectancy with a
# least-squares line.
x_values = life_gdp_merge['Life Expectancy']
y_values = life_gdp_merge['mean']
# NOTE(review): missing GDP means are replaced by 0 and therefore take part
# in the fit - confirm this is intended.
y_values = y_values.fillna(0)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(69,20),fontsize=14,color="red")
plt.title("Life Expectancy versus GDP")
plt.xlabel("Life Expectancy")
plt.ylabel("GDP (Billion USD)")
# linregress returns the correlation coefficient r; square it to report r-squared.
print(f"The r-squared is: {rvalue**2}")

#plt.savefig("LifeExp_vs_GDP.png", bbox_inches = 'tight', pad_inches = 1.0)

plt.show()

In [None]:
#Optional add (for fun): extrapolate the fitted line to two target ages.
# Uses slope/intercept from whichever regression cell ran most recently.
age = 90
age2 = 100
for target_age in (age, age2):
    predicted_gdp = round(slope * target_age + intercept, 2)
    print(f"On average, for the life expectancy of {target_age} years old, the country GDP would have to be {predicted_gdp} billion USD.")

In [None]:
# Mean life expectancy per year across the 44 countries in the 1970-2017 file.
mean_value_by_time = life_exp_1970.groupby('TIME')['Value'].mean()
mean_value_by_time.plot(kind='line')

plt.title("Life Expectancy Over Time")
plt.xlabel("Year")
plt.ylabel("Life Expectancy (in Years)")

#plt.savefig("LifeExp_all.png", bbox_inches = 'tight', pad_inches = 1.0)
plt.show()

In [None]:
# The 1970 file only covers 44 countries, so it is used here just to show the
# USA trend: keep the rows whose LOCATION is 'USA'.
is_usa = life_exp_1970['LOCATION'] == 'USA'
usa_1970 = life_exp_1970.loc[is_usa]
usa_1970.head()

In [None]:
# USA life expectancy over time, using the rows whose SUBJECT is 'TOT'
# (men and women combined).
is_total = usa_1970['SUBJECT'] == 'TOT'
total_1970 = usa_1970.loc[is_total]

total_1970.plot.line(x="TIME", y="Value", legend=False)
plt.title("Life Expectancy in USA from 1970")
plt.xlabel("Year")
plt.ylabel("Life Expectancy (in Years)")
#plt.savefig("LifeExp_USA.png", bbox_inches = 'tight', pad_inches = 1.0)

plt.show()

In [None]:
# Rows of the cleaned life expectancy table for the year 2000.
# The mask is built from cleaned_life itself; it previously came from
# life_exp, which only worked while both frames shared the same row index.
year_2000 = cleaned_life.loc[cleaned_life['Year'] == 2000]
year_2000.head()

In [None]:
# Attach the GDP columns to the year-2000 life expectancy rows by country name.
merge_2000_gdp = year_2000.merge(gdp_data, on='Country Name')
merge_2000_gdp

In [None]:
# Exclude the two countries treated as GDP outliers in the chart that follows.
outlier_countries = ['United States of America', 'Japan']
merge_2000_gdp = merge_2000_gdp[~merge_2000_gdp['Country Name'].isin(outlier_countries)]

In [None]:
# Scatter of GDP in 2000 against life expectancy in 2000 with a least-squares line.
x_values = merge_2000_gdp['Life Expectancy']
y_values = merge_2000_gdp['2000']
# NOTE(review): missing GDP values are replaced by 0 and therefore take part
# in the fit - confirm this is intended.
y_values = y_values.fillna(0)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(40,50),fontsize=14,color="red")
plt.title("Life Expectancy versus GDP in 2000 - (USA/Japan removed as outliers)")
plt.xlabel("Life Expectancy (in Years)")
plt.ylabel("GDP (in Billions USD)")
# linregress returns the correlation coefficient r; square it to report r-squared.
print(f"The r-squared is: {rvalue**2}")

print(f"Slope is: {slope}")

#plt.savefig("LifeExp_vs_GDP_2000.png", bbox_inches = 'tight', pad_inches = 1.0)

plt.show()

# -.-.-.-.-. Katie's code ENDS Here -.-.-.-.-.

# -.-.-.-.-. Nabeel's code BEGINS Here -.-.-.-.-.

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import json
import requests
from config import g_key
import gmaps
import random
from matplotlib.cm import viridis
from matplotlib.colors import to_hex
import gmaps.geojson_geometries
import numpy as np
# Load the 'countries' geometry through gmaps and register the API key.
countries_geojson = gmaps.geojson_geometries.load_geometry('countries')
gmaps.configure(api_key=g_key)

# Show DataFrames in full, without truncating rows or columns.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# GDP table that also carries Latitude/Longitude columns.
csv_file = os.path.join('Resources', 'GDP_1970_to_2018_wLatLng.csv')
gdp_data = pd.read_csv(csv_file)

In [None]:
# Preview the first 50 rows of the GDP table.
gdp_data.head(n=50)

In [None]:
# Summary statistics per numeric column, transposed so each source column
# becomes a row whose label is stored under "Year".
gdp_summary_df = (
    gdp_data.describe()
    .T
    .reset_index()
    .rename(columns={'index': 'Year'})
)
gdp_summary_df

## Mean GDP over time for the entire world

In [None]:
# Line chart of the world mean GDP per year, highlighting 1970, 2000 and 2018.
mean_GDP_over_time = gdp_summary_df.groupby('Year')['mean'].first()
# The summary also has Latitude/Longitude rows; only year rows go on the axis.
yearly_mean_gdp = mean_GDP_over_time.drop(['Latitude','Longitude'])
# Year labels are strings and are plotted at positions 0..n-1, so look up each
# highlighted year's position instead of hard-coding it.
year_positions = {label: position for position, label in enumerate(yearly_mean_gdp.index)}
pos_1970 = year_positions['1970']
pos_2000 = year_positions['2000']
pos_2018 = year_positions['2018']

fig1, ax1 = plt.subplots(figsize=(8,6))
ax1.set_title('Growth of mean GDP over time for the entire world', fontsize = 14)
ax1.set_xlabel('Year', fontsize = 14)
ax1.set_ylabel('Mean GDP (Billion US$)', fontsize = 14)
ax1.set_ylim([0,50])
ax1.plot(yearly_mean_gdp)
ax1.plot([pos_2018],[yearly_mean_gdp['2018']],'o')
ax1.plot([pos_2000],[yearly_mean_gdp['2000']],'o', color = 'salmon')
ax1.plot([pos_1970],[yearly_mean_gdp['1970']],'o', color='gray')
xticks_values = list(range(0, len(yearly_mean_gdp) + 1, 5))
ax1.set_xticks(xticks_values)
ax1.annotate(f'US$ {round(yearly_mean_gdp["1970"], 2)} \nin 1970',
             xy=(pos_1970 - 1, yearly_mean_gdp['1970']*2.25), fontsize = 12, color='gray')
# Annotation values are read from the data so they cannot drift from the line.
ax1.annotate(f'US$ {yearly_mean_gdp["2018"]:.0f}B \nin 2018',
             xy=(pos_2018 - 4, yearly_mean_gdp['2018']+1), fontsize = 12, color='black')
ax1.annotate(f'US$ {yearly_mean_gdp["2000"]:.1f}B \nin 2000',
             xy=(pos_2000 - 4, yearly_mean_gdp['2000']+2), fontsize = 12, color='black')
ax1.tick_params(axis='both', which='major', labelsize=12)

# plt.savefig("Images/MeanGDP_over_time.png", bbox_inches = 'tight', pad_inches = 1.0)
plt.show()

## Display countries colored with GDP values for a chosen year

In [None]:
# Ask for the year to map. It stays a string because the GDP table's year
# columns are looked up by string label (e.g. '2018').
year = input('Enter a year between 1970 - 2018:').strip()
# Fail here, with a clear message, rather than later in a column lookup.
if not (year.isdigit() and 1970 <= int(year) <= 2018):
    raise ValueError(f"Expected a year between 1970 and 2018, got {year!r}")

In [None]:
csv_file = os.path.join('Resources','GDP_1970_to_2018_wLatLng.csv')
gdp_data = pd.read_csv(csv_file)

# GDP of the chosen year per country, taken from the first row of each name.
# Built once, instead of re-scanning the whole table for every country.
first_gdp_by_country = {}
for name, value in zip(gdp_data['Country Name'], gdp_data[year]):
    first_gdp_by_country.setdefault(name, value)

# Pick only those countries which also appear in the geojson geometry.
common_country_names = [
    feature['properties']['name']
    for feature in countries_geojson['features']
    if feature['properties']['name'] in first_gdp_by_country
]

# Countries without a value for this year are left out: a NaN would corrupt
# min()/max() below, and the colouring step already has a default colour for
# countries that are missing from country2gdp.
gdp_list = [
    (country, round(first_gdp_by_country[country], 2))
    for country in common_country_names
    if pd.notna(first_gdp_by_country[country])
]
country2gdp = dict(gdp_list)

# We will need to scale the GDP values to lie between 0 and 1
min_gdp = min(country2gdp.values())
max_gdp = max(country2gdp.values())
gdp_range = max_gdp - min_gdp

def calculate_color(gdp_value):
    """Map a GDP value to a CSS hex colour on the viridis colour map.

    The value is scaled to [0, 1] using the module-level ``min_gdp`` and
    ``gdp_range``; the scale is not inverted, so the highest GDP gets the
    colour at the top end of the map.

    Parameters
    ----------
    gdp_value : number
        GDP of one country, in the same units as ``min_gdp``.

    Returns
    -------
    str
        Colour as a hex string without an alpha channel.
    """
    if gdp_range == 0:
        # Every country has the same GDP: avoid dividing by zero and use the
        # bottom of the scale for all of them.
        normalized_gdp = 0.0
    else:
        normalized_gdp = (gdp_value - min_gdp) / gdp_range

    # matplotlib RGBA colour -> hex string
    mpl_color = viridis(normalized_gdp)
    gmaps_color = to_hex(mpl_color, keep_alpha=False)

    return gmaps_color

# One colour per geojson feature, in feature order. Countries without a GDP
# entry get a translucent black default.
default_color = (0, 0, 0, 0.3)
feature_names = [feature['properties']['name'] for feature in countries_geojson['features']]
colors = [
    calculate_color(country2gdp[name]) if name in country2gdp else default_color
    for name in feature_names
]

In [None]:
# Customize the size of the figure
figure_layout = {
    'width': '1000px',
    'height': '800px',
    'border': '1px solid black',
    'padding': '0px',
    'margin': '0 auto 0 auto'
}
fig = gmaps.figure(zoom_level = 2, center = (40,10), layout=figure_layout, display_toolbar = True)

# Faint country outlines. This layer used to be added twice; once is enough.
geojson = gmaps.geojson_layer(countries_geojson, fill_opacity=0.05, stroke_weight=2)
fig.add_layer(geojson)

# Countries filled with their GDP colour.
gdp_layer = gmaps.geojson_layer(countries_geojson,fill_color=colors, fill_opacity = 0.8, stroke_weight=2)
fig.add_layer(gdp_layer)

coordinates = gdp_data[['Latitude','Longitude']]

# Pop-up text for each marker: country name and its GDP for the chosen year.
info_box_list = []
for country in list(gdp_data['Country Name']):
    try:
        gdp_val = list(gdp_data[gdp_data['Country Name'] == country][year])[0]
    except (KeyError, IndexError):
        # Year column or country row missing: skip this entry, as before,
        # but no longer hide unrelated errors behind a bare except.
        continue
    info_box_list.append(f'{country}; US$ {round(gdp_val,2)}B')

symbol_layer = gmaps.symbol_layer(coordinates,
                                  scale = 2,
                                  fill_color='red',
                                  stroke_color='red',
                                  info_box_content=info_box_list)

# Add layer
fig.add_layer(symbol_layer)

# Display figure
fig

## Mortality data for 25-34 year old Male/Female in USA for 2010 - 2017

In [None]:
# Per-year death counts (in thousands) and percentage shares for each gender.
year_labels=[str(_) for _ in range(2010,2018,1)]
male_deaths = []
female_deaths = []
male_percentage = []
female_percentage = []

# The loop variable is deliberately NOT called `year`: that name holds the
# year chosen for the GDP map and must not be overwritten by this cell.
for cdc_year in range(2010,2018,1):
    csv_file = os.path.join('Resources',f'CDC_USA_{cdc_year}_25-34y.csv')
    mortality_in_USA = pd.read_csv(csv_file)
    mortality_in_USA = mortality_in_USA.drop(['Year','Year Code','State Code', 'Crude Rate', 'Population'], axis = 1)
    # Group once and reuse the totals for the percentage.
    death_count = mortality_in_USA.groupby(['Gender'])['Deaths'].sum()
    death_percentage = death_count/(mortality_in_USA['Deaths'].sum())*100

    male_deaths.append(death_count['Male']/1000)
    male_percentage.append(death_percentage['Male'])

    female_deaths.append(death_count['Female']/1000)
    female_percentage.append(death_percentage['Female'])

In [None]:
# Stacked bars: male deaths at the bottom, female deaths on top, one bar per year.
N = len(male_deaths)   # one bar per year loaded, instead of a hard-coded 8
male_data = tuple(male_deaths)
female_data = tuple(female_deaths)
ind = np.arange(N)    # the x locations for the groups
width = 0.65      # the width of the bars: can also be len(x) sequence

fig, ax = plt.subplots(figsize=(8,6))

p1 = ax.bar(ind, male_deaths, width)
# Female bars start where the male bars end.
p2 = ax.bar(ind, female_deaths, width, bottom = male_data)

plt.ylabel('Number of Deaths (Thousands)',fontsize=14)
plt.title('Deaths of 25-34 year old Male and Female in USA for 2010-2017',fontsize=14)
plt.xticks(ind, year_labels,fontsize=14)
plt.legend((p1[0], p2[0]), ('Male', 'Female'))
plt.tick_params(axis='both', which='major', labelsize=12)

def autolabel(rects, rects2):
    """Label each bar in *rects* with its own height, positioned from *rects2*.

    The text is the height of the bar in *rects* rounded to one decimal. Its
    anchor is the horizontal centre of that bar, at 1.15 times the height of
    the bar at the same position in *rects2*, and the text hangs below the
    anchor (``va='top'``). Labels are drawn on the module-level ``ax``.

    Parameters
    ----------
    rects : sequence of bars whose heights are displayed.
    rects2 : sequence of bars, indexed in step with *rects*, that set the
        vertical position of each label.
    """
    for position, rect in enumerate(rects):
        anchor_bar = rects2[position]
        ax.annotate('{}'.format(round(rect.get_height(),1)),
                    xy=(rect.get_x() + rect.get_width() / 2, anchor_bar.get_height()*1.15),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='top', fontsize=12, color = 'white', weight="bold")
        
def autolabel2(rects, rects2):
    """Label each bar in *rects* with its height, anchored at the bar's mid-height.

    The text is the bar height rounded to one decimal, centred horizontally
    on the bar with its anchor at half the bar's height. Labels are drawn on
    the module-level ``ax``.

    Parameters
    ----------
    rects : sequence of bars to label.
    rects2 : unused; accepted so the call signature matches ``autolabel``.
    """
    for rect in rects:
        height = rect.get_height()
        ax.annotate('{}'.format(round(height,1)),
                    xy=(rect.get_x() + rect.get_width() / 2, height/2),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=12, color = 'white', weight="bold")
# Label the bottom bars (p1) at their mid-height, then label the top bars
# (p2), whose labels are positioned from the p1 bar heights. Both calls must
# run before show() so the labels are part of the rendered figure.
autolabel2(p1,p2)
autolabel(p2,p1)
# plt.savefig("Images/Deaths_in_USA.png", bbox_inches = 'tight', pad_inches = 1.0)
plt.show()

## Top 5 Leading causes of death for 25-34 years old Male/Female in the US in 2010 - 2017

In [None]:
# For each year, find the five causes with the most deaths and store each
# cause's share (in percent) of the deaths within that top five.
year_labels=[str(_) for _ in range(2010,2018,1)]
results_dict = {}
# The loop variable is deliberately NOT called `year`: that name holds the
# year chosen for the GDP map and must not be overwritten by this cell.
for cause_year in range(2010,2018,1):
    csv_file = os.path.join('Resources',f'CDC_USA_{cause_year}_25-34y.csv')
    mortality_in_USA = pd.read_csv(csv_file)
    mortality_in_USA = mortality_in_USA.drop(['Year','Year Code','State Code', 'Crude Rate', 'Population'], axis = 1)
    death_counts_per_cause = mortality_in_USA.groupby('Cause of death')['Deaths'].sum()
    cause_df = pd.DataFrame({'Number of Deaths':death_counts_per_cause})
    # Five largest causes, then ordered by cause name so every year lists its
    # categories in the same order.
    top_five_causes = cause_df.sort_values('Number of Deaths', ascending = False).head()
    top_five_causes = top_five_causes.sort_values('Cause of death')
    top_five_causes = top_five_causes.reset_index()
    cause_shares = top_five_causes['Number of Deaths']/top_five_causes['Number of Deaths'].sum()*100
    results_dict[cause_year] = list(cause_shares)
top_five_causes.head()

In [None]:
# Display names for the five bar segments.
# NOTE(review): presumably listed in the same order as the name-sorted causes
# stored in results_dict - verify against the CDC cause labels.
category_names = ['Narcotics', 'Opioids', 'Gun Violence', 'Suicide', 'Accidents']
results = results_dict

def survey(results, category_names):
    """Draw a horizontal stacked bar chart with one bar per key of *results*.

    Parameters
    ----------
    results : dict
        Maps each bar label to a list of values, one value per category.
        All lists are expected to have the same length, matching
        *category_names*.
    category_names : list of str
        Legend label for each category, in value order.

    Returns
    -------
    tuple
        ``(fig, ax, list_of_rects)`` where ``list_of_rects`` holds the result
        of each ``ax.barh`` call, one entry per category.
    """
    row_labels = list(results.keys())
    values = np.array(list(results.values()))
    running_totals = values.cumsum(axis=1)
    palette = plt.get_cmap('RdYlGn')(np.linspace(0.25, 0.75, values.shape[1]))

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.invert_yaxis()
    ax.set_xlim(0, np.sum(values, axis=1).max())
    ax.tick_params(axis='both', which='major', labelsize=14)

    list_of_rects = []
    # Walk the categories column by column; each segment starts where the
    # running total stood before this category was added.
    for colname, color, widths, segment_ends in zip(
            category_names, palette, values.T, running_totals.T):
        starts = segment_ends - widths
        segment = ax.barh(row_labels, widths, left=starts, height=0.5,
                          label=colname, color=color, edgecolor='darkgray')
        list_of_rects.append(segment)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 0.935),
              loc='lower left', fontsize=12, frameon=False)
    return fig, ax, list_of_rects


# Draw the stacked chart, then write each segment's percentage at its centre.
fig1, ax1, rects = survey(results, category_names)
for category_bars in rects:
    for rect in category_bars:
        width = rect.get_width()
        segment_centre = (rect.get_x() + width/2, rect.get_y()+rect.get_height()/2)
        ax1.annotate('{}%'.format(round(width,1)),
                    xy=segment_centre,
                    xytext=(0, 0),  # no offset: the text sits exactly on the centre
                    textcoords="offset points",
                    ha='center', va='center', fontsize='12', color = 'black')

plt.title('Top 5 leading causes of deaths for 25-34 year old Male/Female in US', fontsize = 16)
plt.xlabel('Percentage of total deaths', fontsize = 14)
plt.ylabel('Year', fontsize = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
# Hide every frame edge except the bottom axis.
for side in ('left', 'right', 'top'):
    ax1.spines[side].set_visible(False)
# plt.savefig("Images/Leading_causes_of_deaths_in_USA.png", bbox_inches = 'tight', pad_inches = 1.0)
plt.show()

# -.-.-.-.-. Nabeel's code ENDS Here -.-.-.-.-.