In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Load the cleaned obesity table; the first CSV column becomes the row index.
obs = pd.read_csv(
    './obesity_clean.csv',
    index_col = 0,
)

In [3]:
# Peek at the first three rows to sanity-check the load.
obs.head(n = 3)

Unnamed: 0,country,year,pct_obese,country_code,sex,latest
7,Afghanistan,2016,4.5,AFG,BTSX,True
9,Afghanistan,2015,4.3,AFG,BTSX,False
11,Afghanistan,2014,4.1,AFG,BTSX,False


In [4]:
# Show the dtype pandas assigned to each column of the loaded frame.
obs.dtypes

country          object
year              int64
pct_obese       float64
country_code     object
sex              object
latest             bool
dtype: object

In [33]:
# World map coloured by pct_obese, using only the rows whose year is 2016.
fig = px.choropleth(
    obs.query('year == 2016'),
    locations = 'country_code',
    color = 'pct_obese',
    hover_name = 'country',
    labels = {'pct_obese': 'Percent Obese'},
    color_continuous_scale = px.colors.sequential.Reds,
)

# Figure title, drawn as an arrow-less annotation anchored by its bottom-left
# corner at (x = 0.0, y = 0.95).
title_note = dict(
    x = 0.0,
    y = .95,
    xanchor = 'left',
    yanchor = 'bottom',
    text = 'World Obesity Prevalence by Country',
    font = dict(family = 'Arial', size = 24, color = 'rgb(37,37,37)'),
    showarrow = False,
)

# Data-source credit, anchored by its top-centre at (x = 0.5, y = 0).
source_note = dict(
    x = 0.5,
    y = 0,
    xanchor = 'center',
    yanchor = 'top',
    text = 'Source: World Health Organization',
    font = dict(family = 'Arial', size = 12, color = 'rgb(150,150,150)'),
    showarrow = False,
)

# Attach both text overlays in one layout update, title first.
annotations = [title_note, source_note]
fig.update_layout(annotations = annotations)

fig.show()

In [6]:
# Reshape to one row per year and one column per country so that a per-year
# mean across countries can be taken afterwards. pivot_table's default
# aggregation averages pct_obese if a (year, country) pair occurs more than once.
obs_means = (
    obs
    .pivot_table(index = 'year', columns = 'country', values = 'pct_obese')
    .reset_index()                      # bring 'year' back as a regular column
    .rename_axis('', axis = 'columns')  # blank out the columns-axis name
)

In [7]:
# Row-wise mean over every column except 'year', ignoring missing values, so
# each year's figure is an unweighted average of the columns that have data.
value_cols = obs_means.columns[obs_means.columns != 'year']
obs_means['mean_yr'] = obs_means[value_cols].mean(axis = 1, skipna = True)

In [8]:
# Preview the first three yearly rows, including the new mean_yr column.
obs_means.head(n = 3)

Unnamed: 0,year,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela (Bolivarian Republic of),Viet Nam,Yemen,Yemen Arab Republic (until 1990),Zambia,Zimbabwe,mean_yr
0,1975,0.4,5.7,5.9,13.4,0.7,5.1,11.1,6.6,10.5,...,11.8,3.9,4.7,8.3,,,2.5,1.3,3.0,6.074346
1,1976,0.4,5.8,6.1,14.0,0.8,5.3,11.5,6.8,10.7,...,12.1,4.0,4.9,8.6,,,2.6,1.4,3.2,6.270681
2,1977,0.5,6.0,6.2,14.5,0.8,5.4,11.8,7.0,11.0,...,12.4,4.2,5.1,8.8,0.1,,2.7,1.5,3.3,6.467539


In [9]:
# Line chart of the per-year mean obesity rate (mean_yr) against year.
# Readable axis labels and a title are supplied so the figure stands on its
# own instead of exposing the raw column names.
fig = px.line(obs_means,
              x = 'year',
              y = 'mean_yr',
              labels = {'year': 'Year', 'mean_yr': 'Mean Percent Obese'},
              title = 'Mean Obesity Prevalence Across Countries by Year')

fig.show()

In [10]:
# Static overlaid histograms comparing the distribution of pct_obese in the
# earliest year of the data with the rows flagged as `latest`.
first_year = obs['year'].min()
first_rows = obs[obs['year'] == first_year]
latest_rows = obs[obs['latest']]          # `latest` is already boolean
# Legend label for the latest rows.
# NOTE(review): assumes all rows flagged `latest` share a single year - confirm.
latest_year = latest_rows['year'].max()

# Both traces use the same explicit bin edges. With different bin widths the
# bar heights of the two overlaid distributions cannot be compared.
shared_bins = dict(start = 0, size = 1)

fig = go.Figure()
fig.add_trace(go.Histogram(x = first_rows['pct_obese'],
                           xbins = shared_bins,
                           name = str(first_year)))
fig.add_trace(go.Histogram(x = latest_rows['pct_obese'],
                           xbins = shared_bins,
                           name = str(latest_year)))

fig.update_layout(barmode = 'overlay',
                  title = 'Distribution of Obesity Prevalence by Year',
                  xaxis_title = 'Percent Obese',
                  yaxis_title = 'Count')
# Semi-transparent bars keep the trace underneath visible.
fig.update_traces(opacity = 0.75)

fig.show()

In [11]:
# Animated histogram showing how the distribution of pct_obese changes over time.
# Rows are sorted by year first so the animation frames run chronologically.
obs_yrs = obs.sort_values(by = ['year'], ascending = True)

# Fixed axis ranges keep every frame on the same scale; a readable axis label
# and a title are supplied so the figure stands on its own.
fig = px.histogram(obs_yrs,
                   x = 'pct_obese',
                   animation_frame = 'year',
                   nbins = 60,
                   range_x = [0, 65],
                   range_y = [0, 40],
                   labels = {'pct_obese': 'Percent Obese', 'year': 'Year'},
                   title = 'Distribution of Obesity Prevalence Over Time')

fig.show()

In [12]:
# Wide table with one row per country and one column per year of pct_obese.
obs_years = (
    obs
    .pivot_table(values = 'pct_obese', columns = 'year', index = 'country')
    .reset_index()                      # bring 'country' back as a regular column
    .rename_axis('', axis = 'columns')  # blank out the columns-axis name
)

obs_years.head(n = 3)

Unnamed: 0,country,1975,1976,1977,1978,1979,1980,1981,1982,1983,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,Afghanistan,0.4,0.4,0.5,0.5,0.5,0.6,0.6,0.6,0.7,...,2.8,2.9,3.1,3.3,3.5,3.7,3.9,4.1,4.3,4.5
1,Albania,5.7,5.8,6.0,6.1,6.3,6.4,6.6,6.8,7.0,...,16.9,17.5,18.1,18.7,19.3,19.9,20.5,21.1,21.7,22.3
2,Algeria,5.9,6.1,6.2,6.4,6.7,6.9,7.2,7.4,7.8,...,19.2,19.9,20.7,21.4,22.2,23.1,23.9,24.8,25.7,26.6


In [13]:
# Smallest pct_obese over all country rows, for the 1975 and 2016 columns.
lowest_rates = obs_years[[1975, 2016]].min()
print('Lowest Reported Obesity Rates:', lowest_rates.to_string())

Lowest Reported Obesity Rates: 
1975    0.1
2016    2.1


In [14]:
# Largest pct_obese over all country rows, for the 1975 and 2016 columns.
highest_rates = obs_years[[1975, 2016]].max()
print('Highest Reported Obesity Rates:', highest_rates.to_string())

Highest Reported Obesity Rates: 
1975    41.5
2016    60.7


In [15]:
# Mean of pct_obese over the country rows for 1975 and 2016. Every row counts
# equally, so this is an unweighted cross-country average.
average_rates = obs_years[[1975, 2016]].mean()
print('Average Worldwide Obesity Rates:', average_rates.to_string())

Average Worldwide Obesity Rates: 
1975     6.074346
2016    19.790052


In [16]:
# Compare the 1975 and 2016 values for three selected countries.
focus_countries = ['India', 'China', 'United States of America']
is_focus = obs_years['country'].isin(focus_countries)
obs_years.loc[is_focus, ['country', 1975, 2016]]

Unnamed: 0,country,1975,2016
35,China,0.4,6.6
79,India,0.3,3.8
185,United States of America,11.7,37.3


In [17]:
# Ten countries with the highest 2016 value, shown next to their 1975 value.
(
    obs_years[['country', 1975, 2016]]
    .sort_values(by = 2016, ascending = False)
    .head(10)
)

Unnamed: 0,country,1975,2016
120,Nauru,41.5,60.7
39,Cook Islands,25.1,55.3
131,Palau,25.1,54.9
109,Marshall Islands,23.8,52.4
179,Tuvalu,15.7,51.0
127,Niue,16.7,49.3
174,Tonga,17.8,45.9
91,Kiribati,,45.6
148,Samoa,17.6,45.5
113,Micronesia (Federated States of),16.8,41.6


In [18]:
# Write both derived tables to CSV in the working directory. The row index is
# written as the first column (the to_csv default).
for frame, out_path in ((obs_means, './obesity_means.csv'),
                        (obs_years, './obesity_years.csv')):
    frame.to_csv(out_path)