In [19]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import gmaps
import os
import json 
import scipy.stats as st
from sklearn.linear_model import LinearRegression
import urllib
from scipy.stats import linregress
import time

In [20]:
# File to Load
file_to_load = "Resources/BRFSS__Table_of_Overweight_and_Obesity__BMI_.csv"

# Read Obesity CSV and store into Pandas data frame
obesity_df_full = pd.read_csv(file_to_load)
obesity_df_full.head(10)

Unnamed: 0,Year,Locationabbr,Locationdesc,Class,Topic,Question,Response,Break_Out,Break_Out_Category,Sample_Size,...,Data_Value_Footnote,DataSource,ClassId,TopicId,LocationID,BreakoutID,BreakOutCategoryID,QuestionID,ResponseID,GeoLocation
0,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Overweight (BMI 25.0-29.9),"Black, non-Hispanic",Race/Ethnicity,22,...,Prevalence estimate not available if the unwei...,BRFSS,CLASS14,TOPIC09,2,RACE02,CAT4,_BMI5CAT,RESP040,"(64.84507995700051, -147.72205903599973)"
1,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Underweight (BMI 12.0-18.4),"Multiracial, non-Hispanic",Race/Ethnicity,2,...,Prevalence estimate not available if the unwei...,BRFSS,CLASS14,TOPIC09,2,RACE07,CAT4,_BMI5CAT,RESP042,"(64.84507995700051, -147.72205903599973)"
2,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Obese (BMI 30.0 - 99.8),H.S. or G.E.D.,Education Attained,239,...,,BRFSS,CLASS14,TOPIC09,2,EDUCA2,CAT5,_BMI5CAT,RESP039,"(64.84507995700051, -147.72205903599973)"
3,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Overweight (BMI 25.0-29.9),35-44,Age Group,151,...,,BRFSS,CLASS14,TOPIC09,2,AGE03,CAT3,_BMI5CAT,RESP040,"(64.84507995700051, -147.72205903599973)"
4,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Normal Weight (BMI 18.5-24.9),"$15,000-$24,999",Household Income,95,...,,BRFSS,CLASS14,TOPIC09,2,INCOME2,CAT6,_BMI5CAT,RESP041,"(64.84507995700051, -147.72205903599973)"
5,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Obese (BMI 30.0 - 99.8),65+,Age Group,261,...,,BRFSS,CLASS14,TOPIC09,2,AGE09,CAT3,_BMI5CAT,RESP039,"(64.84507995700051, -147.72205903599973)"
6,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Overweight (BMI 25.0-29.9),"Less than $15,000",Household Income,49,...,,BRFSS,CLASS14,TOPIC09,2,INCOME1,CAT6,_BMI5CAT,RESP040,"(64.84507995700051, -147.72205903599973)"
7,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Normal Weight (BMI 18.5-24.9),"$25,000-$34,999",Household Income,48,...,,BRFSS,CLASS14,TOPIC09,2,INCOME3,CAT6,_BMI5CAT,RESP041,"(64.84507995700051, -147.72205903599973)"
8,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Underweight (BMI 12.0-18.4),"American Indian or Alaskan Native, non-Hispanic",Race/Ethnicity,5,...,Prevalence estimate not available if the unwei...,BRFSS,CLASS14,TOPIC09,2,RACE03,CAT4,_BMI5CAT,RESP042,"(64.84507995700051, -147.72205903599973)"
9,2019,AK,Alaska,Overweight and Obesity (BMI),BMI Categories,Weight classification by Body Mass Index (BMI)...,Overweight (BMI 25.0-29.9),"Other, non-Hispanic",Race/Ethnicity,19,...,Prevalence estimate not available if the unwei...,BRFSS,CLASS14,TOPIC09,2,RACE06,CAT4,_BMI5CAT,RESP040,"(64.84507995700051, -147.72205903599973)"


In [21]:
# Extract Columns that will be used in analysis
reduced_obesity_df = obesity_df_full.loc[:, ["Year", "Locationabbr", "Response", "Break_Out", "Break_Out_Category", 
                                             "Sample_Size", "BreakoutID", "GeoLocation"]]
reduced_obesity_df.head()



Unnamed: 0,Year,Locationabbr,Response,Break_Out,Break_Out_Category,Sample_Size,BreakoutID,GeoLocation
0,2019,AK,Overweight (BMI 25.0-29.9),"Black, non-Hispanic",Race/Ethnicity,22,RACE02,"(64.84507995700051, -147.72205903599973)"
1,2019,AK,Underweight (BMI 12.0-18.4),"Multiracial, non-Hispanic",Race/Ethnicity,2,RACE07,"(64.84507995700051, -147.72205903599973)"
2,2019,AK,Obese (BMI 30.0 - 99.8),H.S. or G.E.D.,Education Attained,239,EDUCA2,"(64.84507995700051, -147.72205903599973)"
3,2019,AK,Overweight (BMI 25.0-29.9),35-44,Age Group,151,AGE03,"(64.84507995700051, -147.72205903599973)"
4,2019,AK,Normal Weight (BMI 18.5-24.9),"$15,000-$24,999",Household Income,95,INCOME2,"(64.84507995700051, -147.72205903599973)"


In [22]:
# Rename columns for clarity
reduced_obesity_df = reduced_obesity_df.rename(columns={"Locationabbr": "State", 
                                                        "Response": "BMI Range", "Break_Out": "Class", 
                                                        "Break_Out_Category": "Class Category", 
                                                        "Sample_Size": "Number of Respondents",
                                                        "BreakoutID": "Class ID"})

reduced_obesity_df.head()

Unnamed: 0,Year,State,BMI Range,Class,Class Category,Number of Respondents,Class ID,GeoLocation
0,2019,AK,Overweight (BMI 25.0-29.9),"Black, non-Hispanic",Race/Ethnicity,22,RACE02,"(64.84507995700051, -147.72205903599973)"
1,2019,AK,Underweight (BMI 12.0-18.4),"Multiracial, non-Hispanic",Race/Ethnicity,2,RACE07,"(64.84507995700051, -147.72205903599973)"
2,2019,AK,Obese (BMI 30.0 - 99.8),H.S. or G.E.D.,Education Attained,239,EDUCA2,"(64.84507995700051, -147.72205903599973)"
3,2019,AK,Overweight (BMI 25.0-29.9),35-44,Age Group,151,AGE03,"(64.84507995700051, -147.72205903599973)"
4,2019,AK,Normal Weight (BMI 18.5-24.9),"$15,000-$24,999",Household Income,95,INCOME2,"(64.84507995700051, -147.72205903599973)"


In [23]:
# Reorder columns for clairty
reduced_obesity_df = reduced_obesity_df[['Year', 'State', 'Number of Respondents', 
                                         'BMI Range', 'Class', 'Class Category', 'Class ID', 'GeoLocation']]
reduced_obesity_df.head()

Unnamed: 0,Year,State,Number of Respondents,BMI Range,Class,Class Category,Class ID,GeoLocation
0,2019,AK,22,Overweight (BMI 25.0-29.9),"Black, non-Hispanic",Race/Ethnicity,RACE02,"(64.84507995700051, -147.72205903599973)"
1,2019,AK,2,Underweight (BMI 12.0-18.4),"Multiracial, non-Hispanic",Race/Ethnicity,RACE07,"(64.84507995700051, -147.72205903599973)"
2,2019,AK,239,Obese (BMI 30.0 - 99.8),H.S. or G.E.D.,Education Attained,EDUCA2,"(64.84507995700051, -147.72205903599973)"
3,2019,AK,151,Overweight (BMI 25.0-29.9),35-44,Age Group,AGE03,"(64.84507995700051, -147.72205903599973)"
4,2019,AK,95,Normal Weight (BMI 18.5-24.9),"$15,000-$24,999",Household Income,INCOME2,"(64.84507995700051, -147.72205903599973)"


In [24]:
# Split GeoLocation into lat and lng for loop
new_column = reduced_obesity_df["GeoLocation"].str.split(",", n = 1, expand = True) 
reduced_obesity_df["Lat"]= new_column[0] 
reduced_obesity_df["Lng"]= new_column[1] 
reduced_obesity_df.head()

Unnamed: 0,Year,State,Number of Respondents,BMI Range,Class,Class Category,Class ID,GeoLocation,Lat,Lng
0,2019,AK,22,Overweight (BMI 25.0-29.9),"Black, non-Hispanic",Race/Ethnicity,RACE02,"(64.84507995700051, -147.72205903599973)",(64.84507995700051,-147.72205903599973)
1,2019,AK,2,Underweight (BMI 12.0-18.4),"Multiracial, non-Hispanic",Race/Ethnicity,RACE07,"(64.84507995700051, -147.72205903599973)",(64.84507995700051,-147.72205903599973)
2,2019,AK,239,Obese (BMI 30.0 - 99.8),H.S. or G.E.D.,Education Attained,EDUCA2,"(64.84507995700051, -147.72205903599973)",(64.84507995700051,-147.72205903599973)
3,2019,AK,151,Overweight (BMI 25.0-29.9),35-44,Age Group,AGE03,"(64.84507995700051, -147.72205903599973)",(64.84507995700051,-147.72205903599973)
4,2019,AK,95,Normal Weight (BMI 18.5-24.9),"$15,000-$24,999",Household Income,INCOME2,"(64.84507995700051, -147.72205903599973)",(64.84507995700051,-147.72205903599973)


In [25]:
# Remove unwanted characters from the Lat and Lng Columns
reduced_obesity_df['Lat'] = reduced_obesity_df['Lat'].str.replace('(', '')
reduced_obesity_df['Lng'] = reduced_obesity_df['Lng'].str.replace(')', '')

In [26]:
# Remove Geolocation
reduced_obesity_df = reduced_obesity_df.drop(columns=['GeoLocation'])

In [27]:
# Remove commas from Number of Respondents
reduced_obesity_df['Number of Respondents'] = reduced_obesity_df['Number of Respondents'].str.replace(',', '')

In [32]:
# Convert Number of Respondents Columns to integer
reduced_obesity_df['Number of Respondents'] = pd.to_numeric(reduced_obesity_df['Number of Respondents'])
reduced_obesity_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47074 entries, 0 to 47073
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Year                   47074 non-null  int64 
 1   State                  47074 non-null  object
 2   Number of Respondents  47074 non-null  int64 
 3   BMI Range              47074 non-null  object
 4   Class                  47074 non-null  object
 5   Class Category         47074 non-null  object
 6   Class ID               47074 non-null  object
 7   Lat                    47002 non-null  object
 8   Lng                    47002 non-null  object
dtypes: int64(2), object(7)
memory usage: 3.2+ MB


In [43]:
# Keep only most recent year (2019)
recent_df = reduced_obesity_df.loc[reduced_obesity_df['Year'] == 2019]
recent_df

Unnamed: 0,Year,State,Number of Respondents,BMI Range,Class,Class Category,Class ID,Lat,Lng
0,2019,AK,22,Overweight (BMI 25.0-29.9),"Black, non-Hispanic",Race/Ethnicity,RACE02,64.84507995700051,-147.72205903599973
1,2019,AK,2,Underweight (BMI 12.0-18.4),"Multiracial, non-Hispanic",Race/Ethnicity,RACE07,64.84507995700051,-147.72205903599973
2,2019,AK,239,Obese (BMI 30.0 - 99.8),H.S. or G.E.D.,Education Attained,EDUCA2,64.84507995700051,-147.72205903599973
3,2019,AK,151,Overweight (BMI 25.0-29.9),35-44,Age Group,AGE03,64.84507995700051,-147.72205903599973
4,2019,AK,95,Normal Weight (BMI 18.5-24.9),"$15,000-$24,999",Household Income,INCOME2,64.84507995700051,-147.72205903599973
...,...,...,...,...,...,...,...,...,...
5411,2019,WY,34,Underweight (BMI 12.0-18.4),65+,Age Group,AGE09,43.23554134300048,-108.10983035299967
5412,2019,WY,789,Overweight (BMI 25.0-29.9),65+,Age Group,AGE09,43.23554134300048,-108.10983035299967
5413,2019,WY,67,Underweight (BMI 12.0-18.4),"White, non-Hispanic",Race/Ethnicity,RACE01,43.23554134300048,-108.10983035299967
5414,2019,WY,458,Obese (BMI 30.0 - 99.8),Some post-H.S.,Education Attained,EDUCA3,43.23554134300048,-108.10983035299967


In [44]:
# Drop year column
recent_df = recent_df.drop(columns=['Year'])
recent_df.head()

Unnamed: 0,State,Number of Respondents,BMI Range,Class,Class Category,Class ID,Lat,Lng
0,AK,22,Overweight (BMI 25.0-29.9),"Black, non-Hispanic",Race/Ethnicity,RACE02,64.84507995700051,-147.7220590359997
1,AK,2,Underweight (BMI 12.0-18.4),"Multiracial, non-Hispanic",Race/Ethnicity,RACE07,64.84507995700051,-147.7220590359997
2,AK,239,Obese (BMI 30.0 - 99.8),H.S. or G.E.D.,Education Attained,EDUCA2,64.84507995700051,-147.7220590359997
3,AK,151,Overweight (BMI 25.0-29.9),35-44,Age Group,AGE03,64.84507995700051,-147.7220590359997
4,AK,95,Normal Weight (BMI 18.5-24.9),"$15,000-$24,999",Household Income,INCOME2,64.84507995700051,-147.7220590359997


In [63]:
# Look at data set
recent_df.describe()

Unnamed: 0,Number of Respondents
count,5416.0
mean,412.055761
std,662.256803
min,0.0
25%,22.0
50%,146.0
75%,505.0
max,5688.0


In [90]:
# Keep only number of Respondents and BMI Range for state v. state comparison
recent_df_reduced = recent_df.loc[:, ["State", "Number of Respondents", "BMI Range"]]
recent_df_reduced.head()

Unnamed: 0,State,Number of Respondents,BMI Range
0,AK,22,Overweight (BMI 25.0-29.9)
1,AK,2,Underweight (BMI 12.0-18.4)
2,AK,239,Obese (BMI 30.0 - 99.8)
3,AK,151,Overweight (BMI 25.0-29.9)
4,AK,95,Normal Weight (BMI 18.5-24.9)


In [91]:
# Display the total number of respondents
respondent_total = recent_df_reduced["Number of Respondents"].sum()
respondent_total

2231694

In [134]:
# Get sum of total respondents for each state
state_sum = recent_df_reduced.groupby('State')['Number of Respondents'].sum()

# Get sum of total obese respondents for each state
obese_sum = recent_df_reduced.loc[recent_df_reduced["BMI Range"] == 'Overweight (BMI 25.0-29.9)'].groupby("State")["Number of Respondents"].sum()

# Find Obesity Rates for each state
obesity_rates = (obese_sum / state_sum) * 100
obesity_rates

State
AK    36.340255
AL    35.216498
AR    34.360647
AZ    35.952376
CA    37.178497
CO    36.537435
CT    37.972841
DC    34.004458
DE    35.338611
FL    35.991669
GA    34.436262
GU    36.398324
HI    32.885766
IA    35.028335
ID    34.817630
IL    34.914425
IN    35.390363
KS    35.284653
KY    36.262154
LA    34.648111
MA    36.374281
MD    35.878605
ME    35.618724
MI    35.474128
MN    36.635239
MO    34.247240
MS    32.439688
MT    37.083909
NC    36.425964
ND    36.969258
NE    35.799008
NH    37.241311
NM    35.670956
NV    35.926074
NY    36.224525
OH    34.922721
OK    35.782082
OR    36.091161
PA    35.833564
PR    37.335132
RI    36.114606
SC    35.540610
SD    37.780632
TN    33.646129
TX    35.112590
US    25.000000
UT    35.322835
UW    25.000000
VA    35.942825
VT    34.870553
WA    36.399229
WI    36.327167
WV    33.578113
WY    37.102473
Name: Number of Respondents, dtype: float64

In [137]:
# Create New DF

obese_summary_df = pd.DataFrame({
                                "State Obesity Rate for 2019": obesity_rates
                                    })
obese_summary_df.style.format({
                                "State Obesity Rate for 2019":"%{:,.2f}",
                                   })

Unnamed: 0_level_0,State Obesity Rate for 2019
State,Unnamed: 1_level_1
AK,%36.34
AL,%35.22
AR,%34.36
AZ,%35.95
CA,%37.18
CO,%36.54
CT,%37.97
DC,%34.00
DE,%35.34
FL,%35.99


In [None]:
# Create Bar Graph Representing Info