In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
import numpy as np
import requests
import time
import json
import string
from datetime import date 
from scipy.stats import linregress

# Data source (CDC open-data portal): https://data.cdc.gov/Nutrition-Physical-Activity-and-Obesity/Nutrition-Physical-Activity-and-Obesity-Behavioral/hn4x-zwk7



In [4]:
# Load the survey records from the CDC open-data (Socrata) JSON endpoint.
# The request is sent without credentials; no API key is needed.
url = "https://data.cdc.gov/resource/hn4x-zwk7.json"

# Without an explicit $limit the endpoint returns only its default page of
# 1000 rows. State the cap here so the truncation is visible and easy to raise.
ROW_LIMIT = 1000

# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
response = requests.get(url, params={"$limit": ROW_LIMIT}, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error payload as data.
response.raise_for_status()
data = response.json()


In [5]:
# Convert the raw JSON records into a Pandas DataFrame, keeping only the
# columns used in this analysis.
columns_of_interest = ['yearend', 'locationabbr', 'question',
                       'data_value', 'sample_size', 'age_years']

# Build the frame in a single chain so every step yields a fresh object;
# assigning columns onto a column-subset slice can emit SettingWithCopyWarning.
data_df = (
    pd.DataFrame(data)
    .loc[:, columns_of_interest]
    # Cast the two measurement columns to float so they can be used in arithmetic.
    .astype({'data_value': float, 'sample_size': float})
    # Discard rows that carry no sample size (original index labels are kept).
    .dropna(subset=['sample_size'])
)

# Show record count, dtypes and non-null counts
data_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 930 entries, 0 to 999
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   yearend       930 non-null    object 
 1   locationabbr  930 non-null    object 
 2   question      930 non-null    object 
 3   data_value    930 non-null    float64
 4   sample_size   930 non-null    float64
 5   age_years     230 non-null    object 
dtypes: float64(2), object(4)
memory usage: 50.9+ KB


In [6]:
# Derive a head count from each row's percentage:
#   sample_size * data_value * 0.01, rounded to the nearest whole number.
# The column is filled for every row, whatever question the row belongs to.
estimated_counts = data_df['sample_size'].mul(data_df['data_value']).mul(.01)
data_df['calculated_num_obese'] = estimated_counts.round()
data_df.head()

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,calculated_num_obese
0,2021,AL,Percent of adults who report consuming vegetab...,24.7,219.0,18 - 24,54.0
1,2021,AL,Percent of adults aged 18 years and older who ...,25.4,232.0,18 - 24,59.0
2,2021,AL,Percent of adults aged 18 years and older who ...,28.5,232.0,18 - 24,66.0
3,2021,AL,Percent of adults who engage in no leisure-tim...,19.3,254.0,18 - 24,49.0
4,2021,AL,Percent of adults who report consuming fruit l...,44.8,223.0,18 - 24,100.0


In [7]:
# Sum of sample_size over every row currently in data_df.
# NOTE(review): rows for different questions may describe the same respondents,
# so this total may over-count distinct participants - confirm.
sample_sizes = data_df['sample_size']
num_of_participants = sample_sizes.sum()
num_of_participants

1400392.0

In [8]:
# Distinct survey questions present in the dataset, printed in alphabetical order
list_of_questions = data_df.question.unique()
questions_sorted = sorted(list_of_questions)
print(questions_sorted)

['Percent of adults aged 18 years and older who have an overweight classification', 'Percent of adults aged 18 years and older who have obesity', 'Percent of adults who engage in no leisure-time physical activity', 'Percent of adults who report consuming fruit less than one time daily', 'Percent of adults who report consuming vegetables less than one time daily']


In [9]:
# Distinct age brackets found in the data (rows without a bracket show up as nan)
age_groups = data_df.age_years.unique()
print(age_groups)

['18 - 24' '25 - 34' '35 - 44' '45 - 54' '55 - 64' '65 or older' nan]


In [10]:
# Subset: rows for the overweight-classification question
overweight_question = 'Percent of adults aged 18 years and older who have an overweight classification'
overweight_df = data_df.loc[data_df['question'] == overweight_question]
overweight_df.head()

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,calculated_num_obese
2,2021,AL,Percent of adults aged 18 years and older who ...,28.5,232.0,18 - 24,66.0
6,2021,AL,Percent of adults aged 18 years and older who ...,21.1,433.0,25 - 34,91.0
13,2021,AL,Percent of adults aged 18 years and older who ...,31.8,521.0,35 - 44,166.0
18,2021,AL,Percent of adults aged 18 years and older who ...,33.8,594.0,45 - 54,201.0
21,2021,AL,Percent of adults aged 18 years and older who ...,32.7,826.0,55 - 64,270.0


In [11]:
# Subset: rows for the obesity question
is_obesity_row = data_df['question'].eq('Percent of adults aged 18 years and older who have obesity')
obese_df = data_df[is_obesity_row]
obese_df.head()

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,calculated_num_obese
1,2021,AL,Percent of adults aged 18 years and older who ...,25.4,232.0,18 - 24,59.0
7,2021,AL,Percent of adults aged 18 years and older who ...,46.6,433.0,25 - 34,202.0
11,2021,AL,Percent of adults aged 18 years and older who ...,45.8,521.0,35 - 44,239.0
17,2021,AL,Percent of adults aged 18 years and older who ...,46.4,594.0,45 - 54,276.0
20,2021,AL,Percent of adults aged 18 years and older who ...,44.4,826.0,55 - 64,367.0


In [13]:
# Subset: rows for the no-leisure-time-physical-activity question
no_leisure_question = 'Percent of adults who engage in no leisure-time physical activity'
no_leisure_df = data_df.loc[data_df.question == no_leisure_question]
no_leisure_df.head()

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,calculated_num_obese
3,2021,AL,Percent of adults who engage in no leisure-tim...,19.3,254.0,18 - 24,49.0
9,2021,AL,Percent of adults who engage in no leisure-tim...,23.3,475.0,25 - 34,111.0
12,2021,AL,Percent of adults who engage in no leisure-tim...,22.4,553.0,35 - 44,124.0
16,2021,AL,Percent of adults who engage in no leisure-tim...,35.6,638.0,45 - 54,227.0
23,2021,AL,Percent of adults who engage in no leisure-tim...,39.1,900.0,55 - 64,352.0


In [14]:
# Subset: rows for the fruit-less-than-once-daily question
is_low_fruit_row = data_df['question'].eq('Percent of adults who report consuming fruit less than one time daily')
no_fruit_df = data_df.loc[is_low_fruit_row]
no_fruit_df.head()

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,calculated_num_obese
4,2021,AL,Percent of adults who report consuming fruit l...,44.8,223.0,18 - 24,100.0
8,2021,AL,Percent of adults who report consuming fruit l...,49.4,419.0,25 - 34,207.0
10,2021,AL,Percent of adults who report consuming fruit l...,44.5,507.0,35 - 44,226.0
19,2021,AL,Percent of adults who report consuming fruit l...,49.2,577.0,45 - 54,284.0
24,2021,AL,Percent of adults who report consuming fruit l...,48.4,809.0,55 - 64,392.0


In [15]:
# Subset: rows for the vegetables-less-than-once-daily question
no_vegetables_question = 'Percent of adults who report consuming vegetables less than one time daily'
is_low_vegetable_row = data_df['question'] == no_vegetables_question
no_vegetables_df = data_df[is_low_vegetable_row]
no_vegetables_df.head()

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,calculated_num_obese
0,2021,AL,Percent of adults who report consuming vegetab...,24.7,219.0,18 - 24,54.0
5,2021,AL,Percent of adults who report consuming vegetab...,20.3,410.0,25 - 34,83.0
14,2021,AL,Percent of adults who report consuming vegetab...,16.7,499.0,35 - 44,83.0
15,2021,AL,Percent of adults who report consuming vegetab...,19.5,570.0,45 - 54,111.0
22,2021,AL,Percent of adults who report consuming vegetab...,21.2,811.0,55 - 64,172.0


In [16]:
# Subset: every row whose location abbreviation is 'AL'
is_alabama_row = data_df['locationabbr'].eq('AL')
alabama_df = data_df.loc[is_alabama_row]
alabama_df.head()

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,calculated_num_obese
0,2021,AL,Percent of adults who report consuming vegetab...,24.7,219.0,18 - 24,54.0
1,2021,AL,Percent of adults aged 18 years and older who ...,25.4,232.0,18 - 24,59.0
2,2021,AL,Percent of adults aged 18 years and older who ...,28.5,232.0,18 - 24,66.0
3,2021,AL,Percent of adults who engage in no leisure-tim...,19.3,254.0,18 - 24,49.0
4,2021,AL,Percent of adults who report consuming fruit l...,44.8,223.0,18 - 24,100.0
