In [26]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
import numpy as np
import requests
import time
import json
import string
from datetime import date 
from scipy.stats import linregress

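# Data source: CDC "Nutrition, Physical Activity, and Obesity - Behavioral Risk Factor Surveillance System" dataset
# https://data.cdc.gov/Nutrition-Physical-Activity-and-Obesity/Nutrition-Physical-Activity-and-Obesity-Behavioral/hn4x-zwk7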



In [27]:
# Download the CDC nutrition / physical-activity / obesity survey records as JSON.
# The endpoint is public, so no API key is needed.
url = "https://data.cdc.gov/resource/hn4x-zwk7.json"

# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() fails here on an HTTP error instead of letting an
# error payload flow into the DataFrame cells further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# NOTE(review): the frame built from this response has exactly 1000 rows, which
# looks like the API's default page size rather than the whole dataset --
# confirm whether paging (e.g. a larger $limit) is wanted.

# Preview the first records only; the full list is far too long to display.
data[:3]

[{'yearstart': '2021',
  'yearend': '2021',
  'locationabbr': 'AL',
  'locationdesc': 'Alabama',
  'datasource': 'Behavioral Risk Factor Surveillance System',
  'class': 'Obesity / Weight Status',
  'topic': 'Obesity / Weight Status',
  'question': 'Percent of adults aged 18 years and older who have obesity',
  'data_value_type': 'Value',
  'data_value': '25.4',
  'data_value_alt': '25.4',
  'low_confidence_limit': '19.2',
  'high_confidence_limit': '32.8',
  'sample_size': '232.0',
  'age_years': '18 - 24',
  'geolocation': {'latitude': '32.84057112200048',
   'longitude': '-86.63186076199969',
   'human_address': '{"address": "", "city": "", "state": "", "zip": ""}'},
  'classid': 'OWS',
  'topicid': 'OWS1',
  'questionid': 'Q036',
  'datavaluetypeid': 'VALUE',
  'locationid': '01',
  'stratificationcategory1': 'Age (years)',
  'stratification1': '18 - 24',
  'stratificationcategoryid1': 'AGEYR',
  'stratificationid1': 'AGEYR1824'},
 {'yearstart': '2021',
  'yearend': '2021',
  'loca

In [28]:
# Turn the CDC survey records into a DataFrame, keep only the columns used in
# this analysis, and convert the two numeric measures from strings to floats
# (records without a value become NaN).
analysis_columns = ['yearend', 'locationabbr', 'question', 'data_value', 'sample_size', 'age_years']
data_df = (
    pd.DataFrame(data)[analysis_columns]
    .astype({'data_value': float, 'sample_size': float})
)

# Column dtypes and non-null counts
data_df.info()
# Preview of the resulting frame
data_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   yearend       1000 non-null   object 
 1   locationabbr  1000 non-null   object 
 2   question      1000 non-null   object 
 3   data_value    930 non-null    float64
 4   sample_size   930 non-null    float64
 5   age_years     230 non-null    object 
dtypes: float64(2), object(4)
memory usage: 47.0+ KB


Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,2021,AL,Percent of adults who report consuming fruit l...,44.8,223.0,18 - 24,,,,
1,2021,AL,Percent of adults aged 18 years and older who ...,25.4,232.0,18 - 24,,,,
2,2021,AL,Percent of adults who engage in no leisure-tim...,19.3,254.0,18 - 24,,,,
3,2021,AL,Percent of adults who report consuming vegetab...,24.7,219.0,18 - 24,,,,
3,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who have an overweight classification,28.5,28.5,21.6,36.5,232.0,18 - 24
4,2021,AL,Percent of adults aged 18 years and older who ...,28.5,232.0,18 - 24,,,,
...,...,...,...,...,...,...,,,,
995,2021,DE,Percent of adults who report consuming fruit l...,45.5,479.0,45 - 54,,,,
996,2021,DE,Percent of adults aged 18 years and older who ...,33.9,489.0,45 - 54,,,,
997,2021,DE,Percent of adults who report consuming vegetab...,15.2,465.0,45 - 54,,,,


In [34]:
# Estimated number of respondents behind each percentage:
# sample_size * data_value (a percent) * 0.01, rounded to a whole number.
# NOTE(review): this is computed for every question row, not only the obesity
# question, so the column name is broader than "obese" suggests -- confirm intent.
estimated_counts = data_df['sample_size'] * data_df['data_value'] * .01
data_df['calculated_num_obese'] = estimated_counts.round()
data_df

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,calculated_num_obese
0,2021,AL,Percent of adults who report consuming fruit l...,44.8,223.0,18 - 24,100.0
1,2021,AL,Percent of adults aged 18 years and older who ...,25.4,232.0,18 - 24,59.0
2,2021,AL,Percent of adults who engage in no leisure-tim...,19.3,254.0,18 - 24,49.0
3,2021,AL,Percent of adults who report consuming vegetab...,24.7,219.0,18 - 24,54.0
4,2021,AL,Percent of adults aged 18 years and older who ...,28.5,232.0,18 - 24,66.0
...,...,...,...,...,...,...,...
995,2021,DE,Percent of adults who report consuming fruit l...,45.5,479.0,45 - 54,218.0
996,2021,DE,Percent of adults aged 18 years and older who ...,33.9,489.0,45 - 54,166.0
997,2021,DE,Percent of adults who report consuming vegetab...,15.2,465.0,45 - 54,71.0
998,2021,DE,Percent of adults aged 18 years and older who ...,39.6,489.0,45 - 54,194.0


In [5]:
# Sum of the sample_size column over every row (missing values are skipped).
# NOTE(review): the frame has one row per question / stratification, so the same
# respondents may be counted more than once -- confirm this is the figure wanted.
sample_sizes = data_df['sample_size']
num_of_participants = sample_sizes.sum()
num_of_participants

1400392.0

In [6]:
# Distinct survey questions present in the dataset, printed in alphabetical order.
question_column = data_df['question']
list_of_questions = question_column.unique()
alphabetical_questions = sorted(list_of_questions)
print(alphabetical_questions)

['Percent of adults aged 18 years and older who have an overweight classification', 'Percent of adults aged 18 years and older who have obesity', 'Percent of adults who engage in no leisure-time physical activity', 'Percent of adults who report consuming fruit less than one time daily', 'Percent of adults who report consuming vegetables less than one time daily']


In [7]:
# Distinct values of the age_years column, in order of first appearance.
# Rows with no age_years value contribute a single NaN entry.
age_column = data_df['age_years']
age_groups = age_column.unique()
print(age_groups)

['18 - 24' '25 - 34' '35 - 44' '45 - 54' '55 - 64' '65 or older' nan]


In [8]:
# Rows answering the "overweight classification" question only.
overweight_question = 'Percent of adults aged 18 years and older who have an overweight classification'
is_overweight_row = data_df['question'].eq(overweight_question)
overweight_df = data_df.loc[is_overweight_row]
overweight_df

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years
1,2021,AL,Percent of adults aged 18 years and older who ...,28.5,232.0,18 - 24
9,2021,AL,Percent of adults aged 18 years and older who ...,21.1,433.0,25 - 34
12,2021,AL,Percent of adults aged 18 years and older who ...,31.8,521.0,35 - 44
19,2021,AL,Percent of adults aged 18 years and older who ...,33.8,594.0,45 - 54
20,2021,AL,Percent of adults aged 18 years and older who ...,32.7,826.0,55 - 64
...,...,...,...,...,...,...
978,2021,CT,Percent of adults aged 18 years and older who ...,35.6,5161.0,
980,2021,DE,Percent of adults aged 18 years and older who ...,22.5,180.0,18 - 24
989,2021,DE,Percent of adults aged 18 years and older who ...,31.2,329.0,25 - 34
990,2021,DE,Percent of adults aged 18 years and older who ...,35.5,419.0,35 - 44


In [9]:
# Rows answering the "have obesity" question only.
obesity_question = 'Percent of adults aged 18 years and older who have obesity'
is_obesity_row = data_df['question'].eq(obesity_question)
obese_df = data_df.loc[is_obesity_row]
obese_df

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years
3,2021,AL,Percent of adults aged 18 years and older who ...,25.4,232.0,18 - 24
5,2021,AL,Percent of adults aged 18 years and older who ...,46.6,433.0,25 - 34
14,2021,AL,Percent of adults aged 18 years and older who ...,45.8,521.0,35 - 44
16,2021,AL,Percent of adults aged 18 years and older who ...,46.4,594.0,45 - 54
23,2021,AL,Percent of adults aged 18 years and older who ...,44.4,826.0,55 - 64
...,...,...,...,...,...,...
975,2021,CT,Percent of adults aged 18 years and older who ...,28.8,5161.0,
984,2021,DE,Percent of adults aged 18 years and older who ...,17.5,180.0,18 - 24
986,2021,DE,Percent of adults aged 18 years and older who ...,31.4,329.0,25 - 34
991,2021,DE,Percent of adults aged 18 years and older who ...,38.3,419.0,35 - 44


In [10]:
# Rows answering the "no leisure-time physical activity" question only.
no_leisure_question = 'Percent of adults who engage in no leisure-time physical activity'
is_no_leisure_row = data_df['question'].eq(no_leisure_question)
no_leisure_df = data_df.loc[is_no_leisure_row]
no_leisure_df

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years
0,2021,AL,Percent of adults who engage in no leisure-tim...,19.3,254.0,18 - 24
9,2021,AL,Percent of adults who engage in no leisure-tim...,23.3,475.0,25 - 34
11,2021,AL,Percent of adults who engage in no leisure-tim...,22.4,553.0,35 - 44
15,2021,AL,Percent of adults who engage in no leisure-tim...,35.6,638.0,45 - 54
21,2021,AL,Percent of adults who engage in no leisure-tim...,39.1,900.0,55 - 64
...,...,...,...,...,...,...
977,2021,CT,Percent of adults who engage in no leisure-tim...,18.4,5949.0,
981,2021,DE,Percent of adults who engage in no leisure-tim...,21.3,210.0,18 - 24
987,2021,DE,Percent of adults who engage in no leisure-tim...,24.7,372.0,25 - 34
992,2021,DE,Percent of adults who engage in no leisure-tim...,22.3,487.0,35 - 44


In [11]:
# Rows answering the "fruit less than one time daily" question only.
no_fruit_question = 'Percent of adults who report consuming fruit less than one time daily'
is_no_fruit_row = data_df['question'].eq(no_fruit_question)
no_fruit_df = data_df.loc[is_no_fruit_row]
no_fruit_df

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years
4,2021,AL,Percent of adults who report consuming fruit l...,44.8,223.0,18 - 24
8,2021,AL,Percent of adults who report consuming fruit l...,49.4,419.0,25 - 34
10,2021,AL,Percent of adults who report consuming fruit l...,44.5,507.0,35 - 44
17,2021,AL,Percent of adults who report consuming fruit l...,49.2,577.0,45 - 54
22,2021,AL,Percent of adults who report consuming fruit l...,48.4,809.0,55 - 64
...,...,...,...,...,...,...
976,2021,CT,Percent of adults who report consuming fruit l...,35.7,5113.0,
983,2021,DE,Percent of adults who report consuming fruit l...,44.2,175.0,18 - 24
985,2021,DE,Percent of adults who report consuming fruit l...,39.5,330.0,25 - 34
993,2021,DE,Percent of adults who report consuming fruit l...,39.8,412.0,35 - 44


In [12]:
# Rows answering the "vegetables less than one time daily" question only.
no_vegetables_question = 'Percent of adults who report consuming vegetables less than one time daily'
is_no_vegetables_row = data_df['question'].eq(no_vegetables_question)
no_vegetables_df = data_df.loc[is_no_vegetables_row]
no_vegetables_df

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years
2,2021,AL,Percent of adults who report consuming vegetab...,24.7,219.0,18 - 24
5,2021,AL,Percent of adults who report consuming vegetab...,20.3,410.0,25 - 34
13,2021,AL,Percent of adults who report consuming vegetab...,16.7,499.0,35 - 44
18,2021,AL,Percent of adults who report consuming vegetab...,19.5,570.0,45 - 54
23,2021,AL,Percent of adults who report consuming vegetab...,21.2,811.0,55 - 64
...,...,...,...,...,...,...
979,2021,CT,Percent of adults who report consuming vegetab...,16.2,4932.0,
982,2021,DE,Percent of adults who report consuming vegetab...,35.9,167.0,18 - 24
987,2021,DE,Percent of adults who report consuming vegetab...,24.1,322.0,25 - 34
994,2021,DE,Percent of adults who report consuming vegetab...,20.4,397.0,35 - 44


In [13]:
# All rows whose location abbreviation is Alabama ('AL').
is_alabama_row = data_df['locationabbr'].eq('AL')
alabama_df = data_df.loc[is_alabama_row]
alabama_df

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,2021,AL,Percent of adults who engage in no leisure-tim...,19.3,254.0,18 - 24,,,,
1,2021,AL,Percent of adults aged 18 years and older who ...,28.5,232.0,18 - 24,,,,
2,2021,AL,Percent of adults who report consuming vegetab...,24.7,219.0,18 - 24,,,,
3,2021,AL,Percent of adults aged 18 years and older who ...,25.4,232.0,18 - 24,,,,
3,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who have an overweight classification,28.5,28.5,21.6,36.5,232.0,18 - 24
4,2021,AL,Percent of adults who report consuming fruit l...,44.8,223.0,18 - 24,,,,
...,...,...,...,...,...,...,,,,
135,2021,AL,Percent of adults who engage in no leisure-tim...,31.0,3080.0,,,,,
136,2021,AL,Percent of adults aged 18 years and older who ...,33.3,2888.0,,,,,
137,2021,AL,Percent of adults aged 18 years and older who ...,36.8,2888.0,,,,,


In [14]:
# Show the complete Alabama table: every row, every column, and the question
# text without truncation.
# Reference: https://www.geeksforgeeks.org/how-to-print-an-entire-pandas-dataframe-in-python/
#
# option_context applies the display settings only inside the `with` block and
# restores the previous values on exit, so no global pandas state is left
# changed for later cells.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,  # None is the documented "no limit" value
):
    # display() is provided by the IPython/Jupyter kernel; it must be called
    # inside the block because a trailing bare expression would only be
    # rendered after the options had already been restored.
    display(alabama_df)

Unnamed: 0,yearend,locationabbr,question,data_value,sample_size,age_years,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,2021,AL,Percent of adults who engage in no leisure-time physical activity,19.3,254.0,18 - 24,,,,
1,2021,AL,Percent of adults aged 18 years and older who have an overweight classification,28.5,232.0,18 - 24,,,,
2,2021,AL,Percent of adults who report consuming vegetables less than one time daily,24.7,219.0,18 - 24,,,,
3,2021,AL,Percent of adults aged 18 years and older who have obesity,25.4,232.0,18 - 24,,,,
3,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who have an overweight classification,28.5,28.5,21.6,36.5,232.0,18 - 24
4,2021,AL,Percent of adults who report consuming fruit less than one time daily,44.8,223.0,18 - 24,,,,
5,2021,AL,Percent of adults who report consuming vegetables less than one time daily,20.3,410.0,25 - 34,,,,
6,2021,AL,Percent of adults aged 18 years and older who have obesity,46.6,433.0,25 - 34,,,,
6,2021,AL,Percent of adults who engage in no leisure-time physical activity,23.3,475.0,25 - 34,,,,
7,2021,AL,Percent of adults who report consuming fruit less than one time daily,49.4,419.0,25 - 34,,,,


In [15]:
# Restore the pandas display options used for the full-table view to their defaults.
# Reference: https://stackoverflow.com/questions/26246864/restoring-the-default-display-context-in-pandas
#
# Only these four options are reset: reset_option('all') would also reset every
# other pandas option, including any set deliberately elsewhere (and on some
# pandas versions it additionally emits warnings for deprecated options).
for option_name in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.reset_option(option_name)