In [47]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import string
from datetime import date 
from scipy.stats import linregress

#Notes: https://data.cdc.gov/Nutrition-Physical-Activity-and-Obesity/Nutrition-Physical-Activity-and-Obesity-Behavioral/hn4x-zwk7



In [48]:
#Import the API Key
from api_key import api_key

#import data
url = "https://data.cdc.gov/resource/hn4x-zwk7.json"

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error status instead of parsing an error body as data.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# NOTE(review): the DataFrame summary recorded later in this notebook shows
# exactly 1000 rows, which looks like an API default page size rather than the
# full dataset -- confirm whether paging ($limit / $offset) is needed.

# Preview the first records instead of echoing the entire payload
data[:2]

[{'yearstart': '2021',
  'yearend': '2021',
  'locationabbr': 'AL',
  'locationdesc': 'Alabama',
  'datasource': 'Behavioral Risk Factor Surveillance System',
  'class': 'Fruits and Vegetables',
  'topic': 'Fruits and Vegetables - Behavior',
  'question': 'Percent of adults who report consuming fruit less than one time daily',
  'data_value_type': 'Value',
  'data_value': '44.8',
  'data_value_alt': '44.8',
  'low_confidence_limit': '37.0',
  'high_confidence_limit': '52.9',
  'sample_size': '223.0',
  'age_years': '18 - 24',
  'geolocation': {'latitude': '32.84057112200048',
   'longitude': '-86.63186076199969',
   'human_address': '{"address": "", "city": "", "state": "", "zip": ""}'},
  'classid': 'FV',
  'topicid': 'FV1',
  'questionid': 'Q018',
  'datavaluetypeid': 'VALUE',
  'locationid': '01',
  'stratificationcategory1': 'Age (years)',
  'stratification1': '18 - 24',
  'stratificationcategoryid1': 'AGEYR',
  'stratificationid1': 'AGEYR1824'},
 {'yearstart': '2021',
  'yearend':

In [92]:
# Convert the CDC survey records fetched above into a Pandas DataFrame,
# keeping only the columns used in this analysis.
# The selection is taken from the frame built from `data`; the previous
# version selected from `obesity_df`, a name this notebook never defines.
# The API delivers every value as a string, so the numeric columns are cast to
# float; records that lack a field end up as NaN.
# astype() returns a new frame, which avoids assigning into a slice
# (SettingWithCopyWarning).
data_df = (
    pd.DataFrame(data)[[
        'yearend',
        'locationabbr',
        'topic',
        'question',
        'data_value',
        'data_value_alt',
        'low_confidence_limit',
        'high_confidence_limit',
        'sample_size',
        'age_years',
    ]]
    .astype({
        'data_value': float,
        'data_value_alt': float,
        'low_confidence_limit': float,
        'high_confidence_limit': float,
        'sample_size': float,
    })
)

# Column dtypes and non-null counts
data_df.info()
# Show the resulting frame
data_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   yearend                1000 non-null   object 
 1   locationabbr           1000 non-null   object 
 2   topic                  1000 non-null   object 
 3   question               1000 non-null   object 
 4   data_value             930 non-null    float64
 5   data_value_alt         930 non-null    float64
 6   low_confidence_limit   930 non-null    float64
 7   high_confidence_limit  930 non-null    float64
 8   sample_size            930 non-null    float64
 9   age_years              230 non-null    object 
dtypes: float64(5), object(5)
memory usage: 78.2+ KB


Unnamed: 0,yearend,locationabbr,topic,question,data_value,data_value_alt,low_confidence_limit,high_confidence_limit,sample_size,age_years
0,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,25.4,25.4,19.2,32.8,232.0,18 - 24
1,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,44.8,44.8,37.0,52.9,223.0,18 - 24
2,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,28.5,28.5,21.6,36.5,232.0,18 - 24
3,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,24.7,24.7,18.3,32.6,219.0,18 - 24
4,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,19.3,19.3,14.4,25.4,254.0,18 - 24
...,...,...,...,...,...,...,...,...,...,...
995,2021,DE,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,15.2,15.2,11.4,20.0,465.0,45 - 54
996,2021,DE,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,45.5,45.5,39.8,51.4,479.0,45 - 54
997,2021,DE,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,33.9,33.9,28.8,39.3,489.0,45 - 54
998,2021,DE,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,25.6,25.6,21.1,30.6,562.0,45 - 54


In [94]:
# Sum of the per-row sample sizes (Series.sum skips missing values).
# NOTE(review): rows for different questions in the same state / age group
# report the same sample_size (e.g. 232.0 twice for AL, 18 - 24), so this total
# likely counts respondents more than once -- confirm this is intended.
num_of_participants = data_df.loc[:, 'sample_size'].sum()
num_of_participants

1400392.0

In [76]:
# Distinct survey questions present in the dataset, printed in alphabetical order
list_of_questions = data_df.loc[:, 'question'].unique()
print(sorted(list_of_questions))

['Percent of adults aged 18 years and older who have an overweight classification', 'Percent of adults aged 18 years and older who have obesity', 'Percent of adults who engage in no leisure-time physical activity', 'Percent of adults who report consuming fruit less than one time daily', 'Percent of adults who report consuming vegetables less than one time daily']


In [77]:
# Distinct age brackets in the dataset (rows with no age value show up as nan)
age_groups = data_df.loc[:, 'age_years'].unique()
print(age_groups)

['18 - 24' '25 - 34' '35 - 44' '45 - 54' '55 - 64' '65 or older' nan]


In [78]:
# Rows answering the "overweight classification" question only
overweight_df = data_df.loc[
    data_df['question'].eq(
        'Percent of adults aged 18 years and older who have an overweight classification'
    )
]
overweight_df

Unnamed: 0,yearend,locationabbr,topic,question,data_value,data_value_alt,low_confidence_limit,high_confidence_limit,sample_size,age_years
2,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,28.5,28.5,21.6,36.5,232.0,18 - 24
5,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,21.1,21.1,17.1,25.8,433.0,25 - 34
11,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,31.8,31.8,27.2,36.8,521.0,35 - 44
18,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,33.8,33.8,29.4,38.6,594.0,45 - 54
20,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,32.7,32.7,28.9,36.7,826.0,55 - 64
...,...,...,...,...,...,...,...,...,...,...
979,2021,CT,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,35.6,35.6,33.9,37.4,5161.0,
981,2021,DE,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,22.5,22.5,16.1,30.6,180.0,18 - 24
987,2021,DE,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,31.2,31.2,25.3,37.7,329.0,25 - 34
990,2021,DE,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,35.5,35.5,29.6,41.7,419.0,35 - 44


In [79]:
# Rows answering the "have obesity" question only
obese_df = data_df.loc[
    data_df['question'].eq(
        'Percent of adults aged 18 years and older who have obesity'
    )
]
obese_df

Unnamed: 0,yearend,locationabbr,topic,question,data_value,data_value_alt,low_confidence_limit,high_confidence_limit,sample_size,age_years
0,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,25.4,25.4,19.2,32.8,232.0,18 - 24
6,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,46.6,46.6,41.0,52.2,433.0,25 - 34
13,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,45.8,45.8,40.6,51.1,521.0,35 - 44
15,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,46.4,46.4,41.6,51.2,594.0,45 - 54
21,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,44.4,44.4,40.2,48.7,826.0,55 - 64
...,...,...,...,...,...,...,...,...,...,...
978,2021,CT,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,28.8,28.8,27.1,30.6,5161.0,
983,2021,DE,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,17.5,17.5,12.0,24.9,180.0,18 - 24
988,2021,DE,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,31.4,31.4,25.5,37.9,329.0,25 - 34
994,2021,DE,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,38.3,38.3,32.4,44.6,419.0,35 - 44


In [80]:
# Rows answering the "no leisure-time physical activity" question only
no_leisure_df = data_df.loc[
    data_df['question'].eq(
        'Percent of adults who engage in no leisure-time physical activity'
    )
]
no_leisure_df

Unnamed: 0,yearend,locationabbr,topic,question,data_value,data_value_alt,low_confidence_limit,high_confidence_limit,sample_size,age_years
4,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,19.3,19.3,14.4,25.4,254.0,18 - 24
7,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,23.3,23.3,19.2,28.0,475.0,25 - 34
14,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,22.4,22.4,18.5,26.8,553.0,35 - 44
17,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,35.6,35.6,31.1,40.3,638.0,45 - 54
22,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,39.1,39.1,35.2,43.2,900.0,55 - 64
...,...,...,...,...,...,...,...,...,...,...
975,2021,CT,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,18.4,18.4,17.0,19.9,5949.0,
982,2021,DE,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,21.3,21.3,15.3,28.8,210.0,18 - 24
985,2021,DE,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,24.7,24.7,19.4,30.8,372.0,25 - 34
993,2021,DE,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,22.3,22.3,17.9,27.4,487.0,35 - 44


In [81]:
# Rows answering the "fruit less than one time daily" question only
no_fruit_df = data_df.loc[
    data_df['question'].eq(
        'Percent of adults who report consuming fruit less than one time daily'
    )
]
no_fruit_df

Unnamed: 0,yearend,locationabbr,topic,question,data_value,data_value_alt,low_confidence_limit,high_confidence_limit,sample_size,age_years
1,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,44.8,44.8,37.0,52.9,223.0,18 - 24
9,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,49.4,49.4,43.7,55.1,419.0,25 - 34
10,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,44.5,44.5,39.2,49.8,507.0,35 - 44
19,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,49.2,49.2,44.3,54.0,577.0,45 - 54
23,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,48.4,48.4,44.1,52.8,809.0,55 - 64
...,...,...,...,...,...,...,...,...,...,...
977,2021,CT,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,35.7,35.7,33.8,37.6,5113.0,
984,2021,DE,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,44.2,44.2,35.2,53.6,175.0,18 - 24
989,2021,DE,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,39.5,39.5,33.0,46.3,330.0,25 - 34
992,2021,DE,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,39.8,39.8,33.8,46.2,412.0,35 - 44


In [82]:
# Rows answering the "vegetables less than one time daily" question only
no_vegetables_df = data_df.loc[
    data_df['question'].eq(
        'Percent of adults who report consuming vegetables less than one time daily'
    )
]
no_vegetables_df

Unnamed: 0,yearend,locationabbr,topic,question,data_value,data_value_alt,low_confidence_limit,high_confidence_limit,sample_size,age_years
3,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,24.7,24.7,18.3,32.6,219.0,18 - 24
8,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,20.3,20.3,15.8,25.7,410.0,25 - 34
12,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,16.7,16.7,12.9,21.3,499.0,35 - 44
16,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,19.5,19.5,15.7,23.9,570.0,45 - 54
24,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,21.2,21.2,17.8,25.0,811.0,55 - 64
...,...,...,...,...,...,...,...,...,...,...
976,2021,CT,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,16.2,16.2,14.6,17.9,4932.0,
980,2021,DE,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,35.9,35.9,27.3,45.5,167.0,18 - 24
986,2021,DE,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,24.1,24.1,19.1,30.1,322.0,25 - 34
991,2021,DE,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,20.4,20.4,15.6,26.1,397.0,35 - 44


In [96]:
# Every row reported for Alabama (location abbreviation 'AL')
alabama_df = data_df.loc[data_df['locationabbr'].eq('AL')]
alabama_df

Unnamed: 0,yearend,locationabbr,topic,question,data_value,data_value_alt,low_confidence_limit,high_confidence_limit,sample_size,age_years
0,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,25.4,25.4,19.2,32.8,232.0,18 - 24
1,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,44.8,44.8,37.0,52.9,223.0,18 - 24
2,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,28.5,28.5,21.6,36.5,232.0,18 - 24
3,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,24.7,24.7,18.3,32.6,219.0,18 - 24
4,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,19.3,19.3,14.4,25.4,254.0,18 - 24
...,...,...,...,...,...,...,...,...,...,...
135,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetab...,17.3,17.3,15.5,19.3,2805.0,
136,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,33.3,33.3,31.1,35.5,2888.0,
137,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit l...,49.6,49.6,47.2,52.0,2854.0,
138,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,36.8,36.8,34.5,39.1,2888.0,


In [101]:
#https://www.geeksforgeeks.org/how-to-print-an-entire-pandas-dataframe-in-python/

# Show every row and column of the Alabama subset with untruncated cell text.
# option_context applies the overrides only inside the `with` block, so the
# notebook-wide display settings are left as they were once it exits.
# None is the documented "no limit" value for each of these options.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
):
    # A bare expression inside `with` is not auto-rendered as cell output,
    # so render explicitly; display() is available without an import in
    # IPython / Jupyter kernels.
    display(alabama_df)

Unnamed: 0,yearend,locationabbr,topic,question,data_value,data_value_alt,low_confidence_limit,high_confidence_limit,sample_size,age_years
0,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who have obesity,25.4,25.4,19.2,32.8,232.0,18 - 24
1,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit less than one time daily,44.8,44.8,37.0,52.9,223.0,18 - 24
2,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who have an overweight classification,28.5,28.5,21.6,36.5,232.0,18 - 24
3,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetables less than one time daily,24.7,24.7,18.3,32.6,219.0,18 - 24
4,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-time physical activity,19.3,19.3,14.4,25.4,254.0,18 - 24
5,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who have an overweight classification,21.1,21.1,17.1,25.8,433.0,25 - 34
6,2021,AL,Obesity / Weight Status,Percent of adults aged 18 years and older who have obesity,46.6,46.6,41.0,52.2,433.0,25 - 34
7,2021,AL,Physical Activity - Behavior,Percent of adults who engage in no leisure-time physical activity,23.3,23.3,19.2,28.0,475.0,25 - 34
8,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming vegetables less than one time daily,20.3,20.3,15.8,25.7,410.0,25 - 34
9,2021,AL,Fruits and Vegetables - Behavior,Percent of adults who report consuming fruit less than one time daily,49.4,49.4,43.7,55.1,419.0,25 - 34


In [102]:
#https://stackoverflow.com/questions/26246864/restoring-the-default-display-context-in-pandas
# Put the four display options back to their pandas defaults.
# Resetting them by name (instead of reset_option('all')) leaves every other
# pandas option untouched.
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')
pd.reset_option('display.width')
pd.reset_option('display.max_colwidth')