<a href="https://colab.research.google.com/github/rmherman/Team-Healthcare-Project/blob/main/Team_Healthcare_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **This notebook builds a dictionary of the types of tobacco use and the percentage of use among those surveyed who report CURRENT use (*all ages, all races, all genders, all education levels*)**

In [None]:
import requests                                                                 # Import the requests and pprint modules
from pprint import pprint

In [None]:
# Location of the CDC tobacco survey data (JSON). The $limit query parameter is
# set high so the request is not cut off at the API's default page size.
url2 = 'https://data.cdc.gov/resource/wsas-xwh5.json?$limit=50000'

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of parsing an error payload as survey data.
response2 = requests.get(url2, timeout=60)
response2.raise_for_status()

# Parsed JSON body; the preview below shows it is a list of record dicts.
tobacco_data = response2.json()

# Sanity check: preview the first records and report how many were returned.
# NOTE(review): a count equal to the $limit value would suggest truncation.
pprint(tobacco_data[:5])
print(len(tobacco_data))

[{':@computed_region_hjsp_umg2': '29',
  'age': 'All Ages',
  'data_value': '7.5',
  'data_value_std_err': '0.5',
  'data_value_type': 'Percentage',
  'data_value_unit': '%',
  'datasource': 'BRFSS',
  'displayorder': '71',
  'education': 'All Grades',
  'gender': 'Overall',
  'geolocation': {'human_address': '{"address": "", "city": "", "state": "", '
                                   '"zip": ""}',
                  'latitude': '32.84057112200048',
                  'longitude': '-86.63186076199969'},
  'high_confidence_limit': '8.6',
  'locationabbr': 'AL',
  'locationdesc': 'Alabama',
  'low_confidence_limit': '6.4',
  'measuredesc': 'Current Use',
  'measureid': '177SCU',
  'race': 'White',
  'sample_size': '4616',
  'stratificationid1': '1GEN',
  'stratificationid2': '8AGE',
  'stratificationid3': '5RAC',
  'stratificationid4': '6EDU',
  'submeasureid': 'BRF71',
  'topicdesc': 'Smokeless Tobacco Use (Adults)',
  'topicid': '150BEH',
  'topictype': 'Tobacco Use – Survey Data',
  '

In [None]:
def make_tob_list(year, tob_info):
  '''
  Collect the "Current"-use tobacco survey records for the requested years.

  A record is kept only when all of the following hold:
    * its "year" is one of the values in `year`
    * its "response" is "Current"
    * its "gender" is "Overall"
    * its "age" is "All Ages"
    * its "race" is "All Races"
  Records that lack any of those keys are skipped.

  Args:
    year: collection of year strings to keep, e.g. ['2011', '2012'].
    tob_info: iterable of record dicts from the tobacco survey data.

  Returns:
    A list of [year, locationdesc, topicdesc, data_value] lists, in input
    order. When a record has no "data_value", the placeholder string
    'suppressed due to low sample size' is used instead. The input records
    are not modified.

  Raises:
    KeyError: if a kept record has no "locationdesc" or "topicdesc".
  '''
  # NOTE(review): "education" is not filtered here even though the notebook
  # heading mentions "all education" -- confirm whether that is intended.
  tobacco_data_list = []

  for item in tob_info:
    # Plain membership test. The previous generator expression was always
    # truthy, so no record was ever excluded by year.
    if item.get("year") not in year:
      continue
    if item.get("response") != "Current":
      continue
    if (item.get("gender") != "Overall"
        or item.get("age") != "All Ages"
        or item.get("race") != "All Races"):
      continue

    # Use a default instead of writing the placeholder back into the caller's
    # record, so the raw data stays untouched.
    data_value = item.get("data_value", 'suppressed due to low sample size')

    tobacco_data_list.append([item["year"],
                              item["locationdesc"],
                              item["topicdesc"],
                              data_value
                              ])

  return tobacco_data_list


In [None]:
def return_tobacco_dict(passed_list):
  '''
  Arrange tobacco rows into a nested {Year: {State: {Topic: Value}}} dict.

  Each entry of passed_list is read positionally as
  [year, state, topic, value]. Only the years '2011' through '2019' are
  kept; rows for any other year are ignored. A later row for the same
  year, state and topic overwrites the earlier value.

  Returns collective_tobacco_dict, which always contains every year key,
  even when no row matched that year.
  '''
  # Pre-seed one empty bucket per supported year.
  collective_tobacco_dict = {str(yr): {} for yr in range(2011, 2020)}

  for row in passed_list:
    states_for_year = collective_tobacco_dict.get(row[0])
    if states_for_year is None:
      # Year outside the supported range.
      continue
    state, topic, value = row[1], row[2], row[3]
    # Create the state's dict on first sight, then record this topic.
    states_for_year.setdefault(state, {})[topic] = value

  return collective_tobacco_dict

In [None]:
# Years of interest for the tobacco data, as strings: '2011' through '2019'.
year = list(map(str, range(2011, 2020)))

# Step 1: pull the matching rows out of the raw tobacco_data records.
tobacco_list = make_tob_list(year, tobacco_data)

# Step 2: reshape those rows into the nested dictionary returned by
# return_tobacco_dict, then show it.
final_tobacco_dict = return_tobacco_dict(tobacco_list)
pprint(final_tobacco_dict)

{'2011': {'Alabama': {'Cigarette Use (Adults)': '24.3',
                      'Smokeless Tobacco Use (Adults)': '6.5'},
          'Alaska': {'Cigarette Use (Adults)': '22.9',
                     'Smokeless Tobacco Use (Adults)': '5.9'},
          'Arizona': {'Cigarette Use (Adults)': '19.3',
                      'Smokeless Tobacco Use (Adults)': '3'},
          'Arkansas': {'Cigarette Use (Adults)': '27',
                       'Smokeless Tobacco Use (Adults)': '7.1'},
          'California': {'Cigarette Use (Adults)': '13.7',
                         'Smokeless Tobacco Use (Adults)': '1.4'},
          'Colorado': {'Cigarette Use (Adults)': '18.3',
                       'Smokeless Tobacco Use (Adults)': '4.5'},
          'Connecticut': {'Cigarette Use (Adults)': '17.1',
                          'Smokeless Tobacco Use (Adults)': '1.5'},
          'Delaware': {'Cigarette Use (Adults)': '21.8',
                       'Smokeless Tobacco Use (Adults)': '2.2'},
          'District of Col

In [None]:
# Location of the CDC mortality data (JSON). ?$limit=15000 was added so that
# all entries are returned; without it only 1000 records came back.
url1 = 'https://data.cdc.gov/resource/bi63-dtpu.json?$limit=15000'

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of parsing an error payload as data.
response1 = requests.get(url1, timeout=60)
response1.raise_for_status()

# Parsed JSON body; the preview below shows it is a list of record dicts.
mortality_data = response1.json()

# Sanity check: preview the first records and report how many were returned.
# NOTE(review): a count equal to the $limit value would suggest truncation.
pprint(mortality_data[:5])
print(len(mortality_data))

[{'_113_cause_name': 'Nephritis, nephrotic syndrome and nephrosis '
                     '(N00-N07,N17-N19,N25-N27)',
  'aadr': '2.6',
  'cause_name': 'Kidney disease',
  'deaths': '21',
  'state': 'Vermont',
  'year': '2012'},
 {'_113_cause_name': 'Nephritis, nephrotic syndrome and nephrosis '
                     '(N00-N07,N17-N19,N25-N27)',
  'aadr': '3.3',
  'cause_name': 'Kidney disease',
  'deaths': '29',
  'state': 'Vermont',
  'year': '2017'},
 {'_113_cause_name': 'Nephritis, nephrotic syndrome and nephrosis '
                     '(N00-N07,N17-N19,N25-N27)',
  'aadr': '3.7',
  'cause_name': 'Kidney disease',
  'deaths': '30',
  'state': 'Vermont',
  'year': '2016'},
 {'_113_cause_name': 'Nephritis, nephrotic syndrome and nephrosis '
                     '(N00-N07,N17-N19,N25-N27)',
  'aadr': '3.8',
  'cause_name': 'Kidney disease',
  'deaths': '30',
  'state': 'Vermont',
  'year': '2013'},
 {'_113_cause_name': 'Intentional self-harm (suicide) (*U03,X60-X84,Y87.0)',
  'aadr': '

In [None]:
def make_mort_list(year, mort_info):
  '''
  Collect the mortality records for the requested years.

  Args:
    year: collection of year strings to keep, e.g. ['2011', '2012'].
    mort_info: iterable of mortality record dicts. Each kept record must
      have "state", "cause_name" and "aadr" keys.

  Returns:
    A list of [year, state, cause_name, rate] lists, in input order, where
    rate is the record's "aadr" divided by 1000 and formatted as a string
    with four decimal places. The death rate is per 100,000, so
    rate / 100,000 * 100 (i.e. rate / 1000) expresses it as a percentage.

  Raises:
    KeyError: if a kept record lacks "state", "cause_name" or "aadr".
    ValueError: if a kept record's "aadr" cannot be parsed as a number.
  '''
  mortality_data_list = []

  # Iterate the argument, not the notebook-level mortality_data variable, so
  # the function works on whatever data the caller passes in.
  for item in mort_info:
    # Plain membership test. The previous generator expression was always
    # truthy, so no record was ever excluded by year.
    if item.get("year") not in year:
      continue

    mortality_data_list.append([item["year"],
                                item["state"],
                                item["cause_name"],
                                f'{float(item["aadr"])/1000:.4f}'
                                ])

  return mortality_data_list

In [None]:
def return_mortality_dict(passed_list):
  '''
  Arrange mortality rows into a nested {Year: {State: {Cause: Rate}}} dict.

  Each entry of passed_list is read positionally as
  [year, state, cause_name, rate]. Only the years '2011' through '2017'
  are kept; rows for any other year are ignored. A later row for the same
  year, state and cause overwrites the earlier value.

  Returns collective_mortality_dict, which always contains every year key,
  even when no row matched that year.
  '''
  # Pre-seed one empty bucket per supported year.
  collective_mortality_dict = {str(yr): {} for yr in range(2011, 2018)}

  for row in passed_list:
    states_for_year = collective_mortality_dict.get(row[0])
    if states_for_year is None:
      # Year outside the supported range.
      continue
    state, cause, rate = row[1], row[2], row[3]
    # Create the state's dict on first sight, then record this cause.
    states_for_year.setdefault(state, {})[cause] = rate

  return collective_mortality_dict


In [None]:
# Years of interest for the mortality data, as strings: '2011' through '2017'.
# range() excludes its stop value, so the stop must be 2018 to include 2017,
# which return_mortality_dict has a bucket for.
year = [str(x) for x in range(2011, 2018)]
mortality_list = make_mort_list(year, mortality_data)

# final_mortality_dict has the format {'year' : {'state' : {'cause_name' : 'aadr' }}}
final_mortality_dict = return_mortality_dict(mortality_list)
pprint(final_mortality_dict)

{'2011': {'Alabama': {'All causes': '0.9336',
                      "Alzheimer's disease": '0.0297',
                      'CLRD': '0.0552',
                      'Cancer': '0.1878',
                      'Diabetes': '0.0239',
                      'Heart disease': '0.2287',
                      'Influenza and pneumonia': '0.0186',
                      'Kidney disease': '0.0200',
                      'Stroke': '0.0494',
                      'Suicide': '0.0132',
                      'Unintentional injuries': '0.0544'},
          'Alaska': {'All causes': '0.7478',
                     "Alzheimer's disease": '0.0205',
                     'CLRD': '0.0418',
                     'Cancer': '0.1755',
                     'Diabetes': '0.0206',
                     'Heart disease': '0.1496',
                     'Influenza and pneumonia': '0.0126',
                     'Kidney disease': '0.0125',
                     'Stroke': '0.0394',
                     'Suicide': '0.0200',
           

In [None]:
# Merge the tobacco and mortality dictionaries into one
# {year: {state: {metric: number}}} structure. Only years and states present
# in BOTH sources are kept.
combined_dict = {'2011' : {}, '2012' : {}, '2013' : {},
                  '2014' : {}, '2015' : {}, '2016' : {},
                  '2017' : {}}

# The loop variable is deliberately not called `year`, so the notebook-level
# `year` list is not overwritten.
for survey_year, tobacco_by_state in final_tobacco_dict.items():
  if survey_year not in final_mortality_dict:
    continue
  mortality_by_state = final_mortality_dict[survey_year]

  for state, tobacco_topics in tobacco_by_state.items():
    if state not in mortality_by_state:
      continue
    state_entry = combined_dict.setdefault(survey_year, {}).setdefault(state, {})

    for key, value in tobacco_topics.items():
      # make_tob_list stores the text 'suppressed due to low sample size' when
      # a record has no data_value; keep that as NaN instead of crashing.
      try:
        state_entry[key] = float(value)
      except (TypeError, ValueError):
        state_entry[key] = float('nan')

    for key, value in mortality_by_state[state].items():
      # NOTE(review): make_mort_list stores aadr / 1000, so multiplying by 100
      # gives aadr / 10 (deaths per 10,000), not a percentage like the tobacco
      # columns -- confirm this scaling is intended.
      state_entry['Death Due to ' + key] = round((float(value) * 100), 1)

pprint(combined_dict)

{'2011': {'Alabama': {'Cigarette Use (Adults)': 24.3,
                      'Death Due to All causes': 93.4,
                      "Death Due to Alzheimer's disease": 3.0,
                      'Death Due to CLRD': 5.5,
                      'Death Due to Cancer': 18.8,
                      'Death Due to Diabetes': 2.4,
                      'Death Due to Heart disease': 22.9,
                      'Death Due to Influenza and pneumonia': 1.9,
                      'Death Due to Kidney disease': 2.0,
                      'Death Due to Stroke': 4.9,
                      'Death Due to Suicide': 1.3,
                      'Death Due to Unintentional injuries': 5.4,
                      'Smokeless Tobacco Use (Adults)': 6.5},
          'Alaska': {'Cigarette Use (Adults)': 22.9,
                     'Death Due to All causes': 74.8,
                     "Death Due to Alzheimer's disease": 2.1,
                     'Death Due to CLRD': 4.2,
                     'Death Due to Cancer': 17.5,

In [None]:
import pandas as pd

# Flatten {year: {state: {metric: value}}} into one row per (year, state):
# the tuple keys become the row index and the metric names become columns.
combined_df = pd.DataFrame.from_dict(
    {
        (year_key, state_key): metrics
        for year_key, states in combined_dict.items()
        for state_key, metrics in states.items()
    },
    orient='index',
).reset_index()

# reset_index() turned the two index levels into the first two columns;
# give them readable names and keep every metric column as it is.
combined_df.columns = ['Year', 'State', *combined_df.columns[2:]]

# Round-trip through a list of per-row dicts to build the final table.
flattened_dict = combined_df.to_dict(orient='records')
combined_dataset = pd.DataFrame(flattened_dict)

display(combined_dataset)

Unnamed: 0,Year,State,Cigarette Use (Adults),Smokeless Tobacco Use (Adults),Death Due to Suicide,Death Due to Influenza and pneumonia,Death Due to Kidney disease,Death Due to Diabetes,Death Due to Alzheimer's disease,Death Due to Stroke,Death Due to Unintentional injuries,Death Due to CLRD,Death Due to Cancer,Death Due to Heart disease,Death Due to All causes,E-Cigarette Use (Adults)
0,2011,Alabama,24.3,6.5,1.3,1.9,2.0,2.4,3.0,4.9,5.4,5.5,18.8,22.9,93.4,
1,2011,Alaska,22.9,5.9,2.0,1.3,1.2,2.1,2.1,3.9,5.7,4.2,17.5,15.0,74.8,
2,2011,Arizona,19.3,3.0,1.8,1.0,0.6,2.4,3.4,3.1,4.7,4.5,14.8,15.0,68.9,
3,2011,Arkansas,27.0,7.1,1.6,2.2,2.4,2.7,3.0,5.1,5.0,5.9,19.1,21.4,89.5,
4,2011,California,13.7,1.4,1.0,1.7,0.7,2.1,3.1,3.6,2.8,3.7,15.2,15.9,64.1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,2017,Virginia,16.4,4.0,1.3,1.3,1.7,2.0,2.8,3.8,4.4,3.5,15.3,15.4,71.9,4.9
353,2017,Washington,13.5,3.4,1.7,1.3,0.5,2.2,4.6,3.7,4.4,3.8,14.8,13.9,68.9,4.3
354,2017,West Virginia,26.0,8.9,2.1,1.8,1.7,3.4,3.1,4.2,10.0,6.4,17.9,19.2,95.7,5.7
355,2017,Wisconsin,16.0,4.3,1.5,1.3,1.2,1.9,3.2,3.4,5.8,3.8,15.3,15.8,72.2,4.3
