# Making behavioral/psychological file
## Sandra Tilmon
## 11/29/2023


#### Change log:

| Date | Change |
|------|--------|





**NHANES is not available publicly at the census tract level.**

# Setup

In [None]:
import pandas as pd
import numpy as np

import math
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats

import seaborn as sns

import os
import re
from functools import reduce
import requests

import geopandas as gpd
from shapely.geometry import shape, Point
import geopy
from geopy.extra.rate_limiter import RateLimiter
from geopy import geocoders
from geopy.geocoders import GoogleV3

from timeit import default_timer as timer

# Display settings: None lifts pandas' limits on how many rows/columns are shown,
# and lets pandas choose the output width itself.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = None

# Render floats with two fixed decimals instead of scientific notation
pd.set_option('display.float_format', '{:.2f}'.format)

%matplotlib inline

In [None]:
# Mount drive
# Colab-only step: attaches Google Drive at /content/gdrive so that the
# '/content/gdrive/My Drive/...' paths defined below resolve.

from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Folder holding the raw data sources.
# NOTE: the name `dir` shadows Python's builtin dir() for the rest of the session.
dir = "/content/gdrive/My Drive/Sociome_Folder/Data/"

# Folder that receives the curated output datasets.
curated = "/content/gdrive/My Drive/Sociome_Folder/Data Commons/Curated datasets/"

# Echo both locations: the raw folder via print, the curated folder as the cell result.
print(dir)
curated

/content/gdrive/My Drive/Sociome_Folder/Data/


'/content/gdrive/My Drive/Sociome_Folder/Data Commons/Curated datasets/'

In [None]:
# Start with an empty collection; dataframe names get appended to it later.
frames = list()

# CDC

## Places

https://data.cdc.gov/500-Cities-Places/PLACES-Local-Data-for-Better-Health-Census-Tract-D/nw2y-v4gm/about_data

https://data.cdc.gov/browse/select_dataset?q=census+tract&sortBy=relevance&tags=brfss&utf8=%E2%9C%93&page=1



Accessed 12/14/2023


In [None]:
# NOTE(review): this entire cell is commented out, so `places` is not created when the
# notebook is run top to bottom; the output shown beneath it presumably comes from an
# earlier run. When enabled, it reads the 2022-release PLACES census-tract CSV with every
# column as a string and keeps only the rows whose LocationID starts with "17031".
# places = pd.read_csv(dir + 'CDC/Places/PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2022_release.csv', dtype='string')
# print(places.shape)

# # Extract census tracts starting with "17031" (Cook County)
# mask = places['LocationID'].astype(str).str.startswith("17031")
# places = places[mask]
# print(places.shape)

# places.head()

(2161543, 23)
(39420, 23)


Unnamed: 0,Year,StateAbbr,StateDesc,CountyName,CountyFIPS,LocationName,DataSource,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,Data_Value_Footnote_Symbol,Data_Value_Footnote,Low_Confidence_Limit,High_Confidence_Limit,TotalPopulation,Geolocation,LocationID,CategoryID,MeasureId,DataValueTypeID,Short_Question_Text
1798,2020,IL,Illinois,Cook,17031,17031020702,BRFSS,Health Outcomes,Stroke among adults aged >=18 years,%,Crude prevalence,3.4,,,3.0,3.8,7194,POINT (-87.70545142 41.99533955),17031020702,HLTHOUT,STROKE,CrdPrv,Stroke
1799,2020,IL,Illinois,Cook,17031,17031040402,BRFSS,Health Outcomes,Stroke among adults aged >=18 years,%,Crude prevalence,1.0,,,0.9,1.1,4468,POINT (-87.6787372 41.9724992),17031040402,HLTHOUT,STROKE,CrdPrv,Stroke
1800,2020,IL,Illinois,Cook,17031,17031050700,BRFSS,Health Outcomes,Depression among adults aged >=18 years,%,Crude prevalence,15.5,,,14.7,16.4,1418,POINT (-87.67602892 41.94508495),17031050700,HLTHOUT,DEPRESSION,CrdPrv,Depression
1801,2020,IL,Illinois,Cook,17031,17031051200,BRFSS,Health Outcomes,Stroke among adults aged >=18 years,%,Crude prevalence,0.9,,,0.8,1.0,1516,POINT (-87.67594323 41.9414376),17031051200,HLTHOUT,STROKE,CrdPrv,Stroke
1802,2020,IL,Illinois,Cook,17031,17031061200,BRFSS,Health Outcomes,Stroke among adults aged >=18 years,%,Crude prevalence,0.7,,,0.6,0.8,1889,POINT (-87.66161258 41.94908085),17031061200,HLTHOUT,STROKE,CrdPrv,Stroke


In [None]:
# Disabled inspection step: widens column display and counts the rows per PLACES measure.
# NOTE(review): it needs `places`, which in the visible notebook is only created by
# commented-out code.
# pd.set_option('display.max_colwidth', None)
# places['Measure'].value_counts()

Stroke among adults aged >=18 years                                                                                                                                                               1314
Depression among adults aged >=18 years                                                                                                                                                           1314
Diagnosed diabetes among adults aged >=18 years                                                                                                                                                   1314
Arthritis among adults aged >=18 years                                                                                                                                                            1314
Obesity among adults aged >=18 years                                                                                                                                                              1314
All t

In [None]:
# NOTE(review): disabled cell. When enabled, it keeps the rows for six measures
# (depression, no leisure-time physical activity, current smoking, short sleep,
# poor mental health days, binge drinking) and reduces the frame to
# LocationID / MeasureId / Data_Value. It depends on `places`, which in the visible
# notebook is only created by commented-out code.
# # Keep only behavioral/psychological measures

# places2 = places[(places['Measure'] == 'Depression among adults aged >=18 years') |
#                 (places['Measure'] == 'No leisure-time physical activity among adults aged >=18 years') |
#                 (places['Measure'] == 'Current smoking among adults aged >=18 years') |
#                 (places['Measure'] == 'Sleeping less than 7 hours among adults aged >=18 years') |
#                 (places['Measure'] == 'Mental health not good for >=14 days among adults aged >=18 years') |
#                 (places['Measure'] == 'Binge drinking among adults aged >=18 years')]

# places2 = places2[['LocationID', 'MeasureId', 'Data_Value']]

# places2.head()

Unnamed: 0,LocationID,MeasureId,Data_Value
1800,17031050700,DEPRESSION,15.5
1809,17031080400,DEPRESSION,16.7
1810,17031150401,DEPRESSION,15.4
1811,17031230800,DEPRESSION,15.0
1812,17031231200,DEPRESSION,15.5


In [None]:
# Disabled sanity check: counts the rows per MeasureId in the filtered frame.
# NOTE(review): `places2` is only created by commented-out code in the visible notebook.
# places2['MeasureId'].value_counts()

DEPRESSION    1314
LPA           1314
CSMOKING      1314
SLEEP         1314
MHLTH         1314
BINGE         1314
Name: MeasureId, dtype: Int64

In [None]:
# Build the tract-by-measure table directly from the raw PLACES extract.
#
# Why this cell is active: `places3` is renamed and exported further down, but the
# load/filter cells above are commented out, so after Restart & Run All the name would
# be undefined. This cell therefore does not rely on `places` or `places2`.

# Behavioral/psychological measures to keep (full `Measure` labels from the PLACES file)
places_behav_measures = [
    'Depression among adults aged >=18 years',
    'No leisure-time physical activity among adults aged >=18 years',
    'Current smoking among adults aged >=18 years',
    'Sleeping less than 7 hours among adults aged >=18 years',
    'Mental health not good for >=14 days among adults aged >=18 years',
    'Binge drinking among adults aged >=18 years',
]

# Read only the columns used here; everything is kept as a string so tract IDs
# are never reinterpreted as numbers.
places_raw = pd.read_csv(
    dir + 'CDC/Places/PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2022_release.csv',
    usecols=['LocationID', 'Measure', 'MeasureId', 'Data_Value'],
    dtype='string',
)

# Census tracts starting with "17031" (Cook County), restricted to the measures above
is_cook_tract = places_raw['LocationID'].astype(str).str.startswith('17031')
is_behav_measure = places_raw['Measure'].isin(places_behav_measures)
places_behav_long = places_raw.loc[
    is_cook_tract & is_behav_measure,
    ['LocationID', 'MeasureId', 'Data_Value'],
]

# Pivot the DataFrame: one row per tract, one column per MeasureId
places3 = places_behav_long.pivot(index='LocationID', columns='MeasureId', values='Data_Value').reset_index()
places3.head()

MeasureId,LocationID,BINGE,CSMOKING,DEPRESSION,LPA,MHLTH,SLEEP
0,17031010100,13.8,16.4,15.6,26.2,13.5,37.5
1,17031010201,13.9,20.5,16.3,32.2,14.8,38.7
2,17031010202,13.4,19.1,15.7,31.6,13.6,37.3
3,17031010300,14.2,14.5,14.7,25.3,11.9,34.1
4,17031010400,16.6,11.9,17.7,20.3,15.0,32.1


In [None]:
# Relabel the tract identifier column as GEOID10.
# `places3` must already exist in the kernel (it comes from the PLACES pivot step).
places3 = places3.rename({'LocationID': 'GEOID10'}, axis='columns')

## Export and reload

In [None]:
# Write the curated tract-level table to the curated-datasets folder.
# index=False keeps the positional row index out of the file; otherwise it is written
# as a header-less first column that comes back as "Unnamed: 0" when the CSV is read.
# Passing the path (rather than a handle opened without newline='') lets pandas manage
# the file and its line endings.
places3.to_csv(curated + 'colab63_behav_psych.csv', index=False)

In [None]:
# Read the curated file back and discard every column whose name contains "Unnamed"
# (e.g. an index column that was saved without a header).
# NOTE(review): GEOID10 is read with pandas' default type inference here, not as a
# string - confirm that matches the other tables it will be joined to.
Behav63 = (
    pd.read_csv(curated + 'colab63_behav_psych.csv')
    .pipe(lambda df: df.drop(columns=df.filter(regex='Unnamed').columns))
)
print(Behav63.shape)
Behav63.head()

(1314, 7)


Unnamed: 0,GEOID10,BINGE,CSMOKING,DEPRESSION,LPA,MHLTH,SLEEP
0,17031010100,13.8,16.4,15.6,26.2,13.5,37.5
1,17031010201,13.9,20.5,16.3,32.2,14.8,38.7
2,17031010202,13.4,19.1,15.7,31.6,13.6,37.3
3,17031010300,14.2,14.5,14.7,25.3,11.9,34.1
4,17031010400,16.6,11.9,17.7,20.3,15.0,32.1


In [None]:
# Show the final column names as a plain Python list
list(Behav63.columns)

['GEOID10', 'BINGE', 'CSMOKING', 'DEPRESSION', 'LPA', 'MHLTH', 'SLEEP']