# NYC Community Health Survey Public Use Data
* [nyc.gov](https://www.nyc.gov/site/doh/data/data-sets/community-health-survey-public-use-data.page)
* [NYC Open Data](https://data.cityofnewyork.us/Health/New-York-City-Community-Health-Survey/csut-3wpr)


* [Question Matrix](https://www.nyc.gov/assets/doh/downloads/pdf/episrv/chs-question-matrix.pdf)
* [Questionnaire](https://www.nyc.gov/assets/doh/downloads/pdf/episrv/chs2020survey.pdf)
* [Dataset](https://www.nyc.gov/assets/doh/downloads/sas/episrv/chs2020_public.sas7bdat)
* [Codebook](https://www.nyc.gov/assets/doh/downloads/pdf/episrv/chs2020-codebook.pdf)
* [Data Dictionary](https://data.cityofnewyork.us/api/views/csut-3wpr/files/3ed3d699-790e-48c7-b354-a934685b395c?download=true&filename=DOHMHDataDictionary_CHS_12022021.xlsx)

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

from mdb_parser import MDBParser, MDBTable

# Render every float with thousands separators and no decimal places.
# This is a display-only setting: the stored values keep full precision, but
# fractional statistics (means, standard deviations) appear rounded to integers.
pd.set_option("display.float_format", "{:,.0f}".format)
%matplotlib inline

In [18]:
# Load the CHS 2020 public-use SAS file from the current working directory.
# The format is stated explicitly; it matches what pandas would infer from the
# ".sas7bdat" extension.
df = pd.read_sas(
    "chs2020_public.sas7bdat",
    format="sas7bdat",
)

In [19]:
# Bare expression as the cell's last line: the notebook renders the frame.
# The rendered view is truncated (the "..." row/column markers in the output),
# so only the first and last rows and columns are shown.
df

Unnamed: 0,cid,strata,survey,wt21_dual,wt21_dual_q1,strata_q1,qxvers,mood1,mood2,mood3,...,daysalc30,averagedrink20,heavydrink20,bingenew,ipvphy,insultipv,wt_compare,insure20r,hhsize,child
0,2100001,20201407,21,2193,,,2,5,5,5,...,0,0,2,2,2,2,2193,4,2,2
1,2100002,20201304,21,16,58,20201304,1,5,5,5,...,0,0,2,2,2,2,16,3,1,2
2,2100003,20201502,21,349,578,20201500,1,5,5,5,...,0,0,2,2,2,2,349,1,3,1
3,2100004,20201314,21,111,,,2,5,3,2,...,0,0,2,2,2,,111,6,4,1
4,2100005,20201210,21,231,,,2,5,5,5,...,2,0,2,2,2,2,231,1,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8776,2116976,20204000,21,163,392,20204000,1,2,3,5,...,6,1,2,2,2,2,163,4,5,2
8777,2116977,20204000,21,162,,,2,3,2,2,...,0,0,2,2,2,1,162,2,1,2
8778,2116978,20204000,21,139,405,20204000,1,4,5,5,...,0,0,2,2,2,2,139,4,1,2
8779,2116979,20204000,21,115,,,2,5,5,5,...,0,0,2,2,2,2,115,1,5,1


In [20]:
# Print a structural summary of the frame: index range, column count,
# dtype breakdown, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8781 entries, 0 to 8780
Columns: 142 entries, cid to child
dtypes: float64(142)
memory usage: 9.5 MB


In [21]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
# A count below the total row count means that column has missing values.
# NOTE(review): the global float display format set at the top of the notebook
# uses zero decimals, so means and standard deviations are shown rounded.
df.describe()

Unnamed: 0,cid,strata,survey,wt21_dual,wt21_dual_q1,strata_q1,qxvers,mood1,mood2,mood3,...,daysalc30,averagedrink20,heavydrink20,bingenew,ipvphy,insultipv,wt_compare,insure20r,hhsize,child
count,8781,8781,8781,8781,4336,4336,8781,8781,8781,8781,...,8719,8687,8659,8685,8679,8663,8781,8529,8781,8744
mean,2110908,20202377,21,733,1485,20202371,2,4,4,4,...,5,0,2,2,2,2,733,3,3,2
std,5397,683,0,843,1689,690,0,1,1,1,...,8,1,0,0,0,0,843,2,2,0
min,2100001,20201100,21,13,27,20201100,1,1,1,1,...,0,0,1,1,1,1,13,1,1,1
25%,2110395,20202108,21,177,327,20202108,1,4,3,3,...,0,0,2,2,2,2,177,1,1,1
50%,2112590,20202400,21,449,893,20202401,2,5,4,4,...,1,0,2,2,2,2,449,3,2,2
75%,2114785,20202999,21,979,2050,20202999,2,5,5,5,...,4,0,2,2,2,2,979,4,4,2
max,2116980,20204000,21,6360,12947,20204000,2,5,5,5,...,30,32,2,2,2,2,6360,6,7,2


In [16]:
# Show the column index of the frame (the repr is abbreviated with "..." when long).
df.columns

Index(['cid', 'strata', 'survey', 'wt21_dual', 'wt21_dual_q1', 'strata_q1',
       'qxvers', 'mood1', 'mood2', 'mood3',
       ...
       'daysalc30', 'averagedrink20', 'heavydrink20', 'bingenew', 'ipvphy',
       'insultipv', 'wt_compare', 'insure20r', 'hhsize', 'child'],
      dtype='object', length=142)

In [37]:
# Pull the mental-health-related columns out of the full survey frame for a
# closer look. Column meanings are defined in the CHS codebook linked in the
# notebook header.
mood = df.loc[
    :,
    [
        'mood1',
        'mood2',
        'mood3',
        'mood4',
        'mood5',
        'mood6',
        'mood8',
        'mood9',
        'mood11',
        'k6',
        'nspd',
        'mhtreat20_all',
    ],
]
mood

Unnamed: 0,mood1,mood2,mood3,mood4,mood5,mood6,mood8,mood9,mood11,k6,nspd,mhtreat20_all
0,5,5,5,5,5,5,2,2,2,0,2,2
1,5,5,5,5,4,5,2,2,2,1,2,2
2,5,5,5,5,5,5,2,2,2,0,2,2
3,5,3,2,5,4,5,2,2,2,6,2,2
4,5,5,5,5,5,5,2,2,2,0,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...
8776,2,3,5,3,1,2,2,2,2,14,1,2
8777,3,2,2,2,2,3,2,2,1,16,1,2
8778,4,5,5,5,5,5,2,2,2,1,2,2
8779,5,5,5,5,5,5,2,2,2,0,2,2


In [35]:
# Pull the cardiovascular-health-related columns out of the full survey frame.
# Several of these columns contain missing values for many respondents, as the
# displayed rows show.
cardio = df.loc[
    :,
    [
        'toldhighbp20',
        'toldprescription20',
        'takingmeds20',
        'checkedbp20_q1',
    ],
]
cardio

Unnamed: 0,toldhighbp20,toldprescription20,takingmeds20,checkedbp20_q1
0,2,,,
1,2,,,1
2,2,,,2
3,2,,,
4,1,1,1,
...,...,...,...,...
8776,2,,,1
8777,2,,,
8778,1,1,1,1
8779,2,,,
