# getnsduhdata
### First version:  July 2, 2023
### Latest update:  July 2, 2023
### Matthew Beattie
### University of Oklahoma

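This notebook reads in downloaded state-level NSDUH (National Survey on Drug Use and Health) data.  The field of interest is each state's estimated total number of people who used illicit drugs within the past month.  Illicit drugs include marijuana, even in states where it is legal.  The totals are merged with ACS population counts to compute a per capita rate, and the result is saved to `nsduhfeatures.txt`.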

In [2]:
# Initialize libraries
import requests
import pandas as pd
import numpy as np
import os

In [3]:
# Set working directory
# The data folder defaults to the author's local checkout.  Set the
# HOMELESSNESS_DATA_DIR environment variable to run this notebook from a
# different machine or location without editing the cell.
DATA_DIR = os.environ.get(
    'HOMELESSNESS_DATA_DIR',
    'c:/Users/mjbea/OneDrive/GitHub/homelessness_data',
)
os.chdir(DATA_DIR)

# Show the folder contents so the expected input files can be confirmed
os.listdir()

['.git',
 '.ipynb_checkpoints',
 '2007-2022-HIC-Counts-by-State.xlsx',
 '2007-2022-PIT-Counts-by-State.xlsx',
 'ACS Feature Dictionary.xlsx',
 'acsdatafeatures.txt',
 'acs_feature_dictionary.txt',
 'Drug Use by State.xlsx',
 'getacsdata.ipynb',
 'getnsduhdata.ipynb',
 'hudpitandhic_2007_2022.txt',
 'nsduh_data_downloader',
 'nsduh_illicit_drug_use.txt',
 'pathutils.py',
 'Political Landscape Study.xlsx',
 'progressiveness_data.txt',
 'README.md',
 'state_abbreviations.txt',
 'step1_sequence_dataprep_multiyear.ipynb']

In [35]:
# Read in the state-level NSDUH illicit drug use estimates and set dtypes
nsduhdf = pd.read_csv('nsduh_illicit_drug_use.txt', sep='\t').astype(
    {'state': str, 'year': int, 'illicitusepastmon': float}
)

# Add state code
# Inner join on the state name: rows whose state has no match in the
# abbreviation table are dropped.  validate='many_to_one' raises a MergeError
# if a state appears more than once in the lookup table, which would
# otherwise silently duplicate rows.
# NOTE(review): presumably state_abbreviations.txt supplies the 'statecode'
# column used below -- confirm against the file.
stateabbvdf = pd.read_csv('state_abbreviations.txt', sep='\t')
nsduhdf = pd.merge(nsduhdf, stateabbvdf, on='state', validate='many_to_one')

# Get population data from ACS
acsdf = pd.read_csv('acsdatafeatures.txt', sep='\t')
popdf = acsdf[['statecode', 'year', 'pop_tot']]

# Merge population data and calculate per capita field
# Inner join again: state/year pairs without an ACS population row are
# dropped.  The validate check guards against duplicate state/year rows in
# the ACS extract inflating the result.
nsduhdf = pd.merge(
    nsduhdf, popdf, on=['statecode', 'year'], validate='many_to_one'
)
nsduhdf = nsduhdf.assign(
    illicitusepastmon_pcp=nsduhdf['illicitusepastmon'] / nsduhdf['pop_tot']
)

# Keep only the output columns (this also discards 'state' and 'pop_tot')
# and save to file
nsduhdf = nsduhdf[['statecode', 'year', 'illicitusepastmon', 'illicitusepastmon_pcp']]
nsduhdf.to_csv('nsduhfeatures.txt', sep='\t', index=False)

# Display dataframe
nsduhdf

Unnamed: 0,statecode,year,illicitusepastmon,illicitusepastmon_pcp
0,AL,2014,325000.0,0.067019
1,AL,2015,389000.0,0.080058
2,AL,2016,289000.0,0.059425
3,AL,2017,353000.0,0.072414
4,AL,2018,439000.0,0.089814
...,...,...,...,...
505,WY,2019,42000.0,0.072569
506,WY,2021,62000.0,0.107118
507,WY,2013,32000.0,0.054921
508,WY,2012,33000.0,0.057251
