Import the pandas and NumPy libraries and load the data files
---

In [None]:
import pandas as pd
import numpy as np

# Read the three survey files (demographics, body measures, occupation),
# each into its own DataFrame.
demo, bmx, ocq = (
    pd.read_csv(csv_name)
    for csv_name in ('Demographics.csv', 'BodyMeasures.csv', 'Occupation.csv')
)

Impossible Data - Continuous Values
---

BMXWT - weight in kg

In [None]:
# Summary statistics for the weight column (kg).
bmx.loc[:, 'BMXWT'].describe()

In [None]:
# A body weight of zero or less is physically impossible, so every
# non-positive reading is replaced with NaN (i.e. treated as missing).
ind = bmx['BMXWT'] <= 0
bmx.loc[ind, 'BMXWT'] = np.nan

# Re-run the summary to confirm that no non-positive weights remain.
bmx['BMXWT'].describe()

Impossible Data - Categorical Values
---
BMIWT - Weight Comment ([codebook entry](https://wwwn.cdc.gov/Nchs/Nhanes/1999-2000/BMX.htm#BMIWT))


|Code|Meaning|
|:--- |:--- |
|1|	Could not obtain|	
|2|	Exceeds capacity|	
|3|	Clothing|
|4|	Medical appliance|

In [None]:
# Distinct weight-comment codes currently present in the data.
bmx.loc[:, 'BMIWT'].unique()

In [None]:
# The codebook defines only codes 1-4 for BMIWT. Any other non-missing value
# (too large, zero, negative, or fractional) is not a valid category, so it is
# replaced with NaN. Values that are already missing are left untouched.
valid_codes = [1, 2, 3, 4]
ind = bmx['BMIWT'].notna() & ~bmx['BMIWT'].isin(valid_codes)
bmx.loc[ind, 'BMIWT'] = np.nan

# Only the valid codes (plus NaN for missing/removed entries) should remain.
bmx['BMIWT'].unique()

Extreme Data
---

Heaviest human weight on record: 635 kg

In [None]:
# Summary statistics for weight (kg); the max row shows the upper extreme.
bmx.loc[:, 'BMXWT'].describe()

In [None]:
# 635 kg is the heaviest human weight on record, so anything strictly above
# it is replaced with NaN.
too_heavy = bmx['BMXWT'].gt(635)
bmx.loc[too_heavy, 'BMXWT'] = np.nan

# Show the summary again after the replacement.
bmx.loc[:, 'BMXWT'].describe()

In [None]:
# Z-scores of the smallest and largest weights, using NaN-aware NumPy
# reductions so missing values are ignored.
weights = bmx['BMXWT']

mean_wt, std_wt = np.nanmean(weights), np.nanstd(weights)
min_wt, max_wt = np.nanmin(weights), np.nanmax(weights)

# z = (value - mean) / standard deviation, evaluated for each extreme.
low_wt_zscore, high_wt_zscore = (
    (extreme - mean_wt) / std_wt for extreme in (min_wt, max_wt)
)

# print() joins its arguments with a single space, giving "label: value".
print('Max weight z-score:', high_wt_zscore)
print('Min weight z-score:', low_wt_zscore)

Saturated Data
---

Survey instructions: list ages 85 and above as 85

In [None]:
# Largest value in the age column, ignoring missing entries.
np.nanmax(demo.loc[:, 'RIDAGEYR'])

In [None]:
# Top-code age: any value above the survey's cap of 85 is set to 85.
over_cap = demo['RIDAGEYR'].gt(85)
demo.loc[over_cap, 'RIDAGEYR'] = 85

Individual Practice
---

1. Find and remove any nonsensical categorical values in the occupation file
    * Check OCQ130, OCQ140, OCQ150, OCQ160
2. Find the z-score of the maximum and minimum values in the occupation column OCQ180 - Hours worked last week at all jobs ([see codebook for more information](https://wwwn.cdc.gov/Nchs/Nhanes/1999-2000/OCQ.htm#OCQ180))
    * Exclude values 7777 and 9999


In [None]:
# Your Code Here

Save The Data
---

In [None]:
# NOTE(review): these writes are disabled (presumably on purpose — confirm).
# Uncommenting them saves the modified DataFrames under the same file names
# they were loaded from, overwriting the original CSV files. index=False
# keeps the DataFrame index out of the written files.
# demo.to_csv('Demographics.csv', index=False)
# bmx.to_csv('BodyMeasures.csv', index=False)
# ocq.to_csv('Occupation.csv', index=False)