In [1]:
import numpy as np
import scipy.stats as stats
import csv
import pandas as pd

# this line tells jupyter notebook to put the plots in the notebook rather than saving them to file.
%matplotlib inline

# this line makes plots prettier on mac retina screens. If you don't have one it shouldn't do anything.
%config InlineBackend.figure_format = 'retina'

In [2]:
drug_file = './drug-use-by-age.csv'

df_drug = pd.read_csv(drug_file)
df_drug.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 28 columns):
age                        17 non-null object
n                          17 non-null int64
alcohol-use                17 non-null float64
alcohol-frequency          17 non-null float64
marijuana-use              17 non-null float64
marijuana-frequency        17 non-null float64
cocaine-use                17 non-null float64
cocaine-frequency          17 non-null object
crack-use                  17 non-null float64
crack-frequency            17 non-null object
heroin-use                 17 non-null float64
heroin-frequency           17 non-null object
hallucinogen-use           17 non-null float64
hallucinogen-frequency     17 non-null float64
inhalant-use               17 non-null float64
inhalant-frequency         17 non-null object
pain-releiver-use          17 non-null float64
pain-releiver-frequency    17 non-null float64
oxycontin-use              17 non-null float64
oxycontin-f

In [4]:
# drop rows from 11 to end
df_drug = df_drug.loc[:12]
df_drug

Unnamed: 0,age,n,alcohol-use,alcohol-frequency,marijuana-use,marijuana-frequency,cocaine-use,cocaine-frequency,crack-use,crack-frequency,...,oxycontin-use,oxycontin-frequency,tranquilizer-use,tranquilizer-frequency,stimulant-use,stimulant-frequency,meth-use,meth-frequency,sedative-use,sedative-frequency
0,12,2798,3.9,3.0,1.1,4.0,0.1,5.0,0.0,-,...,0.1,24.5,0.2,52.0,0.2,2.0,0.0,-,0.2,13.0
1,13,2757,8.5,6.0,3.4,15.0,0.1,1.0,0.0,3.0,...,0.1,41.0,0.3,25.5,0.3,4.0,0.1,5.0,0.1,19.0
2,14,2792,18.1,5.0,8.7,24.0,0.1,5.5,0.0,-,...,0.4,4.5,0.9,5.0,0.8,12.0,0.1,24.0,0.2,16.5
3,15,2956,29.2,6.0,14.5,25.0,0.5,4.0,0.1,9.5,...,0.8,3.0,2.0,4.5,1.5,6.0,0.3,10.5,0.4,30.0
4,16,3058,40.1,10.0,22.5,30.0,1.0,7.0,0.0,1.0,...,1.1,4.0,2.4,11.0,1.8,9.5,0.3,36.0,0.2,3.0
5,17,3038,49.3,13.0,28.0,36.0,2.0,5.0,0.1,21.0,...,1.4,6.0,3.5,7.0,2.8,9.0,0.6,48.0,0.5,6.5
6,18,2469,58.7,24.0,33.7,52.0,3.2,5.0,0.4,10.0,...,1.7,7.0,4.9,12.0,3.0,8.0,0.5,12.0,0.4,10.0
7,19,2223,64.6,36.0,33.4,60.0,4.1,5.5,0.5,2.0,...,1.5,7.5,4.2,4.5,3.3,6.0,0.4,105.0,0.3,6.0
8,20,2271,69.7,48.0,34.0,60.0,4.9,8.0,0.6,5.0,...,1.7,12.0,5.4,10.0,4.0,12.0,0.9,12.0,0.5,4.0
9,21,2354,83.2,52.0,33.0,52.0,4.8,5.0,0.5,17.0,...,1.3,13.5,3.9,7.0,4.1,10.0,0.6,2.0,0.3,9.0


In [5]:
# keep target columns
list_columns = ['age','n','inhalant-use','inhalant-frequency','marijuana-use','marijuana-frequency','tranquilizer-use','tranquilizer-frequency']
df_drug = df_drug[list_columns]
df_drug

Unnamed: 0,age,n,inhalant-use,inhalant-frequency,marijuana-use,marijuana-frequency,tranquilizer-use,tranquilizer-frequency
0,12,2798,1.6,19.0,1.1,4.0,0.2,52.0
1,13,2757,2.5,12.0,3.4,15.0,0.3,25.5
2,14,2792,2.6,5.0,8.7,24.0,0.9,5.0
3,15,2956,2.5,5.5,14.5,25.0,2.0,4.5
4,16,3058,3.0,3.0,22.5,30.0,2.4,11.0
5,17,3038,2.0,4.0,28.0,36.0,3.5,7.0
6,18,2469,1.8,4.0,33.7,52.0,4.9,12.0
7,19,2223,1.4,3.0,33.4,60.0,4.2,4.5
8,20,2271,1.5,4.0,34.0,60.0,5.4,10.0
9,21,2354,1.4,2.0,33.0,52.0,3.9,7.0


In [14]:
# apply formular to all columns accept name & n columns
# (cell's value * n)/ 100
# df_drug = df_drug.iloc[:,2:]
df_drug.iloc[:,2] = df_drug.iloc[:,2].apply(lambda row: row['n']* row[''])
df_drug

Unnamed: 0,inhalant-use,inhalant-frequency,marijuana-use,marijuana-frequency,tranquilizer-use,tranquilizer-frequency
0,1.6,19.0,1.1,4.0,0.2,52.0
1,2.5,12.0,3.4,15.0,0.3,25.5
2,2.6,5.0,8.7,24.0,0.9,5.0
3,2.5,5.5,14.5,25.0,2.0,4.5
4,3.0,3.0,22.5,30.0,2.4,11.0
5,2.0,4.0,28.0,36.0,3.5,7.0
6,1.8,4.0,33.7,52.0,4.9,12.0
7,1.4,3.0,33.4,60.0,4.2,4.5
8,1.5,4.0,34.0,60.0,5.4,10.0
9,1.4,2.0,33.0,52.0,3.9,7.0
