# Python for data analysis : Drug consumption case
---

In [3]:
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Remote location of the raw data file (UCI Machine Learning Repository).
DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00373/drug_consumption.data"

# header=None: no row of the file is used as column names, so the frame
# gets default integer column labels until descriptive names are assigned.
dataset = pd.read_csv(DATA_URL, header=None)

# Alternative (disabled): read from a local copy instead of downloading.
# !ls /datasets/drug_consumption_dataset
# dataset = pd.read_csv("/datasets/drug_consumption_dataset/drug_consumption.data", header=None)

In [5]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
dataset.shape

(1885, 32)

In [6]:
# Descriptive names for the 32 unnamed columns of the data file, listed in
# the order in which they are later assigned to the frame.

# Record identifier followed by the respondent attributes.
respondent_columns = ['ID', 'AGE', 'GENDER', 'EDUCATION_LEVEL', 'COUNTRY', 'ETHNICITY']

# Score / trait columns.
# NOTE(review): 'SENSATION_SEEING' looks like a typo for 'SENSATION_SEEKING';
# it is kept unchanged because other cells may refer to the column by this name.
score_columns = [
    'NSCORE_VALUE', 'ESCORE_VALUE', 'OSCORE_VALUE', 'ASCORE_VALUE',
    'CSCORE_VALUE', 'IMPULSIVENESS', 'SENSATION_SEEING',
]

# One '<SUBSTANCE>_CONSUMPTION' column per substance, in file order.
substances = [
    'ALCOHOL', 'AMPHET', 'AMYL', 'BENZOS', 'CAFFEINE', 'CANNABIS', 'CHOCOLATE',
    'COKE', 'CRACK', 'ECSTASY', 'HEROIN', 'KETAMINE', 'LEGAL_HIGHS', 'LSD',
    'METH', 'MAGIC_MUSHROOMS', 'NICOTINE', 'SEMER', 'VSA',
]
consumption_columns = [f'{substance}_CONSUMPTION' for substance in substances]

columns = respondent_columns + score_columns + consumption_columns

# Sanity check: the count must match the number of columns in the frame.
len(columns)

32

In [7]:
# Replace the frame's column labels with the descriptive names; the list is
# applied positionally, so its length must equal the frame's column count.
dataset.columns = columns

In [8]:
# Preview the first rows to check that the new names line up with the values.
dataset.head()

Unnamed: 0,ID,AGE,GENDER,EDUCATION_LEVEL,COUNTRY,ETHNICITY,NSCORE_VALUE,ESCORE_VALUE,OSCORE_VALUE,ASCORE_VALUE,...,ECSTASY_CONSUMPTION,HEROIN_CONSUMPTION,KETAMINE_CONSUMPTION,LEGAL_HIGHS_CONSUMPTION,LSD_CONSUMPTION,METH_CONSUMPTION,MAGIC_MUSHROOMS_CONSUMPTION,NICOTINE_CONSUMPTION,SEMER_CONSUMPTION,VSA_CONSUMPTION
0,1,0.49788,0.48246,-0.05921,0.96082,0.126,0.31287,-0.57545,-0.58331,-0.91699,...,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL2,CL0,CL0
1,2,-0.07854,-0.48246,1.98437,0.96082,-0.31685,-0.67825,1.93886,1.43533,0.76096,...,CL4,CL0,CL2,CL0,CL2,CL3,CL0,CL4,CL0,CL0
2,3,0.49788,-0.48246,-0.05921,0.96082,-0.31685,-0.46725,0.80523,-0.84732,-1.6209,...,CL0,CL0,CL0,CL0,CL0,CL0,CL1,CL0,CL0,CL0
3,4,-0.95197,0.48246,1.16365,0.96082,-0.31685,-0.14882,-0.80615,-0.01928,0.59042,...,CL0,CL0,CL2,CL0,CL0,CL0,CL0,CL2,CL0,CL0
4,5,0.49788,0.48246,1.98437,0.96082,-0.31685,0.73545,-1.6334,-0.45174,-0.30172,...,CL1,CL0,CL0,CL1,CL0,CL0,CL2,CL2,CL0,CL0


In [14]:
# Cleaning code
#
# Reference tables mapping the numeric codes stored in the categorical
# columns to their labels, each followed by a look at the codes actually
# present. Only the last expression of a cell is rendered, so the COUNTRY
# codes are the ones displayed; the earlier unique() calls show nothing.

# GENDER
#   positive value -> "F"
#   otherwise      -> "M"
dataset["GENDER"].unique()

# AGE
#   -0.95197 -> 18-24
#   -0.07854 -> 25-34
#    0.49788 -> 35-44
#    1.09449 -> 45-54
#    1.82213 -> 55-64
#    2.59171 -> 65+
dataset["AGE"].unique()

# EDUCATION LEVEL
#   -2.43591 -> Left school before 16 years
#   -1.73790 -> Left school at 16 years
#   -1.43719 -> Left school at 17 years
#   -1.22751 -> Left school at 18 years
#   -0.61113 -> Some college or university, no certificate or degree
#   -0.05921 -> Professional certificate/ diploma
#    0.45468 -> University degree
#    1.16365 -> Masters degree
#    1.98437 -> Doctorate degree
dataset["EDUCATION_LEVEL"].unique()

# COUNTRY
#   -0.09765 -> Australia
#    0.24923 -> Canada
#   -0.46841 -> New Zealand
#   -0.28519 -> Other
#    0.21128 -> Republic of Ireland
#    0.96082 -> UK
#   -0.57009 -> USA
dataset["COUNTRY"].unique()

array([ 0.96082,  0.24923, -0.57009, -0.28519, -0.09765,  0.21128,
       -0.46841])

In [15]:
# Plots

# Pie charts: gender split (men/women), age-group distribution, country distribution
# Histogram: most consumed drugs
# Open question: is there a link between ethnicity and education level?
# Interactive plots : https://towardsdatascience.com/interactive-graphs-in-python-830b1e6c197f

In [17]:
# https://plotly.com/python/histograms/
# https://plotly.com/python/

!pip install plotly
import plotly.express as px
df = px.data.tips()
fig = px.histogram(df, x="total_bill", color="sex")
fig.show()

Collecting plotly
  Downloading plotly-4.14.0-py2.py3-none-any.whl (13.2 MB)
     |████████████████████████████████| 13.2 MB 19.2 MB/s
Collecting retrying>=1.3.3
  Downloading retrying-1.3.3.tar.gz (10 kB)
Building wheels for collected packages: retrying
  Building wheel for retrying (setup.py) ... done
  Created wheel for retrying: filename=retrying-1.3.3-py3-none-any.whl size=11429 sha256=f3ba7dd49ab72751627e551568319fe93be691838b90fa59f2c48cfc80b91886
  Stored in directory: /home/jovyan/.cache/pip/wheels/f9/8d/8d/f6af3f7f9eea3553bc2fe6d53e4b287dad18b06a861ac56ddf
Successfully built retrying
Installing collected packages: retrying, plotly
Successfully installed plotly-4.14.0 retrying-1.3.3
You should consider upgrading via the '/opt/venv/bin/python -m pip install --upgrade pip' command.
