In [1]:
import pandas as pd
import numpy as np
import re

import plotly.express as px

In [4]:
# Load the drug dataset; the first CSV column is used as the row index.
df_raw = pd.read_csv('../Data/drug_data.csv', index_col=0)
# Preview the first rows to sanity-check the load.
df_raw.head()

Unnamed: 0,CASEID,DRUGID_1,CATID_1_1,CATID_2_1,CATID_3_1,ROUTE_1,TOXTEST_1,sdled_1_1,sdled_2_1,sdled_3_1,...,QUARTER,DAYPART,NUMSUBS,CASETYPE,DISPOSITION,ALLABUSE,METRO,AGECAT,SEX,RACE
0,1,865,105,110,2005,1,2,1,2.0,-7.0,...,4,1,2,8,6,1,2,4,1,2
1,2,2077,81,82,283,-8,2,17,67.0,67.01,...,3,4,1,4,7,0,2,11,1,3
2,3,2313,1,12,-7,-8,2,17,64.0,64.99,...,4,3,1,4,1,0,7,11,2,2
3,4,234,358,99,215,2,2,17,73.0,73.01,...,2,2,1,4,7,0,10,2,1,3
4,5,865,105,110,2005,1,2,1,2.0,-7.0,...,3,4,3,8,8,1,1,6,1,3


In [20]:
# List every column label of the loaded frame.
df_raw.columns

Index(['CASEID', 'DRUGID_1', 'CATID_1_1', 'CATID_2_1', 'CATID_3_1', 'ROUTE_1',
       'TOXTEST_1', 'sdled_1_1', 'sdled_2_1', 'sdled_3_1', 'sdled_4_1',
       'sdled_5_1', 'sdled_6_1', 'DRUGID_2', 'CATID_1_2', 'CATID_2_2',
       'CATID_3_2', 'ROUTE_2', 'TOXTEST_2', 'sdled_1_2', 'sdled_2_2',
       'sdled_3_2', 'sdled_4_2', 'sdled_5_2', 'sdled_6_2', 'DRUGID_3',
       'CATID_1_3', 'CATID_2_3', 'CATID_3_3', 'ROUTE_3', 'TOXTEST_3',
       'sdled_1_3', 'sdled_2_3', 'sdled_3_3', 'sdled_4_3', 'sdled_5_3',
       'sdled_6_3', 'ALCOHOL', 'NONALCILL', 'PHARMA', 'NONMEDPHARMA',
       'CASEWGT', 'YEAR', 'QUARTER', 'DAYPART', 'NUMSUBS', 'CASETYPE',
       'DISPOSITION', 'ALLABUSE', 'METRO', 'AGECAT', 'SEX', 'RACE'],
      dtype='object')

In [5]:
# One-hot encode CASETYPE and give the indicator columns for codes 1 and 2
# readable names; the remaining codes keep their numeric column labels.
# The dtype is pinned because pandas >= 2.0 defaults to bool indicators, which
# would be exported as True/False instead of the 0/1 integers used downstream.
casetype_dummies = (
    pd.get_dummies(df_raw['CASETYPE'], dtype='uint8')
    .rename(columns={1: 'SUICIDE', 2: 'SEEKING_DETOX'})
)

In [6]:
# Columns removed from the raw frame before building the abuse-indicator table.
columns_to_drop = [
    'CASEID',
    'YEAR',
    'QUARTER',
    'DAYPART',
    'NUMSUBS',
    'DISPOSITION',
]
# drop() returns a new frame, so df_raw itself is left untouched.
df_drop_unwanted = df_raw.drop(columns_to_drop, axis=1)

In [9]:
# Append the SUICIDE and SEEKING_DETOX indicator columns to the trimmed frame.
# The columns are selected by name rather than by position: positional slicing
# would silently pick the wrong indicators if CASETYPE code 1 or 2 were absent
# from the data, whereas a missing name now raises a KeyError.
df_abuse_indicators = pd.concat(
    [df_drop_unwanted, casetype_dummies[['SUICIDE', 'SEEKING_DETOX']]],
    axis=1,
)

In [11]:
# Persist the abuse-indicator table as CSV next to the other data files.
df_abuse_indicators.to_csv('../Data/abuse_indicator_data.csv')

In [10]:
# Preview the first rows, including the appended indicator columns.
df_abuse_indicators.head()

Unnamed: 0,DRUGID_1,CATID_1_1,CATID_2_1,CATID_3_1,ROUTE_1,TOXTEST_1,sdled_1_1,sdled_2_1,sdled_3_1,sdled_4_1,...,NONMEDPHARMA,CASEWGT,CASETYPE,ALLABUSE,METRO,AGECAT,SEX,RACE,SUICIDE,SEEKING_DETOX
0,865,105,110,2005,1,2,1,2.0,-7.0,-7.0,...,0,0.942635,8,1,2,4,1,2,0,0
1,2077,81,82,283,-8,2,17,67.0,67.01,67.014,...,0,5.992011,4,0,2,11,1,3,0,0
2,2313,1,12,-7,-8,2,17,64.0,64.99,-7.0,...,0,4.723172,4,0,7,11,2,2,0,0
3,234,358,99,215,2,2,17,73.0,73.01,73.0106,...,0,4.080147,4,0,10,2,1,3,0,0
4,865,105,110,2005,1,2,1,2.0,-7.0,-7.0,...,0,5.177709,8,1,1,6,1,3,0,0


In [12]:
# Load the drug -> category/sdled mapping and discard the two deepest sdled
# levels. errors='ignore' keeps this cell re-runnable: the trimmed frame is
# later written back to this same file, so on a second run the columns are
# already gone and a strict drop would raise a KeyError.
df_cat_sdled_mapping = (
    pd.read_csv('../Data/cat_sdled_mapping.csv', index_col=0)
    .drop(columns=['sdled_5', 'sdled_6'], errors='ignore')
)

In [14]:
# NOTE(review): this writes to the same path the mapping was read from, so the
# original file contents are replaced by the trimmed frame.
df_cat_sdled_mapping.to_csv('../Data/cat_sdled_mapping.csv')

In [13]:
# Build one wide row holding the mapping columns of three drugs side by side;
# each drug's columns get its 1-based position as a suffix (_1, _2, _3).
drug_frames = []
for index, drug in enumerate([2420, 1016, 152]):
    # Match on the DRUGID column only. Comparing the id against every column
    # would also select rows whose CATID_*/sdled_* values happen to equal it.
    drug_row = (
        df_cat_sdled_mapping[df_cat_sdled_mapping['DRUGID'].eq(drug)]
        .reset_index(drop=True)  # discard the old index instead of dropping it by name
        .drop(columns=['DRUGID'])
    )
    drug_row.columns = [f'{c}_{index+1}' for c in drug_row.columns]
    drug_frames.append(drug_row)
# Concatenate once rather than growing the frame on every iteration.
df_test = pd.concat(drug_frames, axis=1)

In [9]:
# Look up the mapping row for drug 1254. The id is matched against the DRUGID
# column only; testing every column would also return rows whose category or
# sdled values happen to equal 1254.
drug_row = (
    df_cat_sdled_mapping[df_cat_sdled_mapping['DRUGID'].eq(1254)]
    .reset_index(drop=True)  # discard the old index instead of dropping it by name
    .drop(columns=['DRUGID'])
)

In [11]:
# Append a positional suffix to every column label of drug_row.
# NOTE(review): `index` is not defined in this cell; presumably it is the loop
# variable left over from an earlier enumerate loop -- confirm. Re-running this
# cell appends the suffix again because the labels are rewritten in place.
drug_row.columns = [f'{c}_{index+1}' for c in drug_row.columns]

In [14]:
# Display the assembled wide row.
df_test

Unnamed: 0,CATID_1_1,CATID_2_1,CATID_3_1,sdled_1_1,sdled_2_1,sdled_3_1,sdled_4_1,sdled_5_1,sdled_6_1,CATID_1_2,...,sdled_6_2,CATID_1_3,CATID_2_3,CATID_3_3,sdled_1_3,sdled_2_3,sdled_3_3,sdled_4_3,sdled_5_3,sdled_6_3
0,2006,0,0,17,79.0,0.0,0.0,0.0,0.0,57,...,37.504,57,67,69,17,33.0,46.3,46.32,46.3201,0.0


In [2]:
# Load the demographic dataset; the first CSV column is used as the row index.
# NOTE(review): this rebinds df_raw, which the earlier cells bound to the drug
# dataset, so cells above and below this point see different data under one name.
df_raw = pd.read_csv('../Data/demographic_data.csv', index_col=0)
# Preview the first rows to sanity-check the load.
df_raw.head()

Unnamed: 0,CASEID,METRO,AGECAT,SEX,RACE,DISPOSITION,ALLABUSE,CASETYPE,ALCOHOL,NONALCILL,PHARMA,NONMEDPHARMA
0,1,2,4,1,2,6,1,8,1,1,0,0
1,2,2,11,1,3,7,0,4,0,0,1,0
2,3,7,11,2,2,1,0,4,0,0,1,0
3,4,10,2,1,3,7,0,4,0,0,1,0
4,5,1,6,1,3,8,1,8,1,1,0,0


In [3]:
# Number of rows per CASETYPE code, returned as a one-column frame named
# Before_Classification and indexed by the code.
df_casetype_count_before = (
    df_raw
    .groupby('CASETYPE')
    .agg(Before_Classification=('CASETYPE', 'count'))
)

In [4]:
# Display the per-CASETYPE row counts.
df_casetype_count_before

Unnamed: 0_level_0,Before_Classification
CASETYPE,Unnamed: 1_level_1
1,7872
2,13529
3,7421
4,85777
5,16810
6,768
7,3125
8,83648


In [None]:
# Hard-coded per-case-type figures keyed by display label.
# NOTE(review): the "after" values include a negative entry for 'Other', so
# they look like changes relative to the "before" figures rather than
# absolute counts -- confirm.
count_before_classification = {
    'Seeking Detox': 14841,
    'Adverse Reaction': 88096,
    'Suicide Attempt': 9033,
    'Overmedication': 18146,
    'Malicious Poisoning': 793,
    'Alcohol Only(Age<21)': 7421,
    'Accidental Injestion': 3253,
    'Other': 87628,
}

count_after_classification = {
    'Seeking Detox': 42144,
    'Adverse Reaction': 6563,
    'Suicide Attempt': 13380,
    'Overmedication': 6730,
    'Malicious Poisoning': 12038,
    'Alcohol Only(Age<21)': 0,
    'Accidental Injestion': 2793,
    'Other': -87628,
}

In [12]:
# Column-oriented figures for the case-type table; the positions in each list
# line up with the labels in `casetype` below.
data = {
    'Before_classification': [
        14841, 88096, 9033, 18146, 793, 7421, 3253, 87628,
    ],
    'After_classification_count': [
        42144, 6563, 13380, 6730, 12038, 0, 2793, -87628,
    ],
}
# Row labels, one per case type.
casetype = [
    'Seeking Detox',
    'Adverse Reaction',
    'Suicide Attempt',
    'Overmedication',
    'Malicious Poisoning',
    'Alcohol Only(Age<21)',
    'Accidental Injestion',
    'Other',
]

In [13]:
# Tabulate the figures with one row per case-type label.
df_casetype = pd.DataFrame(data=data, index=casetype)

In [15]:
# After_classification is the "before" figure plus the per-type change stored
# in After_classification_count. Adding the two columns directly is the
# vectorized equivalent of the previous row-by-row apply.
df_casetype['After_classification'] = (
    df_casetype['Before_classification']
    + df_casetype['After_classification_count']
)

In [17]:
# Reshape the before/after columns into long form: one row per
# (classification column, case type) pair with its count, presumably for plotting.
df_final = (
    df_casetype[['Before_classification', 'After_classification']]
    .unstack()
    .reset_index()
    .rename(columns={'level_0':'Classification', 'level_1':'Casetype', 0: 'Count'})
)

In [5]:
# Reload the mapping from disk.
# NOTE(review): unlike the earlier load of this file, no index_col is passed
# here; presumably that is why a later cell has to drop an 'Unnamed: 0'
# column -- confirm before changing either side.
df_cat_sdled_mapping = pd.read_csv('../Data/cat_sdled_mapping.csv')

In [11]:
# Build one wide row holding the mapping columns of three drugs side by side;
# each drug's columns get its 1-based position as a suffix (_1, _2, _3).
drug_frames = []
for index, drug in enumerate([1253, 1254, 1255]):
    # Match on the DRUGID column only. Comparing the id against every column
    # would also select rows whose CATID_*/sdled_* values happen to equal it.
    drug_row = (
        df_cat_sdled_mapping[df_cat_sdled_mapping['DRUGID'].eq(drug)]
        .reset_index(drop=True)  # discard the old index instead of dropping it by name
        .drop(columns=['Unnamed: 0', 'DRUGID'])
    )
    # Printed before suffixing so the unsuffixed labels are shown.
    print(drug_row)

    drug_row.columns = [f'{c}_{index+1}' for c in drug_row.columns]

    drug_frames.append(drug_row)
# Concatenate once rather than growing the frame on every iteration.
df_test = pd.concat(drug_frames, axis=1)

   CATID_1  CATID_2  CATID_3  sdled_1  sdled_2  sdled_3  sdled_4  sdled_5  \
0      105      114     2032        1      2.5      4.0      0.0      0.0   

   sdled_6  
0      0.0  
   CATID_1  CATID_2  CATID_3  sdled_1  sdled_2  sdled_3  sdled_4  sdled_5  \
0      105      114     2032        1      2.5      3.0      0.0      0.0   

   sdled_6  
0      0.0  
   CATID_1  CATID_2  CATID_3  sdled_1  sdled_2  sdled_3  sdled_4  sdled_5  \
0      105      114     2032        1      2.5      5.0     5.01      0.0   

   sdled_6  
0      0.0  


In [7]:
# Display the assembled wide row.
df_test

Unnamed: 0,Unnamed: 0_1,CATID_1_1,CATID_2_1,CATID_3_1,sdled_1_1,sdled_2_1,sdled_3_1,sdled_4_1,sdled_5_1,sdled_6_1,...,Unnamed: 0_3,CATID_1_3,CATID_2_3,CATID_3_3,sdled_1_3,sdled_2_3,sdled_3_3,sdled_4_3,sdled_5_3,sdled_6_3
0,4,105,114,2032,1,2.5,4.0,0.0,0.0,0.0,...,1,105,114,2032,1,2.5,5.0,5.01,0.0,0.0


In [4]:
# Toy label -> flag mapping, split into the labels flagged 1 and those flagged 0.
result = {'ALL_ABUSE': 0, 'Other': 1}
yes_abuse = [label for label in result if result[label] == 1]
no_abuse = [label for label in result if result[label] == 0]

In [5]:
# Display the labels whose flag is 1.
yes_abuse

['Other']

In [6]:
# Comma-separated string of the flagged labels.
','.join(yes_abuse)

'Other'

In [8]:
# dict.get yields None for keys that are missing from the mapping (3 here).
[{1: 'a', 2: 'b'}.get(key) for key in [1, 2, 3]]

['a', 'b', None]