# RFD Score 

Recency, Frequency & Duration of drug abuse

In this notebook, RFD scores are generated for heroin consumption and for aggregated drug consumption (heroin, meth and crack).

In [None]:
import numpy as np
import pandas as pd

In [8]:
# Read the gzip-compressed, tab-separated survey table into a DataFrame.
# NOTE(review): the recorded output below looks like the tail of a pandas
# warning raised by this call - confirm whether explicit dtypes are needed.
df = pd.read_csv(
    "data/NSDUH_2015_Tab.tsv.gz",
    compression="gzip",
    sep="\t",
)
# Trailing expression: displays (n_rows, n_columns) as the cell output.
df.shape

  interactivity=interactivity, compiler=compiler, result=result)


(57146, 2679)

## RFD Function

The universal function below is given the names of the source columns and returns a drug abuse probability in [0, 1] for each row.

In [52]:
from sklearn.preprocessing import minmax_scale

In [61]:
def rfd(df, r_col, f_col, d_col, evr_col):
    '''
    Compute a Recency / Frequency / Duration (RFD) score for every row of df.

    Parameters
    ----------
    df : pandas.DataFrame
        Data source holding the four columns named by the other arguments.
        Rows are processed by position, so any index is accepted.
    r_col : str
        Recency column. Codes 1 and 11 are weighted 1, codes 2 and 8 are
        weighted 0.5 and every other value is weighted 0.2.
    f_col : str
        Frequency column. Whole numbers from 1 to 365 are used as they
        are; any other value counts as 0.
    d_col : str
        Column subtracted from 2015 to obtain the duration
        (presumably the year of first use - TODO confirm).
    evr_col : str
        Indicator column; rows whose value is not 1 always score 0.

    Returns
    -------
    list of float
        One score per row, in row order, capped at 1. An empty frame
        yields an empty list.
    '''
    if len(df) == 0:
        return []

    # Plain arrays give positional access, so the result does not depend on
    # the frame's index labels (a filtered or re-indexed frame works too).
    recency = np.asarray(df[r_col])
    frequency = np.asarray(df[f_col])
    ever_used = np.asarray(df[evr_col]) == 1

    # Recency: three fixed weights selected by the recency code.
    is_recent = (recency == 1) | (recency == 11)
    is_intermediate = (recency == 2) | (recency == 8)
    recency_score = np.where(is_recent, 1.0,
                             np.where(is_intermediate, 0.5, 0.2))

    # Frequency: keep whole-number values in 1..365, zero out everything
    # else, then rescale the observed range linearly onto [0.3, 1].
    is_valid_count = np.isin(frequency, np.arange(1, 366))
    freq_score = np.where(is_valid_count, frequency, 0).astype(float)
    freq_score_mapped = np.interp(freq_score,
                                  (freq_score.min(), freq_score.max()),
                                  (0.3, 1))

    # Duration: distance from 2015, rescaled linearly onto [0.3, 1].
    # np.interp already maps min..max onto the target range, so no separate
    # min-max normalisation step is needed beforehand.
    duration = np.asarray(2015 - df[d_col], dtype=float)
    duration_mapped = np.interp(duration,
                                (duration.min(), duration.max()),
                                (0.3, 1))

    # Combine: frequency carries weight 6/5 and duration 1/5. For the lowest
    # recency weight together with a low frequency score, duration is
    # subtracted instead of added.
    freq_part = freq_score_mapped * 6 / 5
    duration_part = duration_mapped * 1 / 5
    subtract_duration = ~(is_recent | is_intermediate) & (freq_score_mapped <= .35)
    combined = recency_score * np.where(subtract_duration,
                                        freq_part - duration_part,
                                        freq_part + duration_part)

    # Cap at 1. Anything that is not strictly below 1 (including NaN)
    # becomes 1, exactly as the scalar `if RFD < 1` test did.
    capped = np.where(combined < 1, combined, 1.0)

    # Rows not flagged with 1 in the indicator column score 0.
    return np.where(ever_used, capped, 0.0).tolist()



## Calculate scores

We calculate a score for each drug (heroin, meth, crack) and aggregate them into a total score.

In [66]:
# One rfd() call per substance. Each tuple lists the recency, frequency,
# duration-source and indicator column names, in the order rfd() expects.
rfd_her, rfd_mth, rfd_crk = (
    rfd(df, *column_names)
    for column_names in (
        ('HERREC', 'HERYRTOT', 'IRHERYFU', 'HEREVER'),
        ('METHAMREC', 'METHAMYFQ', 'IRMETHAMYFU', 'METHAMEVR'),
        ('CRAKREC', 'CRKYRTOT', 'IRCRKYFU', 'CRKEVER'),
    )
)

In [68]:
# Row-wise sum of the three per-drug scores, capped at 1.
rfd_tot = [
    min(heroin + meth + crack, 1)
    for heroin, meth, crack in zip(rfd_her, rfd_mth, rfd_crk)
]

## Export results

We drop the individual meth and crack scores, as we only analyse them as part of the aggregate score; the heroin score is kept alongside the total.

In [71]:
# Attach the heroin score and the aggregated score to the frame as new columns.
for column_name, scores in (('HERRFD', rfd_her), ('TOTRFD', rfd_tot)):
    df[column_name] = scores

In [72]:
# Write the frame, including the new score columns, back out as a
# gzip-compressed, tab-separated file.
# NOTE(review): the index is written too (pandas default) - confirm that
# downstream readers expect the extra leading column.
df.to_csv(
    "data/NSDUH_2015_RFD_Tab.tsv.gz",
    compression="gzip",
    sep="\t",
)