In [None]:
!pip install fastdtw

from fastdtw import fastdtw
from scipy.spatial.distance import euclidean

In [None]:
import os
import pickle
from collections import defaultdict, Counter

import numpy as np
import pandas as pd
import tensorflow as tf
import scipy.interpolate as interp

import seaborn as sb
import matplotlib.pyplot as plt

from ml4h.TensorMap import TensorMap, Interpretation

In [None]:
def get_mean_trajectories(scores, output_columns, column='psychoactive_class', new_size = 0, out_col = "tag_0"):
    """Resample score trajectories to a fixed length and average them per group.

    Reads the notebook-level ``df`` (columns ``drug`` and ``column``) to map
    each drug to its group, and uses ``rec_dd`` for the nested result.

    Args:
        scores: Nested mapping ``scores[drug][testimonial][score_name]`` whose
            leaves are 1-D score sequences.
        output_columns: Score names whose accumulated sums are converted to a
            mean and a standard deviation. Score names outside this list are
            left as raw running sums, as before.
        column: Column of ``df`` naming the group a drug belongs to.
        new_size: Number of points every trajectory is resampled to. When
            ``<= 0`` each drug uses its own mean ``out_col`` length; arrays of
            different lengths cannot be summed, so groups mixing such drugs
            end up in the ``ValueError`` branch below.
        out_col: Score name whose length defines a testimonial's length.

    Returns:
        ``(standardized, means, stds)``. ``standardized`` mirrors ``scores``
        with resampled arrays; ``means`` and ``stds`` are Counters keyed by
        ``(group, score_name)`` and only hold keys that received at least one
        trajectory.
    """
    standardized_psycho = rec_dd()
    means_psycho = Counter()
    stds_psycho = Counter()
    # Number of trajectories actually summed into each (group, score) key.
    counts = Counter()
    new_sizes = {}
    for drug in scores:
        n_testimonials = len(scores[drug])
        lens = sum(len(scores[drug][t][out_col]) for t in scores[drug])
        # A drug without testimonials must not abort the whole run.
        mean_len = lens / n_testimonials if n_testimonials else 0.0
        new_sizes[drug] = new_size if new_size > 0 else int(mean_len)

        print(f'Drug {drug} from {n_testimonials} testimonials, mean length: {mean_len:0.1f} Interpt to: {new_sizes[drug]:0.0f}')
    print(f'Output columns: {output_columns}')
    for drug in scores:
        psychoactive_class = df[df.drug==drug][column].unique()[0]
        for t in scores[drug]:
            for s in scores[drug][t]:
                series = scores[drug][t][s]
                try:
                    interpolator = interp.interp1d(np.arange(len(series)), series)
                    resampled = interpolator(np.linspace(0, len(series)-1, new_sizes[drug]))
                    standardized_psycho[drug][t][s] = resampled
                    means_psycho[psychoactive_class,s] += resampled
                    # Sum of squares, turned into a variance after the loop.
                    stds_psycho[psychoactive_class,s] += resampled*resampled
                except ValueError as e:
                    print(f'vall err {drug} {t} len: {len(series)} e:{e}')
                    break
                counts[psychoactive_class,s] += 1

    for pc in df[column].unique():
        n = 0
        for drug in df[df[column]==pc].drug.unique():
            # Membership test first: indexing the recursive defaultdict would
            # insert empty entries for drugs that have no scores.
            if drug in standardized_psycho:
                n += len(standardized_psycho[drug])
        print(f'Class {pc} has length: {n}')
        for s in output_columns:
            contributions = counts[pc,s]
            if contributions == 0:
                continue
            mean = means_psycho[pc,s] / contributions
            variance = stds_psycho[pc,s] / contributions - mean*mean
            means_psycho[pc,s] = mean
            # Rounding can push a zero variance slightly negative.
            stds_psycho[pc,s] = np.sqrt(np.maximum(variance, 0))
    return standardized_psycho, means_psycho, stds_psycho

def plot_tag_trajectories(labels, means, stds, keep_drugs, keep_tags,
                   figsize=(42, 42), cols=5, rows=4, dpi=300, ncol=4):
    """Draw one panel per selected tag with the mean trajectory of each drug.

    Reads the notebook-level ``output_columns`` and ``itags``.

    Args:
        labels: Iterable of all drug names; its sorted order fixes each drug's
            colour and marker.
        means: Mapping ``(drug, tag_key) -> array`` of mean trajectories.
        stds: Mapping with the same keys holding the spread of each trajectory.
        keep_drugs: Drugs that are actually drawn.
        keep_tags: Tag names (values of ``itags``) that get a panel.
        figsize, cols, rows, dpi: Forwarded to ``plt.subplots``.
        ncol: Number of legend columns.
    """
    # squeeze=False always returns a 2-D array of axes, so a 1x1 grid
    # (a single tag) can still be flattened with ravel().
    _, axes = plt.subplots(rows, cols, figsize=figsize, dpi=dpi, squeeze=False)
    cmap = plt.get_cmap('hsv')
    colors = cmap(np.linspace(0, 1, len(labels)))
    markers = "v.,o1^2>348<spdxh*PH+XD|"
    o_drugs = sorted(labels)
    o_tags = [ot for ot in output_columns if 'tag_' in ot and itags[ot] in keep_tags]

    # zip stops at the shorter input: tags beyond rows*cols are not drawn.
    for sentiment, ax in zip(o_tags, axes.ravel()):
        for i, (drug, color) in enumerate(zip(o_drugs, colors)):
            if drug not in keep_drugs:
                continue
            if (drug,sentiment) not in means:
                continue
            trajectory = means[drug,sentiment]
            spread = stds[drug,sentiment]
            steps = range(len(trajectory))
            ax.plot(steps, trajectory,
                    c=color, marker=markers[i%len(markers)],
                    label=drug.capitalize() if len(drug) > 4 else drug.upper(),
                    fillstyle='none' if i%2==0 else 'full')
            # Shaded band of half a standard deviation on either side.
            ax.fill_between(steps,
                            trajectory - spread/2,
                            trajectory + spread/2,
                            color=color, alpha=0.05)
        ax.set_title(f'{itags[sentiment].replace("_", " ")} Trajectory of Trip')
        ax.set_xlabel('Time in Narrative')
        ax.set_ylabel(f'{itags[sentiment].replace("_", " ")}')
        ax.set_xticks(())
        ax.set_yticks(())
        ax.legend(ncol=ncol)
    plt.tight_layout()



def get_drug_tag_scape(means, labels):
    """Rank drugs against each other on every tag by their mean score.

    Args:
        means: Mapping ``(drug, score_name) -> array``. Entries whose name does
            not contain ``'tag'`` or whose value is a plain float are skipped.
        labels: Unused; kept so existing calls keep working.

    Returns:
        ``defaultdict(dict)`` mapping ``drug -> {tag: rank}``, where ``rank``
        is the drug's position in ascending order of mean score minus half the
        number of ranked drugs.
    """
    # tag -> [(drug, mean score), ...]; keeping the drug next to its score
    # means a drug missing one tag cannot shift the others' positions.
    per_tag = defaultdict(list)
    seen_drugs = set()
    for drug1,sentiment1 in means:
        print(f'\nat sentiment {sentiment1} d:{drug1} ')
        trajectory = means[drug1,sentiment1]
        if 'tag' not in sentiment1 or isinstance(trajectory, float):
            continue
        per_tag[sentiment1].append((drug1, trajectory.mean()))
        seen_drugs.add(drug1)
    half = len(seen_drugs)/2

    drug_sentiments = defaultdict(dict)
    for sentiment, entries in per_tag.items():
        order = np.argsort([score for _, score in entries])
        print(f'\nat sentiment {sentiment} np.argsort(avg[sentiment]):{order} ')
        for i, idx in enumerate(order):
            drug_sentiments[entries[idx][0]][sentiment] = i - half
    # .get() so this debug print cannot insert an empty "entactogen" entry.
    print(f'\nat sentiment {drug_sentiments.get("entactogen", {})} ')
    return drug_sentiments
        

def plot_tag_drug(title, agrees, ymin, ymax, figsize=(2, 8)):
    """Draw a horizontal bar chart of tag rankings for one drug or class.

    Reads the notebook-level ``itags`` to label the bars.

    Args:
        title: Figure title.
        agrees: Sequence of ``(tag_key, value)`` pairs, drawn bottom to top.
        ymin, ymax: Values mapped to the two ends of the colour map.
        figsize: Figure size in inches.
    """
    plt.figure(figsize=figsize, dpi=300)
    palette = plt.get_cmap("bwr_r")
    bar_values = [pair[1] for pair in agrees]
    # Linear map so that ymin -> 0 and ymax -> 1 before the colour lookup.
    scaled = (np.array(bar_values) - ymin) / (ymax - ymin)
    plt.barh(range(len(agrees)), bar_values, color=palette(scaled))
    plt.axvline(0.0, linestyle='dashed', c='orange')
    # Tick text is the first two words of the tag name.
    tick_names = []
    for pair in agrees:
        words = itags[pair[0]].split('_')[:2]
        tick_names.append(' '.join(words))
    plt.yticks(np.arange(len(agrees)), tick_names, ha='right')
    plt.title(title)
    plt.xlabel('Relative Ranking')
    plt.box(False)
    

def get_dtw(means, labels):
    """Compute signed pairwise DTW distances between trajectories, per score.

    Args:
        means: Mapping ``(drug, score_name) -> 1-D array``. Plain-float values
            and drugs absent from ``labels`` are skipped.
        labels: Sequence of drug/class names; row and column ``i`` of every
            matrix belong to ``labels[i]``.

    Returns:
        Dict ``score_name -> (len(labels), len(labels))`` array. Cell
        ``[i, j]`` is the fastdtw distance between ``labels[i]`` and
        ``labels[j]``, positive when the row trajectory has the larger mean
        and negative otherwise. Pairs without data stay 0.
    """
    # Index cells by position in `labels` so the matrix lines up with the
    # tick labels later drawn from the same sequence.
    label_index = {label: i for i, label in enumerate(labels)}

    by_sentiment = defaultdict(list)
    for (drug, sentiment), trajectory in means.items():
        if drug not in label_index or isinstance(trajectory, float):
            continue
        by_sentiment[sentiment].append((drug, trajectory, trajectory.mean()))

    distances = {}
    for sentiment, entries in by_sentiment.items():
        matrix = np.zeros((len(labels),len(labels)))
        for drug1, trajectory1, mean1 in entries:
            row = label_index[drug1]
            for drug2, trajectory2, mean2 in entries:
                distance, _ = fastdtw(trajectory1, trajectory2, dist=euclidean)
                sign = 1 if mean1 > mean2 else -1
                matrix[row, label_index[drug2]] = distance * sign
            print(f'Finished Drug {drug1} of {len(means)} ')
        distances[sentiment] = matrix
    return distances


def confusion_heatmap(confusion, labels, figsize=(12, 8),
                      title='Emotion Heatmap', rotation=30,
                      figure_path='./confusion_heatmap.png'):
    """Draw a labelled heatmap of a matrix, save it and show it.

    Args:
        confusion: 2-D array to draw.
        labels: Names used for both the row and the column ticks.
        figsize: Figure size in inches.
        title: Plot title; underscores are shown as spaces and the first two
            underscore-separated words label the colour bar.
        rotation: Rotation of the x tick labels in degrees.
        figure_path: Where the figure is written. The default matches the
            previously hard-coded location, so repeated calls overwrite it
            unless a distinct path is passed.
    """
    fig, ax = plt.subplots(figsize=figsize, dpi=300)
    ax = sb.heatmap(confusion, cmap='bwr', cbar_kws={"shrink": .8}, cbar=True, ax=ax)

    ax.set_title(title.replace('_', ' '))
    # The +0.5 offset centres each tick on its cell.
    ax.set_xticks(np.arange(confusion.shape[1]) + 0.5)
    ax.set_yticks(np.arange(confusion.shape[0]) + 0.5)
    cbar = ax.collections[0].colorbar
    cbar.set_ticks([np.min(confusion), np.max(confusion)])

    clabel = "\n".join(title.split('_')[:2])
    cbar.set_ticklabels([f'Less \n{clabel}',
                         f'More \n{clabel}'])
    labels_format = [l.capitalize() if len(l) > 5 else l.upper() for l in labels]
    ax.set_xticklabels(labels=labels_format, ha='right', rotation=rotation)
    ax.set_yticklabels(labels=labels_format, rotation=0)
    plt.tight_layout()
    out_dir = os.path.dirname(figure_path)
    # A bare filename has an empty dirname, which os.makedirs rejects.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(figure_path)
    plt.show()

In [None]:
def rec_dd():
    """Return a defaultdict whose missing keys are themselves new ``rec_dd`` dicts."""
    nested = defaultdict(rec_dd)
    return nested
#scores = rec_dd()

# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced by a trusted run of this pipeline.
with open("split_32_bertowid_unweighted_max150_v2022_05_13.pkl", "rb") as scores_file:
    scores = pickle.load(scores_file)

In [None]:
# Load the metadata table and derive integer class ids plus inverse-frequency
# weights for drugs, psychoactive classes, chemical classes and tags.
df = pd.read_csv('./all_split_32_v2022_05_10_meta_data.csv')
#df = pd.read_csv(f'./heroin_split_32_meta_data.csv')


step_size = 32

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the frame returned by read_csv.
df = df[df.text.notna()].copy()

df['psychoactive_class'] = df.psychoactive_class.apply(lambda x: x.strip())

drug2class = {d:i for i,d in enumerate(df.drug.unique())}
psychoactive2class = {d:i for i,d in enumerate(df.psychoactive_class.unique())}
ligand_chemical2class = {d:i for i,d in enumerate(df.ligand_chemical_class.unique())}
# Weight = total rows / rows of that category; int() keeps a zero count a
# ZeroDivisionError instead of a silent inf.
drug2weight = [len(df) / int((df.drug == d).sum()) for d in df.drug.unique()]
print(psychoactive2class)
df['drug_class'] = [drug2class[d] for d in df.drug]
df['psychoactive_class_int'] = [psychoactive2class[d] for d in df.psychoactive_class]
psychoactive2weight = [len(df) / int((df.psychoactive_class == d).sum()) for d in df.psychoactive_class.unique()]
df['ligand_chemical_int'] = [ligand_chemical2class[d] for d in df.ligand_chemical_class]
chemical2weight = [len(df) / int((df.ligand_chemical_class == d).sum()) for d in df.ligand_chemical_class.unique()]

tag_columns = [f'tag_{i}' for i in range(52)]
df[tag_columns] = df[tag_columns].fillna(0).astype(int)
tag_totals = df[tag_columns].sum()
# Only tags with more than 100 positive rows get a weight.
tag2weight = {t: len(df) / tag_totals[t] for t in tag_columns if tag_totals[t] > 100}

In [None]:
# Tag names in column order: the name at position i belongs to column tag_<i>.
_tag_names = [
    'Small_Group', 'General', 'First_Times', 'Alone',
    'Difficult_Experiences', 'Glowing_Experiences', 'Retrospective_Summary',
    'Various', 'Unknown_Context', 'Mystical_Experiences', 'Health_Problems',
    'Combinations', 'Not_Applicable', 'Bad_Trips', 'Hangover_Days_After',
    'Entities_Beings', 'Music_Discussion', 'Addiction_Habituation',
    'Post_Trip_Problems', 'Nature_Outdoors', 'Relationships', 'Depression',
    'Therapeutic_Intent_or_Outcome', 'Overdose', 'Medical_Use',
    'Sex_Discussion', 'Train_Wrecks_Trip_Disasters', 'Guides_Sitters',
    'Rave_Dance_Event', 'Preparation_Recipes', 'Festival_Lg_Crowd',
    'Health_Benefits', 'Large_Group', 'Multi-Day_Experience', 'Club_Bar',
    'What_Was_in_That', 'Personal_Preparation', 'HPPD_Lasting_Visuals',
    'Families', 'Second_Hand_Report', 'Loss_of_Magic', 'Hospital',
    'Public_Space', 'School', 'Poetry', 'Performance_Enhancement',
    'Large_Party', 'Group_Ceremony', 'Workplace', 'Cultivation_Synthesis',
    'Pregnancy_Baby', 'Military',
]
# tag name -> column key
tags = {name: f'tag_{position}' for position, name in enumerate(_tag_names)}

# column key -> tag name with parentheses and slashes removed
_strip_punctuation = str.maketrans('', '', '()/')
itags = {}
for name, column_key in tags.items():
    itags[column_key] = name.translate(_strip_punctuation)

# cleaned tag name -> integer column index
ctags = {}
for column_key, clean_name in itags.items():
    ctags[clean_name] = int(column_key.replace('tag_', ''))

In [None]:
# Three class-id columns followed by the first 35 tag columns.
output_columns = [
    'psychoactive_class_int',
    'ligand_chemical_int',
    'drug_class',
    *(f'tag_{i}' for i in range(35)),  # if sum(df[f'tag_{i}']) > 100
]


# weighted_loss = False
# tensor_maps_out = []
# scce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# def weight_scce(weight_array):
#     class_weights = tf.constant(weight_array)
#     def my_loss(y_true, y_pred):
#         weights = tf.gather(class_weights, y_true)
#         return tf.compat.v1.losses.sparse_softmax_cross_entropy(y_true, y_pred, weights)
#     return my_loss


# def pearson(y_true, y_pred):
#     # normalizing stage - setting a 0 mean.
#     y_true -= K.mean(y_true, axis=-1)
#     y_pred -= K.mean(y_pred, axis=-1)
#     # normalizing stage - setting a 1 variance
#     y_true = K.l2_normalize(y_true, axis=-1)
#     y_pred = K.l2_normalize(y_pred, axis=-1)
#     # final result
#     pearson_correlation = K.sum(y_true * y_pred, axis=-1)
#     return pearson_correlation

# for oc in output_cols:
#     if 'drug_class' == oc:
#         tensor_maps_out.append(TensorMap(f'{oc}', Interpretation.CATEGORICAL, shape=(1,),
#                                          loss=weight_scce(drug2weight) if weighted_loss else scce,
#                                          metrics=[tf.metrics.SparseCategoricalAccuracy()],
#                                          channel_map={f'drug_{d}': v for d,v in drug2class.items()}))
#     elif 'psychoactive_class_int' == oc:
#         tensor_maps_out.append(TensorMap(f'{oc}', Interpretation.CATEGORICAL, shape=(1,),
#                                          loss=weight_scce(psychoactive2weight) if weighted_loss else scce,
#                                          metrics=[tf.metrics.SparseCategoricalAccuracy()],
#                                          channel_map={f'{d}': v for d,v in psychoactive2class.items()}))
#     elif 'ligand_chemical_int' == oc:
#         tensor_maps_out.append(TensorMap(f'{oc}', Interpretation.CATEGORICAL, shape=(1,),
#                                          loss=weight_scce(chemical2weight) if weighted_loss else scce,
#                                          metrics=[tf.metrics.SparseCategoricalAccuracy()],
#                                          channel_map={f'{d}': v for d,v in ligand_chemical2class.items()}))        
#     elif 'tag_' in oc:
#         tensor_maps_out.append(TensorMap(f'{oc}', Interpretation.CATEGORICAL, shape=(1,), 
#                                          loss=weight_scce([1.1, tag2weight[oc]]) if weighted_loss else scce,
#                                          metrics=[tf.metrics.SparseCategoricalAccuracy()],
#                                          channel_map={f'no_{itags[oc]}': 0, f'{itags[oc]}': 1}))
#     elif 'age' in oc:
#         tensor_maps_out.append(TensorMap(f'{oc}', Interpretation.CONTINUOUS, shape=(1,),
#                                          loss=tf.keras.losses.MeanSquaredError(),
#                                          metrics=[pearson]
                                         
#                                         ))
#     elif oc in receptors:
#         tensor_maps_out.append(TensorMap(f'{oc}', Interpretation.CONTINUOUS, shape=(1,),
#                                          loss=tf.keras.losses.LogCosh(),
#                                          metrics=[tf.metrics.MeanAbsoluteError()],))
#     else:
#         tensor_maps_out.append(TensorMap(f'{oc}', Interpretation.CATEGORICAL, shape=(1,), 
#                                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#                                          metrics=[tf.metrics.SparseCategoricalAccuracy()],
#                                          channel_map={f'no_{oc}':0, f'{oc}':1}))

# Tag Trajectories

In [None]:
# Selection of drugs and tags for the trajectory plots.
# (list(tags) gives every available tag name; list(scores) every drug.)

# keep_drugs = list(scores.keys())
# keep_drugs = ['dmt', 'psilocin', 'mdma',  'hydrocodone', 'sertraline', 'cocaine']


# keep_tags = list(tags.keys()) # ['Rave_Dance_Event']
# #keep_tags = ['Rave_Dance_Event', 'Mystical_Experiences', 'Depression' ]
# plot_tag_trajectories(scores, means, stds, keep_drugs, keep_tags, list(itags.keys()),
#                       figsize=(9, len(keep_tags)*2), cols=1, rows=len(keep_tags), ncol=6)

# Each drug is listed once; the list is only used for membership tests.
keep_drugs = [
    '5-meo-dmt', 'dmt', 'psilocin', 'mdma', 'hydrocodone', 'sertraline', 'cocaine',
    'venlafaxine', 'methamphetamine', 'morphine', 'oxycodone', 'salvia', 'ketamine',
    'lsd', 'ibogaine', 'amphetamine',
    '2c-b', '2c-i', 'methadone', 'haloperidol',
]
# keep_drugs = [ 'sertraline',  'paroxetine',
#              'venlafaxine', 'methamphetamine', 'cocaine', 'amphetamine',]

keep_tags = [
    'Mystical_Experiences', 'Entities_Beings',
   # 'Addiction_Habituation', 'Depression' ,  #'Combinations', 'Bad_Trips', 'Various', "Unknown_Context", 'Retrospective_Summary'
]
        
        # mdma_tags = [
#              'Rave_Dance_Event', 'Music_Discussion', 'Festival_Lg_Crowd','Club_Bar', 'Large_Group','Relationships', 'Sex_Discussion', 
#     'Glowing_Experiences', 
        #list(tags.keys()) # ['Rave_Dance_Event']
#keep_tags = ['Rave_Dance_Event', 'Mystical_Experiences', 'Depression' ]
# 'Mystical_Experiences', 'Entities_Beings', 'Depression', 
# plot_tag_trajectories(scores, means, stds, keep_drugs, keep_tags, 
#                       figsize=(9, len(keep_tags)*2), cols=1, rows=len(keep_tags), ncol=6)
# plot_tag_trajectories(list(scores.keys()), means_d, stds_d, keep_drugs, keep_tags, 
#                       figsize=(9.25, len(keep_tags)*4), cols=1, rows=len(keep_tags), ncol=5)

#standardized_d, means_d, stds_d = get_mean_trajectories(scores, output_columns, column='drug')
#standardized_p, means_p, stds_p = get_mean_trajectories(scores, output_columns, column='psychoactive_class')

#d_sentiments = get_drug_tag_scape(means_d, df.drug.unique())

#p_sentiments = get_drug_tag_scape(means_p, df.psychoactive_class.unique())

# for d in p_sentiments:
#     ttt = sorted(p_sentiments[d].items(), key = lambda x: x[1])
#     plot_tag_drug(d.capitalize() if len(d) > 5 else d.upper(), ttt, figsize=(0.8, 8))

# for d in d_sentiments:
#     ttt = sorted(d_sentiments[d].items(), key = lambda x: x[1])
#     plot_tag_drug(d.capitalize() if len(d) > 5 else d.upper(), ttt, figsize=(0.5, 8))
    

In [None]:
# Per-drug statistics, every trajectory resampled to 25 points.
standardized_d, means_d, stds_d = get_mean_trajectories(
    scores,
    output_columns,
    column='drug',
    new_size=25,
)

In [None]:
# One stacked panel per selected tag.
n_panels = len(keep_tags)
plot_tag_trajectories(
    scores, means_d, stds_d, keep_drugs, keep_tags,
    rows=n_panels, cols=1, ncol=6, figsize=(12, n_panels * 4),
)

In [None]:
# Rank every drug against the others on each tag.
drug_names = df.drug.unique()
d_sentiments = get_drug_tag_scape(means_d, drug_names)

In [None]:
# One bar chart per drug, tags ordered from lowest to highest rank.
for d in d_sentiments:
    ttt = sorted(d_sentiments[d].items(), key=lambda pair: pair[1])
    heading = d.upper() if len(d) <= 5 else d.capitalize()
    plot_tag_drug(heading, ttt, -5, 4, figsize=(0.8, 8))

In [None]:
# Pairwise DTW distances between the per-drug mean trajectories.
d_distances = get_dtw(
    means_d,
    df.drug.unique(),
)

In [None]:
# Heatmap of the drug-by-drug distances for every tag score.
for t in d_distances:
    if 'tag' in t:
        confusion_heatmap(
            d_distances[t],
            df.drug.unique(),
            title=f'{itags[t]}',
            figsize=(18, 10),
        )

In [None]:
# Per-psychoactive-class statistics, every trajectory resampled to 20 points.
standardized_p, means_p, stds_p = get_mean_trajectories(
    scores,
    output_columns,
    column='psychoactive_class',
    new_size=20,
)

In [None]:
# Rank every psychoactive class against the others on each tag.
class_names = df.psychoactive_class.unique()
p_sentiments = get_drug_tag_scape(means_p, class_names)

In [None]:
# Bar charts for the selected classes only, tags ordered by ascending rank.
for d in p_sentiments:
    if d in ('psychedelic', 'dmt'):
        ttt = sorted(p_sentiments[d].items(), key=lambda pair: pair[1])
        heading = d.upper() if len(d) <= 5 else d.capitalize()
        plot_tag_drug(heading, ttt, -5, 4, figsize=(0.8, 8))

In [None]:
# Bar charts for every class, tags in reverse alphabetical order of tag name
# so each chart lists the tags in the same order.
for d in p_sentiments:
    ttt = sorted(
        p_sentiments[d].items(),
        key=lambda pair: itags[pair[0]],
        reverse=True,
    )
    heading = d.upper() if len(d) <= 5 else d.capitalize()
    plot_tag_drug(heading, ttt, -5, 4, figsize=(0.8, 8))

In [None]:
# Pairwise DTW distances between the per-class mean trajectories.
p_distances = get_dtw(
    means_p,
    df.psychoactive_class.unique(),
)

In [None]:
# Heatmap of the class-by-class distances for every tag score.
for t in p_distances:
    if 'tag' in t:
        confusion_heatmap(
            p_distances[t],
            df.psychoactive_class.unique(),
            title=f'{itags[t]}',
            figsize=(6, 3),
        )

In [None]:
# Class-by-class distance heatmaps, one per tag score.
# NOTE(review): this cell has the same content as the cell before it.
for t in p_distances:
    if 'tag' in t:
        confusion_heatmap(
            p_distances[t],
            df.psychoactive_class.unique(),
            title=f'{itags[t]}',
            figsize=(6, 3),
        )

# Find Extremes

In [None]:
# For every (drug, score) pair, find the highest and lowest raw score over all
# testimonials and remember the testimonial and position where it occurs.
standardized = rec_dd()
peaks=Counter()
peak_index = {}
peak_testimonials={}

mins=Counter()
mins_index = {}
mins_testimonials={}
new_size = 25
for drug in scores:
    for t in scores[drug]:
        for s in scores[drug][t]:
            raw_scores = scores[drug][t][s]
            try:
                interpolator = interp.interp1d(np.arange(len(raw_scores)), raw_scores)
                standardized[drug][t][s] = interpolator(np.linspace(0, len(raw_scores)-1, new_size))
                highest = max(raw_scores)
                lowest = min(raw_scores)
            except ValueError:
                # Sequences that cannot be interpolated are left out of the
                # extremes as well.
                print(f'vall err {drug} {t}')
                continue

            # "not in" rather than a 0/1 sentinel: the first value seen always
            # becomes the running extreme, whatever range the scores are in.
            if (drug,s) not in peaks or highest > peaks[drug,s]:
                peak_testimonials[drug,s] = t
                peaks[drug,s] = highest
                peak_index[drug,s] = np.argmax(raw_scores)

            if (drug,s) not in mins or lowest < mins[drug,s]:
                mins_testimonials[drug,s] = t
                mins[drug,s] = lowest
                mins_index[drug,s] = np.argmin(raw_scores)

In [None]:
# Reload the metadata and rebuild the integer class-id columns.
# NOTE(review): this is a different metadata file from the one loaded earlier
# in the notebook; confirm it matches the scores used for the extremes.
df = pd.read_csv('./split_32_all_drugs_v2022_04_22_meta_data.csv')

step_size = 32

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the frame returned by read_csv.
df = df[df.text.notna()].copy()

df['psychoactive_class'] = df.psychoactive_class.apply(lambda x: x.strip())

drug2class = {d:i for i,d in enumerate(df.drug.unique())}
psychoactive2class = {d:i for i,d in enumerate(df.psychoactive_class.unique())}
ligand_chemical2class = {d:i for i,d in enumerate(df.ligand_chemical_class.unique())}
df['drug_class'] = [drug2class[d] for d in df.drug]
df['psychoactive_class_int'] = [psychoactive2class[d] for d in df.psychoactive_class]
df['ligand_chemical_int'] = [ligand_chemical2class[d] for d in df.ligand_chemical_class]

tag_columns = [f'tag_{i}' for i in range(52)]
df[tag_columns] = df[tag_columns].fillna(0).astype(int)

In [None]:
def _append_extreme(data, tag_label, tag_number, key, score, testimonial, sentence):
    """Print the text of one extreme and append it to ``data`` as a CSV row."""
    ex = df[df.drug == key[0]]
    ex = ex[ex.testimonial == testimonial]
    ex = ex[ex.sentence == sentence]
    if ex.empty:
        # All columns are appended together or not at all, so they stay aligned.
        print(f'No metadata row for drug {key[0]} testimonial {testimonial} sentence {sentence}; skipped')
        return
    row = ex.iloc[0]
    print(row.text)
    print()
    data['tag'].append(tag_label)
    data['drug'].append(key[0])
    data['psychoactive_class'].append(row.psychoactive_class)
    data['ligand_chemical_class'].append(row.ligand_chemical_class)
    data['text'].append(row.text)
    data['score'].append(score)
    data['testimonial'].append(testimonial)
    data['sentence'].append(sentence)
    data['tag_number'].append(tag_number)


# For every tag, export the single highest- and lowest-scoring text over all
# drugs, together with its metadata.
data = defaultdict(list)
# Take the score names from the recorded extremes (first-seen order) instead
# of from loop variables left over by an earlier cell.
sentiments = list(dict.fromkeys(key[1] for key in (*peaks, *mins)))
for sentiment in sentiments:
    if 'tag' not in sentiment:
        continue
    s = itags[sentiment] if 'tag_' in sentiment else sentiment

    # Compare the score name (second tuple element) only, never the drug name.
    peak_candidates = [p for p in peaks if p[1] == sentiment and p in peak_testimonials]
    if peak_candidates:
        peak_key = max(peak_candidates, key=lambda p: peaks[p])
        print(f'\nmax {s.capitalize()} at drug {peak_key[0]} Testimonial: {peak_testimonials[peak_key]} Sentence: {peak_index[peak_key]}')
        _append_extreme(data, s, sentiment, peak_key, peaks[peak_key],
                        peak_testimonials[peak_key], peak_index[peak_key])

    min_candidates = [p for p in mins if p[1] == sentiment and p in mins_testimonials]
    if min_candidates:
        min_key = min(min_candidates, key=lambda p: mins[p])
        # Report the position of the minimum itself.
        print(f'\nmax Not {s.capitalize()} at drug {min_key[0]} Testimonial: {mins_testimonials[min_key]} Sentence: {mins_index[min_key]}')
        _append_extreme(data, "Not_"+s, sentiment, min_key, mins[min_key],
                        mins_testimonials[min_key], mins_index[min_key])

exdf = pd.DataFrame.from_dict(data)
exdf.to_csv('most_extreme_erowid_tags_v2022_05_13.csv', index=False)