In [None]:
!pip install tensorflow-text==2.5
!pip install tf-models-official==2.5

In [None]:
import os
import shutil
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
import scipy.interpolate as interp
from collections import OrderedDict
from ml4h.TensorMap import TensorMap, Interpretation

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

tf.get_logger().setLevel('ERROR')

In [None]:
# Load the GoEmotions annotation table. Every column after the first nine is
# treated as one emotion label and becomes one model output.
df = pd.read_csv('/home/sam/unify-emotion-datasets/data/full_dataset/goemotions_1.csv')
in_cols = ['text']
output_columns = list(df.columns[9:])
for emotion in output_columns:
    print(df[emotion].value_counts())
print(df.info())
print(output_columns)

# One two-channel categorical TensorMap per emotion: channel 0 = absent, channel 1 = present.
tensor_maps_out = [
    TensorMap(str(emotion), Interpretation.CATEGORICAL, shape=(1,),
              channel_map={f'no_{emotion}': 0, str(emotion): 1})
    for emotion in output_columns
]

In [None]:
# Optimizer schedule settings; the optimizer is only built so that the saved
# model's 'AdamWeightDecay' entry can be resolved when loading.
epochs = 5
init_lr = 3e-5
num_train_steps = epochs * 100
num_warmup_steps = int(num_train_steps * 0.1)

optimizer = optimization.create_optimizer(
    init_lr=init_lr,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    optimizer_type='adamw',
)

# The model directory name encodes the number of emotion outputs.
classifier_model = tf.keras.models.load_model(
    f'./models/bert_{len(tensor_maps_out)}_sentiment_classifier_v2022_04_22',
    custom_objects={'AdamWeightDecay': optimizer},
)

In [None]:
#classifier_model.summary()


In [None]:
human = pd.read_csv('/home/sam/csvs/galen_to_label_v2022_06_30.csv')
human.info()

In [None]:
human = human[human.human_label.notna()]
human.info()

In [None]:
# Compare human emotion labels with the model's ranking of its emotion outputs.
# `stats` collects: 'n' (human labels that are also model outputs), 'correct'
# (label found in the model's top-k, k = number of human labels on that row),
# per-emotion hit/miss counts, and cumulative 'top_<k>' hit counts.
top_n = 20
stats = Counter()
output_names = list(classifier_model.output_names)
for index, row in human.iterrows():
    print(f'\n\nText {row.text}')

    # Softmax over the last axis, then rank the model outputs (first axis)
    # by the probability stored at channel index 1.
    results = tf.nn.softmax(classifier_model(tf.constant([row.text]))).numpy()
    machine = np.argsort(results[:, 0, 1], axis=0)
    # Ascending order: the most probable emotion is the LAST element of `top`.
    top = [str(output_names[m]) for m in machine[-top_n:]]
    print(f'Human Emotions: {row.human_label}\nMachine Emotions: {top[-5:]}')

    human_emotions = [he.strip().lower() for he in row.human_label.split(',')]
    for human_e in human_emotions:
        # Labels the model cannot predict are ignored entirely.
        if human_e not in output_names:
            continue
        stats[f'human_{human_e}'] += 1
        stats['n'] += 1
        if human_e in top[-len(human_emotions):]:
            stats['correct'] += 1
            stats[f'human_{human_e}_correct'] += 1
        else:
            stats[f'human_{human_e}_wrong'] += 1
        for n in range(1, top_n):
            if human_e in top[-n:]:
                stats[f'top_{n}'] += 1


for k, v in sorted(stats.items(), key=lambda x: x[0]):
    print(f'{k} has {v}')


if stats['n'] == 0:
    # Nothing matched: every ratio below would divide by zero.
    print('No human label matched a model output; accuracies are undefined.')
else:
    for n in range(1, top_n):
        print(f" Top {n} accuracy:{100*stats[f'top_{n}']/stats['n']:0.3f}%")

    print(f"Galen Accuracy: {100*stats['correct']/stats['n']:0.3f}%,")



In [None]:
def plot_stats(stats, num_samples=20):
    """Plot top-k concordance between human labels and model rankings.

    Args:
        stats: mapping with the total under 'n' and hit counts under
            'top_1' ... 'top_<num_samples-1>'.
        num_samples: exclusive upper bound of the k values drawn.

    The 'Random' curve is k/28 in percent. The title reads the number of
    labelled texts from the notebook-level `human` frame.
    """
    fig, ax = plt.subplots(figsize=(4, 4), dpi=300)
    ks = range(1, num_samples)
    concordance = [100*stats[f'top_{k}']/stats['n'] for k in ks]
    ax.plot(ks, concordance, label='Psychiatrist')
    chance = [100*k/28 for k in ks]
    ax.plot(ks, chance, label='Random')
    ax.set_title(f"Psychiatrist BERTiment Concordance\nN = {len(human)}, Emotions = {stats['n']}")
    ax.set_xlabel('Top K of 28 Emotions')
    ax.set_xticks([1,5,10,15])
    ax.set_ylabel('Concordance (%)')
    ax.legend()
plot_stats(stats)

In [None]:
# NOTE(review): `words` is not defined in any cell shown in this notebook; it
# presumably is a list of sentences left over from an earlier session. Define it
# before running this cell, otherwise it raises NameError on a fresh kernel.
original_results = tf.nn.softmax(classifier_model(tf.constant(words)))

In [None]:
# For every sentence, print the channel-1 probability of each model output.
# Indexing is [output, sentence, channel]; sentences are truncated to 60 chars.
for j,s in enumerate(words):
    for i,ot in enumerate(classifier_model.output_names):
        score = original_results[i, j, 1].numpy()
        print(f'{s[:60]} <- {ot}: {score:0.6f}')

In [None]:
# Testimonial table. NOTE: this rebinds `df`, replacing the GoEmotions frame
# loaded at the top of the notebook.
df = pd.read_csv('./all_split_192_v2022_05_10_meta_data.csv')
#df = pd.read_csv(f'./heroin_split_32_meta_data.csv')


step_size = 32

# Drop rows without text. `.copy()` makes the filtered frame independent, so the
# column assignments below do not write into a slice of the original frame.
df = df[df.text.notna()].copy()

# `.str.strip()` leaves missing classes as NaN; calling `x.strip()` on each
# value raised AttributeError as soon as one class was missing.
df['psychoactive_class'] = df.psychoactive_class.str.strip()

# Integer id per drug, in order of first appearance.
drug2class = {d: i for i, d in enumerate(df.drug.unique())}
# Inverse-frequency weight per drug id: total rows / rows for that drug.
# One `value_counts` pass replaces re-filtering the frame once per drug.
drug_counts = df.drug.value_counts()
class2weight = {i: len(df) / int(drug_counts[d]) for d, i in drug2class.items()}
df['drug_class'] = df.drug.map(drug2class)

# Missing tag indicators count as "tag not set".
tag_columns = [f'tag_{i}' for i in range(52)]
df[tag_columns] = df[tag_columns].fillna(0)


# Human-readable tag name -> `tag_<i>` column name in `df`.
tags = {'Small_Group': 'tag_0', 'General': 'tag_1', 'First_Times': 'tag_2', 'Alone': 'tag_3', 'Difficult_Experiences': 'tag_4', 'Glowing_Experiences': 'tag_5', 'Retrospective_Summary': 'tag_6', 'Various': 'tag_7', 'Unknown_Context': 'tag_8', 'Mystical_Experiences': 'tag_9', 'Health_Problems': 'tag_10', 'Combinations': 'tag_11', 'Not_Applicable': 'tag_12', 'Bad_Trips': 'tag_13', 'Hangover_Days_After': 'tag_14', 'Entities_Beings': 'tag_15', 'Music_Discussion': 'tag_16', 'Addiction_Habituation': 'tag_17', 'Post_Trip_Problems': 'tag_18', 'Nature_Outdoors': 'tag_19', 'Relationships': 'tag_20', 'Depression': 'tag_21', 'Therapeutic_Intent_or_Outcome': 'tag_22', 'Overdose': 'tag_23', 'Medical_Use': 'tag_24', 'Sex_Discussion': 'tag_25', 'Train_Wrecks_Trip_Disasters': 'tag_26', 'Guides_Sitters': 'tag_27', 'Rave_Dance_Event': 'tag_28', 'Preparation_Recipes': 'tag_29', 'Festival_Lg_Crowd': 'tag_30', 'Health_Benefits': 'tag_31', 'Large_Group': 'tag_32', 'Multi-Day_Experience': 'tag_33', 'Club_Bar': 'tag_34', 'What_Was_in_That': 'tag_35', 'Personal_Preparation': 'tag_36', 'HPPD_Lasting_Visuals': 'tag_37', 'Families': 'tag_38', 'Second_Hand_Report': 'tag_39', 'Loss_of_Magic': 'tag_40', 'Hospital': 'tag_41', 'Public_Space': 'tag_42', 'School': 'tag_43', 'Poetry': 'tag_44', 'Performance_Enhancement': 'tag_45', 'Large_Party': 'tag_46', 'Group_Ceremony': 'tag_47', 'Workplace': 'tag_48', 'Cultivation_Synthesis': 'tag_49', 'Pregnancy_Baby': 'tag_50', 'Military': 'tag_51'}
# Inverse mapping: column name -> tag name with '(', ')' and '/' removed.
itags = {v: k.replace('(', '').replace(')', '').replace('/', '') for k, v in tags.items()}
# Cleaned tag name -> integer tag index (the number after 'tag_').
ctags = {v: int(k.replace('tag_', '')) for k, v in itags.items()}

# Column names in `df` that are cast to float below.
receptors = ['5_ht2a', '5_ht2c', '5_ht2b', '5_ht1a', '5_ht1b', '5_ht1d', '5_ht1e', '5_ht1f', '5_ht3', '5_ht5a', '5_ht6', '5_ht7', 'dopamine_d1', 'dopamine_d2', 'dopamine_d3', 'dopamine_d4', 'dopamine_d5', 'adrenergic_alpha1a', 'adrenergic_alpha1b', 'adrenergic_alpha2a', 'adrenergic_alpha2b', 'adrenergic_beta1', 'adrenergic_beta2', 'sert', 'dat', 'net', 'imidazoline_1', 'sigma_1', 'sigma_2', 'dor', 'kor', 'mor', 'm1', 'm2', 'm3', 'm4', 'm5', 'h1', 'h2', 'h3', 'h4', 'calcium_channel', 'nmda', 'cb1', 'cb2', 'glutamate_ampa', 'gaba_a', 'gaba_b', 'dopamine_d2_long', 'dopamine_d2_short', 'sodium_channel', 'taar1', 'substance_p', 'paf_platelet_activating_factor', 'prostaglandin_e3', 'prostaglandin_e4', 'herg', 'monoamine_oxidase_a', 'monoamine_oxidase_b', 'cholecystokinin_a', 'cholecystokinin_b']
df[receptors] = df[receptors].astype(float)


# stats = Counter()
# def rec_dd():
#     return defaultdict(rec_dd)
# scores = rec_dd()

df.head()

In [None]:
from collections import defaultdict
# NOTE(review): `predictions` and `truths` are not used by any later cell shown here.
predictions = defaultdict(list)
truths = defaultdict(list)

def rec_dd():
    """Return a defaultdict whose missing keys are themselves `rec_dd()` dicts.

    This gives arbitrarily deep auto-creating nesting: reading
    `d[a][b][c]` creates every missing level on the way.
    """
    return defaultdict(rec_dd)
# Nested as scores[drug][testimonial][sentiment] -> list of per-row scores.
scores = rec_dd()

# Run the classifier on every text row and collect, per drug / testimonial /
# sentiment, the channel-1 probability of each row in order of appearance.
last_t = 0
max_testimonials = 400
max_drugs = 62
sentiment_names = classifier_model.output_names
for index, row in df.iterrows():
    drug_scores = scores[row.drug]
    starts_new_testimonial = last_t != row.testimonial
    if starts_new_testimonial and len(drug_scores) >= max_testimonials:
        # This drug already has its quota: stop entirely once enough drugs
        # have been seen, otherwise just skip the row. `last_t` is left
        # untouched so the rest of the skipped testimonial is skipped too.
        if len(scores) >= max_drugs:
            break
        continue

    results = classifier_model(tf.constant([row.text]))
    results = np.array([tf.nn.softmax(head).numpy() for head in results])

    # Membership is tested before indexing, because indexing a rec_dd creates the key.
    if row.testimonial not in drug_scores:
        for sentiment in sentiment_names:
            drug_scores[row.testimonial][sentiment] = []
    for i, sentiment in enumerate(sentiment_names):
        drug_scores[row.testimonial][sentiment].append(results[i, 0, 1])

    # Progress line on the first row of every 10th testimonial of a drug.
    if starts_new_testimonial and len(drug_scores) % 10 == 0:
        print(f'at {row.drug} with {len(drug_scores)} testimonials from {len(scores)} drug(s).')
    last_t = row.testimonial


In [None]:
# Average number of scored rows per testimonial for each drug, measured on the
# "love" series (every sentiment list of a testimonial is appended in lockstep).
for drug, testimonials in scores.items():
    lens = sum(len(per_sentiment["love"]) for per_sentiment in testimonials.values())
    print(f'Drug {drug} mean testimonial length: {lens/len(testimonials):0.1f}')

In [None]:
import pickle

# Persist the nested `scores` dict. The `with` block closes the file, which
# guarantees the pickle is fully flushed to disk before the cell finishes;
# the bare `open()` handle used before was never closed.
with open("split_32_28_sentiment_scores_v2022_05_04.pkl", "wb") as f:
    pickle.dump(scores, f)

In [None]:
# For every (drug, sentiment) find the single highest score, remember which
# testimonial and position produced it, and export the text row of the overall
# peak of each sentiment. Also resample every series to `new_size` points.
standardized = rec_dd()
peaks = Counter()
peak_index = {}
peak_testimonials = {}
new_size = 25
sentiment_order = []  # sentiments in order of first appearance
for drug in scores:
    for t in scores[drug]:
        for s, series in scores[drug][t].items():
            if s not in sentiment_order:
                sentiment_order.append(s)
            if len(series) == 0:
                continue
            # Peak tracking is independent of interpolation, so testimonials
            # too short to interpolate still count towards the peaks.
            series_max = max(series)
            if series_max > peaks[drug, s]:
                peaks[drug, s] = series_max
                peak_testimonials[drug, s] = t
                peak_index[drug, s] = int(np.argmax(series))
            try:
                interpolator = interp.interp1d(np.arange(len(series)), series)
                standardized[drug][t][s] = interpolator(np.linspace(0, len(series)-1, new_size))
            except ValueError:
                print(f'vall err {drug} {t}')

data = defaultdict(list)
for sentiment in sentiment_order:
    # Match on the sentiment slot of the key only (keys are (drug, sentiment)).
    candidates = [key for key in peaks if key[1] == sentiment]
    if not candidates:
        continue
    peak_key = max(candidates, key=lambda key: peaks[key])  # first maximum wins ties
    peak_drug = peak_key[0]
    peak_testimonial = peak_testimonials[peak_key]
    peak_sentence = peak_index[peak_key]
    print(f'\nmax {sentiment.capitalize()} at drug {peak_drug} Testimonial: {peak_testimonial} Sentence: {peak_sentence}')
    # NOTE(review): assumes `df.sentence` equals the position of the row within
    # its testimonial's score list - confirm against how `df` was chunked.
    ex = df[(df.drug == peak_drug)
            & (df.testimonial == peak_testimonial)
            & (df.sentence == peak_sentence)]
    if ex.empty:
        print(f'no row found for {peak_key}; skipping')
        continue
    example = ex.iloc[0]
    print(example.text)
    print()
    data['sentiment'].append(sentiment)
    data['drug'].append(peak_drug)
    data['psychoactive_class'].append(example.psychoactive_class)
    data['ligand_chemical_class'].append(example.ligand_chemical_class)
    data['text'].append(example.text)

exdf = pd.DataFrame.from_dict(data)
exdf.to_csv('most_extreme_erowid_emotions_max100.csv', index=False)

In [None]:
import pickle
stats = Counter()
def rec_dd():
    """Return a defaultdict whose missing keys are themselves `rec_dd()` dicts."""
    return defaultdict(rec_dd)
#scores = rec_dd()

# `rec_dd` must exist at module level before unpickling, because the pickled
# defaultdicts reference it as their default factory.
# SECURITY: pickle.load executes arbitrary code; only load files you produced.
# NOTE(review): this file name differs from the one written by the dump cell
# (v2022_05_04) - confirm which scores are intended.
#scores = pickle.load(open("split_32_28_sentiment_100_scores_v2022_05_13.pkl", "rb"))
with open("split_32_28_sentiment_scores_v2022_04_26.pkl", "rb") as scores_file:
    scores = pickle.load(scores_file)

In [None]:
# Same summary as after scoring, for the scores just loaded from disk.
new_sizes = {}
for drug, testimonials in scores.items():
    lens = sum(len(per_sentiment["love"]) for per_sentiment in testimonials.values())
    print(f'Drug {drug} from {len(testimonials)} testimonials, mean length: {lens/len(testimonials):0.1f}')

In [None]:
def get_mean_trajectories(scores, output_columns=None, column='psychoactive_class', new_size = 0, out_col = "love"):
    """Resample every testimonial to a fixed length and average per group.

    Args:
        scores: nested mapping scores[drug][testimonial][sentiment] -> sequence
            of per-row scores.
        output_columns: sentiments to finalise. Defaults to every sentiment
            found in `scores`.
        column: column of the notebook-level `df` that assigns each drug to a
            group; the first value found for the drug is used.
        new_size: number of points each series is resampled to. If <= 0, each
            drug uses its own mean testimonial length (truncated to int).
            Drugs sharing a group then produce arrays of different lengths,
            which are reported and skipped.
        out_col: sentiment whose series length defines testimonial length.

    Returns:
        (standardized, means, stds): the resampled series per
        drug/testimonial/sentiment, and Counters keyed by (group, sentiment)
        holding the element-wise mean and population standard deviation.
        Pairs with no usable testimonial are left out of both Counters.
    """
    standardized_psycho = rec_dd()
    means_psycho = Counter()
    stds_psycho = Counter()
    counts = Counter()  # series actually accumulated per (group, sentiment)
    new_sizes = {}
    seen_sentiments = []
    for drug in scores:
        lens = 0
        for t in scores[drug]:
            lens += len(scores[drug][t][out_col])
        if new_size <= 0:
            new_sizes[drug] = int(lens/len(scores[drug]))
        else:
            new_sizes[drug] = new_size

        print(f'Drug {drug} from {len(scores[drug])} testimonials, mean length: {lens/len(scores[drug]):0.1f} Interpt to: { new_sizes[drug]:0.0f}')

    for drug in scores:
        psychoactive_class = df[df.drug==drug][column].unique()[0]
        for t in scores[drug]:
            for s in scores[drug][t]:
                if s not in seen_sentiments:
                    seen_sentiments.append(s)
                series = scores[drug][t][s]
                try:
                    interpolator = interp.interp1d(np.arange(len(series)), series)
                    resampled = interpolator(np.linspace(0, len(series)-1, new_sizes[drug]))
                    # Compute both running sums before storing either, so a
                    # shape mismatch cannot leave them out of step.
                    running_sum = means_psycho[psychoactive_class,s] + resampled
                    running_sq = stds_psycho[psychoactive_class,s] + resampled*resampled
                except ValueError as e:
                    print(f'vall err {drug} {t} len: {len(series)} e:{e}')
                    break
                standardized_psycho[drug][t][s] = resampled
                means_psycho[psychoactive_class,s] = running_sum
                stds_psycho[psychoactive_class,s] = running_sq
                counts[psychoactive_class,s] += 1

    if output_columns is None:
        output_columns = seen_sentiments
    print(f'Output columns: {output_columns}')

    for pc in df[column].unique():
        # Membership test first: indexing the rec_dd would insert empty drugs
        # into the returned mapping.
        n_testimonials = sum(len(standardized_psycho[drug])
                             for drug in df[df[column]==pc].drug.unique()
                             if drug in standardized_psycho)
        print(f'Class {pc} has length: {n_testimonials}')
        for s in output_columns:
            n = counts[pc,s]
            if n == 0:
                continue
            # Divide by the true number of accumulated series; the previous
            # counter started at 1 and biased every mean and std.
            mean = means_psycho[pc,s] / n
            variance = stds_psycho[pc,s] / n - mean*mean
            means_psycho[pc,s] = mean
            # Clip tiny negative values caused by floating-point cancellation.
            stds_psycho[pc,s] = np.sqrt(np.clip(variance, 0.0, None))
    return standardized_psycho, means_psycho, stds_psycho


def get_mean_trajectories_unadjusted(scores, column='psychoactive_class', output_columns=None):
    """Average scores position by position, without length normalisation.

    Position n of a group's trajectory averages the n-th score of every
    testimonial in that group that is at least n+1 rows long.

    Args:
        scores: nested mapping scores[drug][testimonial][sentiment] -> sequence
            of per-row scores.
        column: column of the notebook-level `df` that assigns each drug to a
            group; the first value found for the drug is used.
        output_columns: sentiments to return. Defaults to every sentiment
            found in `scores`.

    Returns:
        (means, stds): defaultdict(list) objects keyed by (group, sentiment);
        each value is a list with one mean / population standard deviation per
        position.
    """
    sums = Counter()
    square_sums = Counter()
    counts = Counter()
    seen_sentiments = []
    for drug in scores:
        psychoactive_class = df[df.drug==drug][column].unique()[0]
        for t in scores[drug]:
            for s in scores[drug][t]:
                if s not in seen_sentiments:
                    seen_sentiments.append(s)
                for n, a_score in enumerate(scores[drug][t][s]):
                    key = (psychoactive_class, s, n)
                    counts[key] += 1
                    sums[key] += a_score
                    square_sums[key] += a_score*a_score

    if output_columns is None:
        output_columns = seen_sentiments
    print(output_columns)

    means_l = defaultdict(list)
    stds_l = defaultdict(list)
    for pc in df[column].unique():
        for s in output_columns:
            n = 0
            # Counter lookups return 0 for unseen keys without inserting them.
            while counts[pc, s, n] > 0:
                mean = sums[pc, s, n] / counts[pc, s, n]
                variance = square_sums[pc, s, n] / counts[pc, s, n] - mean*mean
                means_l[pc, s].append(mean)
                # max() guards against tiny negative values from rounding.
                stds_l[pc, s].append(np.sqrt(max(variance, 0.0)))
                n += 1
    return means_l, stds_l


def plot_sentiment(labels, means, stds, keep_drugs=None, keep_sentiments=None,
                   figsize=(42, 42), cols=5, rows=4, dpi=300, ncol=4):
    """Draw one panel per sentiment with one trajectory per selected label.

    Args:
        labels: all group labels; fixes the colour and marker of each label.
        means, stds: mappings keyed by (label, sentiment) holding 1-D
            trajectories. The shaded band is mean +/- std/4.
        keep_drugs: labels to draw. Defaults to every label.
        keep_sentiments: sentiments to draw, one panel each. Defaults to every
            sentiment that appears in `means`.
        figsize, cols, rows, dpi: figure layout; sentiments beyond rows*cols
            panels are not drawn.
        ncol: number of legend columns.
    """
    if keep_drugs is None:
        keep_drugs = labels
    if keep_sentiments is None:
        keep_sentiments = []
        for _, sentiment in means:
            if sentiment not in keep_sentiments:
                keep_sentiments.append(sentiment)
    keep_drugs = set(keep_drugs)
    keep_sentiments = list(keep_sentiments)

    # squeeze=False always returns a 2-D array of axes, so a 1x1 grid works too.
    fig, axes = plt.subplots(rows, cols, figsize=figsize, dpi=dpi, squeeze=False)
    cmap = plt.get_cmap('hsv')
    colors = cmap(np.linspace(0, 1, len(labels)))
    markers = "v.,o1^2>348<spdxh*PH+XD|"
    o_drugs = sorted(labels)
    panels = axes.ravel()
    if len(keep_sentiments) > len(panels):
        print(f'Only the first {len(panels)} of {len(keep_sentiments)} sentiments fit the {rows}x{cols} grid.')

    for sentiment, ax in zip(keep_sentiments, panels):
        drew_any = False
        for i, (drug, color) in enumerate(zip(o_drugs, colors)):
            if drug not in keep_drugs:
                continue
            # Accept lists as well as arrays; skip labels with no trajectory
            # (e.g. the scalar 0 a Counter returns for a missing key).
            mean = np.asarray(means[drug,sentiment], dtype=float)
            std = np.asarray(stds[drug,sentiment], dtype=float)
            if mean.ndim != 1 or mean.size == 0:
                continue
            positions = range(len(mean))
            ax.plot(positions, mean,
                    c=color, marker=markers[i%len(markers)],
                    label=drug.capitalize() if len(drug) > 4 else drug.upper(),
                    fillstyle='none' if i%2==0 else 'full')
            ax.fill_between(positions, mean - std/4, mean + std/4,
                            color=color, alpha=0.05)
            drew_any = True
        ax.set_title(f'{sentiment.capitalize()}', size=24)
        ax.set_xlabel('Time in Narrative')
        ax.set_ylabel(sentiment.capitalize())
        ax.set_xticks(())
        ax.set_yticks(())
        if drew_any:
            ax.legend(ncol=ncol)
    # Hide panels that received no sentiment.
    for ax in panels[len(keep_sentiments):]:
        ax.axis('off')
    plt.tight_layout()

In [None]:
# Per-drug trajectories averaged position by position (no resampling).
umeans_d, ustds_d = get_mean_trajectories_unadjusted(scores, column='drug')

In [None]:
df.ligand_chemical_class.unique()

In [None]:
# Grouped by psychoactive class (the default column), resampled to 25 points.
standardized_p, means_p, stds_p = get_mean_trajectories(scores, output_columns,new_size=25)

In [None]:
# Grouped by individual drug, resampled to 12 points.
standardized_d, means_d, stds_d = get_mean_trajectories(scores, output_columns, column='drug', new_size=12)

In [None]:
# Grouped by ligand chemical class, resampled to 25 points.
standardized_c, means_c, stds_c = get_mean_trajectories(scores, output_columns, 
                                                        column='ligand_chemical_class', new_size=25)

In [None]:
# Per-drug trajectories for two sentiments, one panel each (2 rows x 1 column).
keep_drugs = ['amphetamine', 'methamphetamine', 'cocaine', 'sertraline', 'venlafaxine', 
              'mdma', 'haloperidol', 'dmt']
#keep_sentiments = classifier_model.output_names
keep_sentiments = ['sadness', 'love']
plot_sentiment(df.drug.unique(), means_d, stds_d, keep_drugs, keep_sentiments,
               figsize=(7.6, 6.6), cols=1, rows=2, ncol=4)
plt.show()

In [None]:
# Six sentiments on a 3x2 grid. 'ketamine' is listed twice; the list is only
# used for membership tests, so the duplicate has no effect.
keep_drugs = ['lsd', 'dmt', '5-meo-dmt', 'ketamine', 'psilocin', 'mescaline', 'ketamine', 
               'oxycodone', 'morphine',  'methadone']
keep_sentiments = ['realization', 'curiosity', 'surprise', 'relief', 'confusion', 'amusement']
plot_sentiment(df.drug.unique(), means_d, stds_d, keep_drugs, keep_sentiments,
               figsize=(12, 11), cols=2, rows=3, ncol=3)
plt.show()

In [None]:
# Same layout as the first cell, for 'neutral' and 'optimism'.
keep_drugs = ['amphetamine', 'methamphetamine', 'cocaine', 'sertraline', 'venlafaxine', 
              'mdma', 'mda', 'haloperidol', 'dmt']
#keep_sentiments = classifier_model.output_names
keep_sentiments = ['neutral', 'optimism']
plot_sentiment(df.drug.unique(), means_d, stds_d, keep_drugs, keep_sentiments,
               figsize=(7, 7), cols=1, rows=2, ncol=3)
plt.show()

In [None]:
standardized_c, means_c, stds_c = get_mean_trajectories(scores, column='ligand_chemical_class')


In [None]:
standardized_p, means_p, stds_p= get_mean_trajectories(scores, new_size = 25)

In [None]:
# NOTE: `keep_drugs` is assigned here but the call below passes every
# psychoactive class as the keep-list instead.
keep_drugs = ['amphetamine', 'methamphetamine', 'cocaine', 'sertraline', 'venlafaxine', 
              'mdma', 'mda', 'haloperidol', 'dmt']

#keep_sentiments = classifier_model.output_names
keep_sentiments = ['neutral', 'optimism']
plot_sentiment(df.psychoactive_class.unique(), means_p, stds_p, df.psychoactive_class.unique(), keep_sentiments,
               figsize=(7, 7), cols=1, rows=2, ncol=2)
plt.show()

In [None]:
df.drug.unique()

In [None]:
# Per-drug 'anger' and 'sadness' trajectories.
keep_drugs = ['amphetamine', 'methamphetamine', 'cocaine', 'sertraline', 'venlafaxine', 'paroxetine',
               'bupropion', 'olanzapine',
              ]

#keep_sentiments = classifier_model.output_names
keep_sentiments = ['anger', 'sadness']
plot_sentiment(df.drug.unique(), means_d, stds_d, keep_drugs, keep_sentiments,
               figsize=(7, 7), cols=1, rows=2, ncol=2)
plt.show()

In [None]:
print(keep_drugs)

In [None]:
classifier_model.output_names

In [None]:
# The first `keep_drugs` assignment is immediately overwritten by the list of
# chemical classes; only the second one is used.
keep_drugs = ['amphetamine', 'methamphetamine', 'cocaine', 'sertraline', 'venlafaxine', 
              'mdma', 'mda', 'haloperidol', 'dmt']
keep_drugs = ['tryptamine', 'lysergamide', 'salvinorin',
 'piperazine','piperazinoazepine','piperidine',
              
 'ssri' , 'snri']
#keep_sentiments = classifier_model.output_names
keep_sentiments = ['realization', 'confusion', 'curiosity', 'embarrassment']
plot_sentiment(df.ligand_chemical_class.unique(), means_c, stds_c, keep_drugs, 
               keep_sentiments, figsize=(16, 8), cols=2, rows=2, ncol=4)
plt.show()

In [None]:
# Same as the previous cell with 'benzodiazepine' instead of 'piperazinoazepine'.
keep_drugs = ['amphetamine', 'methamphetamine', 'cocaine', 'sertraline', 'venlafaxine', 
              'mdma', 'mda', 'haloperidol', 'dmt']
keep_drugs = ['tryptamine', 'lysergamide', 'salvinorin',
 'piperazine','piperidine','benzodiazepine',
              
 'ssri' , 'snri']
#keep_sentiments = classifier_model.output_names
keep_sentiments = ['realization', 'confusion', 'curiosity', 'embarrassment']
plot_sentiment(df.ligand_chemical_class.unique(), means_c, stds_c, keep_drugs, 
               keep_sentiments, figsize=(16, 8), cols=2, rows=2, ncol=4)
plt.show()

In [None]:
# Per-drug trajectories for six sentiments on a 3x2 grid.
keep_drugs = ['dmt', 'mescaline', 'psilocin', 'lsd', '5-meo-dmt', '2c-i',
              'pcp', 'salvia', 'ketamine', 'morphine', 'hydromorphone', 'oxycodone']

#keep_sentiments = classifier_model.output_names
keep_sentiments = ['realization', 'confusion', 'curiosity', 'amusement', 'surprise', 'relief']
plot_sentiment(df.drug.unique(), means_d, stds_d, keep_drugs, 
               keep_sentiments, figsize=(16, 12), cols=2, rows=3, ncol=4)
plt.show()

In [None]:
# Overview: every drug on every emotion output. The keep-lists are passed
# explicitly and the grid is sized so that each output gets its own panel.
all_sentiments = list(classifier_model.output_names)
overview_cols = 4
overview_rows = int(np.ceil(len(all_sentiments) / overview_cols))
plot_sentiment(df.drug.unique(), means_d, stds_d, df.drug.unique(), all_sentiments,
               figsize=(40, 64), cols=overview_cols, rows=overview_rows)

In [None]:
!pip install fastdtw

from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
def get_dtw(means, labels):
    """Signed pairwise DTW distances between group trajectories, per sentiment.

    Args:
        means: mapping keyed by (group, sentiment) holding 1-D trajectories.
        labels: group labels; row/column i of every matrix belongs to
            labels[i], so the matrices line up with tick labels built from the
            same sequence.

    Returns:
        dict sentiment -> (len(labels), len(labels)) array. Entry [i, j] is the
        fastdtw distance between the two trajectories, positive when the mean
        of trajectory i exceeds that of trajectory j and negative otherwise.
        Groups absent from `labels` or without a trajectory keep zeros.
    """
    label_index = {label: i for i, label in enumerate(labels)}
    n_labels = len(label_index)

    # Group the usable trajectories by sentiment once, instead of scanning all
    # keys for every key.
    by_sentiment = defaultdict(dict)
    for (group, sentiment), trajectory in means.items():
        if group not in label_index:
            continue
        trajectory = np.asarray(trajectory, dtype=float)
        if trajectory.ndim != 1 or trajectory.size == 0:
            continue  # scalar placeholder or empty series: nothing to compare
        by_sentiment[sentiment][group] = trajectory

    distances = {}
    for sentiment, trajectories in by_sentiment.items():
        matrix = np.zeros((n_labels, n_labels))
        for group1, trajectory1 in trajectories.items():
            for group2, trajectory2 in trajectories.items():
                distance, path = fastdtw(trajectory1, trajectory2, dist=euclidean)
                sign = 1 if trajectory1.mean() > trajectory2.mean() else -1
                matrix[label_index[group1], label_index[group2]] = distance * sign
        distances[sentiment] = matrix
        print(f'Finished sentiment {sentiment} with {len(trajectories)} of {n_labels} labels')
    return distances

In [None]:
# Pairwise DTW distance matrices between psychoactive classes, one per sentiment.
p_distances = get_dtw(means_p, df.psychoactive_class.unique())

In [None]:
def confusion_heatmap(confusion, labels, figsize=(24, 15),
                      title='Emotion Heatmap', figure_path='./confusion_heatmap.png'):
    """Draw a blue-white-red heatmap of a square matrix, save it and show it.

    Args:
        confusion: square 2-D array; rows and columns share `labels`.
        labels: tick labels. Labels longer than five characters are
            capitalised, shorter ones upper-cased.
        figsize: figure size in inches.
        title: plot title; its first word also labels the colour-bar ends.
        figure_path: where to save the figure. The default keeps the original
            fixed location, which every call overwrites; pass a distinct path
            per figure to keep them all, or a falsy value to skip saving.
    """
    fig, ax = plt.subplots(figsize=figsize, dpi=300)
    ax = sb.heatmap(confusion, cmap='bwr', cbar_kws={"shrink": .8}, cbar=True, ax=ax)

    ax.set_title(title, size=24)
    ax.set_xticks(np.arange(confusion.shape[0]) + 0.5)
    ax.set_yticks(np.arange(confusion.shape[0]) + 0.5)
    cbar = ax.collections[0].colorbar
    cbar.set_ticks([np.min(confusion), np.max(confusion)])
    title_words = title.split()
    quantity = title_words[0].capitalize() if title_words else ''
    cbar.set_ticklabels([f'Less \n{quantity}', f'More \n{quantity}'])
    tick_text = [l.capitalize() if len(l) > 5 else l.upper() for l in labels]
    ax.set_xticklabels(labels=tick_text, ha='right', rotation=45)
    ax.set_yticklabels(labels=tick_text, rotation=0)
    plt.tight_layout()
    if figure_path:
        # dirname is '' for a bare file name; makedirs('') would raise.
        figure_dir = os.path.dirname(figure_path)
        if figure_dir:
            os.makedirs(figure_dir, exist_ok=True)
        plt.savefig(figure_path)
    plt.show()

In [None]:
# `sb` is the name `confusion_heatmap` uses for seaborn, so this import must
# run before the first call.
import seaborn as sb
# One class-by-class heatmap per sentiment. Each call saves to the same
# './confusion_heatmap.png', so only the last figure remains on disk.
for sentiment in p_distances:
    confusion_heatmap(p_distances[sentiment], df.psychoactive_class.unique(), figsize=(5, 3.5),
                      title=f'{sentiment.capitalize()}')    
    

In [None]:
# Drug-by-drug DTW distance matrices, one per sentiment.
distances = get_dtw(means_d, df.drug.unique())

In [None]:
import seaborn as sb
confusion_heatmap(distances['love'], df.drug.unique(), figsize=(16,10), title='Love')

In [None]:
# No title is passed in the next three cells, so the default 'Emotion Heatmap' is used.
confusion_heatmap(distances['admiration'], df.drug.unique(), figsize=(16,10))

In [None]:
confusion_heatmap(distances['fear'], df.drug.unique(), figsize=(16,10))

In [None]:
confusion_heatmap(distances['disgust'], df.drug.unique(), figsize=(16,10))

In [None]:
import seaborn as sb
# Class labels must be paired with the class-level matrix: `distances` is
# indexed by drug, `p_distances` by psychoactive class.
confusion_heatmap(p_distances['love'], df.psychoactive_class.unique())

In [None]:
distances['love'].shape

In [None]:
def get_drug_sentiments(means, labels):
    """Rank the groups within every sentiment by their mean trajectory value.

    Args:
        means: mapping keyed by (group, sentiment) holding a trajectory
            (array-like) or a single number.
        labels: unused; kept so existing calls keep working.

    Returns:
        defaultdict(dict): result[group][sentiment] = rank - N/2, where rank 0
        is the group with the lowest mean for that sentiment and N is the
        number of groups ranked for it. Ties keep the key order of `means`.
    """
    # Keep each mean together with its group, so the ranking does not depend on
    # the order in which (group, sentiment) keys appear in `means`.
    per_sentiment = defaultdict(list)
    for (group, sentiment), trajectory in means.items():
        values = np.asarray(trajectory, dtype=float)
        if values.size == 0:
            continue
        per_sentiment[sentiment].append((group, float(values.mean())))

    drug_sentiments = defaultdict(dict)
    for sentiment, entries in per_sentiment.items():
        ranked = sorted(entries, key=lambda entry: entry[1])  # stable, ascending
        centre = len(ranked) / 2
        for rank, (group, _) in enumerate(ranked):
            drug_sentiments[group][sentiment] = rank - centre
    return drug_sentiments
        
        
# Rank tables: per drug here, per psychoactive class and per chemical class below.
drug_sentiments = get_drug_sentiments(means_d, output_columns)        

In [None]:
p_sentiments = get_drug_sentiments(means_p, output_columns)    

In [None]:
c_sentiments = get_drug_sentiments(means_c, output_columns)    

In [None]:
# (emotion, score) pairs listed in ascending score order.
# NOTE(review): the origin and meaning of these scores is not shown in this
# notebook - document where they come from. Only the resulting emotion order
# (`labels`) is used by the plotting cells below.
agrees = [('annoyance', -0.5344732645347968),
 ('nervousness', -0.510669535095979),
 ('embarrassment', -0.5051026528532618),
 ('disapproval', -0.47925662915486605),
 ('disgust', -0.45396932002450086),
 ('disappointment', -0.44460392944701455),
 ('anger', -0.4056618042592467),
 ('sadness', -0.3906127742858656),
 ('grief', -0.34479892700251286),
 ('fear', -0.329826291616709),
 ('remorse', -0.24441154029927206),
 ('neutral', -0.18857309773063533),
 ('confusion', -0.16747687799665423),
 ('caring', -0.031466636020550026),
 ('relief', 0.008632272914382256),
 ('desire', 0.019039286463182307),
 ('curiosity', 0.05119058863343657),
 ('amusement', 0.11223702119570078),
 ('realization', 0.11545442106395289),
 ('optimism', 0.1437452267129484),
 ('gratitude', 0.1862795441962794),
 ('surprise', 0.1936036781292831),
 ('excitement', 0.25960061830614545),
 ('joy', 0.32537628132345764),
 ('love', 0.37134711022473577),
 ('approval', 0.4608826740570749),
 ('pride', 0.48951957939259805),
 ('admiration', 0.5222207250518904)]
# Emotion names in the order above; fixes the bar order of every rank plot.
labels = [l[0] for l in agrees]
#emos = [(l, drug_sentiments["mdma"][l]) for l in labels]

In [None]:
def get_drug_sentiments(means, labels):
    """Rank the groups within every sentiment by their mean trajectory value.

    NOTE: this cell re-defines a function of the same name from an earlier
    cell; whichever definition ran last is the one in effect.

    Args:
        means: mapping keyed by (group, sentiment) holding a trajectory
            (array-like) or a single number.
        labels: unused; kept so existing calls keep working.

    Returns:
        defaultdict(dict): result[group][sentiment] = rank - N/2, where rank 0
        is the group with the lowest mean for that sentiment and N is the
        number of groups ranked for it. Ties keep the key order of `means`.
    """
    # Keep each mean together with its group, so the ranking does not depend on
    # the order in which (group, sentiment) keys appear in `means`.
    per_sentiment = defaultdict(list)
    for (group, sentiment), trajectory in means.items():
        values = np.asarray(trajectory, dtype=float)
        if values.size == 0:
            continue
        per_sentiment[sentiment].append((group, float(values.mean())))

    drug_sentiments = defaultdict(dict)
    for sentiment, entries in per_sentiment.items():
        ranked = sorted(entries, key=lambda entry: entry[1])  # stable, ascending
        centre = len(ranked) / 2
        for rank, (group, _) in enumerate(ranked):
            drug_sentiments[group][sentiment] = rank - centre
    return drug_sentiments
        
        
#drug_sentiments = get_drug_sentiments(means_d, classifier_model.output_names)  

def plot_emo_drug(title, agrees):
    """Draw a narrow horizontal bar chart of (emotion, value) pairs.

    Bars are drawn in the order given, coloured by their min-max rescaled
    value on the 'Spectral' colour map, with a dashed line at zero. Axis ticks
    are hidden, so the emotion names themselves are not shown.

    Args:
        title: figure title.
        agrees: iterable of (emotion, value) pairs. An empty iterable is
            reported and skipped instead of raising.
    """
    values = np.asarray([pair[1] for pair in agrees], dtype=float)
    if values.size == 0:
        print(f'No values to plot for {title}.')
        return
    # Min-max rescale for the colour map; constant input maps to the midpoint
    # instead of dividing by zero.
    spread = values.max() - values.min()
    scaled = (values - values.min()) / spread if spread > 0 else np.full_like(values, 0.5)

    _ = plt.figure(figsize=(0.8, 6.2), dpi=300)
    my_cmap = plt.get_cmap("Spectral")
    plt.barh(range(len(values)), values, color=my_cmap(scaled))
    plt.axvline(0.0, linestyle='dashed', c='orange')
    # Tick labels were previously set to the emotion names and then cleared on
    # the next line; only the clearing call had any effect.
    plt.yticks(())
    plt.title(title, size=12)
    plt.xticks(())
    plt.xlabel('Rank')
    plt.box(False)

In [None]:
# One rank bar chart per psychoactive class, bars ordered by `labels`.
for psy in p_sentiments:
    emos = [(l, p_sentiments[psy][l]) for l in labels]
    plot_emo_drug(psy.capitalize(), emos)

In [None]:
# One rank bar chart per drug; short names (five characters or fewer) are upper-cased.
for d in drug_sentiments:
    emos = [(l, drug_sentiments[d][l]) for l in labels]
    plot_emo_drug(d.capitalize() if len(d)>5 else d.upper(), emos)

In [None]:
# The cells below repeat the same plot for individual drugs and classes.
#emos = sorted(drug_sentiments["lsd"].items(), key = lambda x: x[1])
emos = [(l, drug_sentiments["lsd"][l]) for l in labels]
plot_emo_drug('LSD', emos)

In [None]:
emos = [(l, drug_sentiments["morphine"][l]) for l in labels]
plot_emo_drug('Morphine', emos)

In [None]:
#emos = sorted(drug_sentiments["pcp"].items(), key = lambda x: x[1])
emos = [(l, drug_sentiments["haloperidol"][l]) for l in labels]
plot_emo_drug('Haloperidol', emos)

In [None]:
emos = [(l, drug_sentiments["cocaine"][l]) for l in labels]
plot_emo_drug('Cocaine', emos)

In [None]:
emos = [(l, drug_sentiments["risperidone"][l]) for l in labels]
plot_emo_drug('Risperidone', emos)

In [None]:
emos = [(l, drug_sentiments["alprazolam"][l]) for l in labels]
plot_emo_drug('Alprazolam', emos)

In [None]:
emos = [(l, drug_sentiments["bupropion"][l]) for l in labels]
plot_emo_drug('Bupropion', emos)

In [None]:
emos = [(l, drug_sentiments["venlafaxine"][l]) for l in labels]
plot_emo_drug('Venlafaxine', emos)

In [None]:
emos = [(l, drug_sentiments["methamphetamine"][l]) for l in labels]
plot_emo_drug('Methamphetamine', emos)

In [None]:
emos = [(l, drug_sentiments["amphetamine"][l]) for l in labels]
plot_emo_drug('Amphetamine', emos)

In [None]:
emos = [(l, p_sentiments["depressant"][l]) for l in labels]
plot_emo_drug('Depressant', emos) 

In [None]:
emos = [(l, p_sentiments["antipsychotic"][l]) for l in labels]
plot_emo_drug('Antipsychotic', emos) 

In [None]:
emos = [(l, p_sentiments["stimulant"][l]) for l in labels]
plot_emo_drug('Stimulant', emos) 

In [None]:
emos = [(l, p_sentiments["opioid"][l]) for l in labels]
plot_emo_drug('Opioid', emos)

In [None]:
emos = sorted(drug_sentiments["psychedelic"].items(), key = lambda x: x[1])
plot_emo_drug('Psychedelic Emotion Scape', emos)

In [None]:
emos = sorted(drug_sentiments["hallucinogen"].items(), key = lambda x: x[1])
plot_emo_drug('Hallucinogen Emotion Scape', emos)

In [None]:
emos = sorted(drug_sentiments["dissociative"].items(), key = lambda x: x[1])
plot_emo_drug('Dissociative Emotion Scape', emos)

In [None]:
emos = sorted(d_sentiments["antidepressant"].items(), key = lambda x: x[1])
plot_emo_drug('Antidepressant', emos)

In [None]:
emos = sorted(drug_sentiments["antipsychotic"].items(), key = lambda x: x[1])
plot_emo_drug('Antipsychotic', emos)

In [None]:
emos = sorted(drug_sentiments["deliriant"].items(), key = lambda x: x[1])
plot_emo_drug('Deliriant EmotionScape', emos)

In [None]:
drug_sentiments = get_drug_sentiments(means_d, classifier_model.output_names)
emos = sorted(drug_sentiments["mdma"].items(), key = lambda x: x[1])
plot_emo_drug('MDMA Emotion Scape', emos)

In [None]:
# Rank table per ligand chemical class, followed by one bar chart per class.
c_sentiments = get_drug_sentiments(means_c, df.ligand_chemical_class.unique())

In [None]:
list(c_sentiments.keys())

In [None]:
emos = [(l, c_sentiments["phenethylamine"][l]) for l in labels]
plot_emo_drug('Phenethylamine', emos)

In [None]:
emos = [(l, c_sentiments["tryptamine"][l]) for l in labels]
plot_emo_drug('Tryptamine', emos)

In [None]:
emos = [(l, c_sentiments["lysergamide"][l]) for l in labels]
plot_emo_drug('Lysergamide', emos)

In [None]:
emos = [(l, c_sentiments["snri"][l]) for l in labels]
plot_emo_drug('SNRI', emos)

In [None]:
emos = [(l, c_sentiments["ssri"][l]) for l in labels]
plot_emo_drug('SSRI', emos)

In [None]:
# Rank table per psychoactive class, followed by one bar chart per class.
p_sentiments = get_drug_sentiments(means_p, df.psychoactive_class.unique())

In [None]:
emos = [(l, p_sentiments["opioid"][l]) for l in labels]
plot_emo_drug('Opioid', emos)

In [None]:
emos = [(l, p_sentiments["deliriant"][l]) for l in labels]
plot_emo_drug('Deliriant', emos)

In [None]:
emos = [(l, p_sentiments["hallucinogen"][l]) for l in labels]
plot_emo_drug('Hallucinogen', emos)

In [None]:
emos = [(l, p_sentiments["psychedelic"][l]) for l in labels]
plot_emo_drug('Psychedelic', emos)

In [None]:
emos = [(l, p_sentiments["entactogen"][l]) for l in labels]
plot_emo_drug('Entactogen', emos)