In [1]:
import pandas as pd
import numpy as np
import pickle
# data
dat = pickle.load(open('../data/zooni/annotations_1105.pkl', 'rb'))

----
## BASELINES


### Connection TYPE
RF and KNN

In [3]:
# create connection df
connection = dat[['connection', 'text']]

In [4]:

# connection is already binary values so no need to encode
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report

X = connection['text']
y = connection['connection']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y, test_size=0.2)

# vectorize
vect = CountVectorizer(stop_words='english', ngram_range=(1,3))
X_train_vect = vect.fit_transform(X_train)
X_test_vect = vect.transform(X_test)

# fit model
rf = RandomForestClassifier()
rf.fit(X_train_vect, y_train)

# predict
y_pred = rf.predict(X_test_vect)

# evaluate
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

  Disclosure       0.69      0.29      0.41        31
 Inquisition       0.58      0.88      0.70        34

    accuracy                           0.60        65
   macro avg       0.63      0.59      0.55        65
weighted avg       0.63      0.60      0.56        65



In [12]:
# KNN
knn = KNeighborsClassifier()
knn.fit(X_train_vect, y_train)

# predict
y_pred = knn.predict(X_test_vect)

# evaluate
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

  Disclosure       0.00      0.00      0.00        31
 Inquisition       0.52      1.00      0.69        34

    accuracy                           0.52        65
   macro avg       0.26      0.50      0.34        65
weighted avg       0.27      0.52      0.36        65



  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
# export test data for LLM evaluation
test_data = pd.DataFrame({'text': X_test, 'connection': y_test})
test_data.to_csv('../data/connection_test_data2.csv', index=False)

-- 

### Subject

In [4]:
subj = dat[['subject', 'text']]

# encode
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
subj['subject'] = le.fit_transform(subj['subject'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subj['subject'] = le.fit_transform(subj['subject'])


In [14]:
X = subj['text']
y = subj['subject']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y, test_size=0.2)

# vectorize
vect = CountVectorizer(stop_words='english', ngram_range=(1,3))
X_train_vect = vect.fit_transform(X_train)
X_test_vect = vect.transform(X_test)

# fit model
rf = RandomForestClassifier()
rf.fit(X_train_vect, y_train)

# predict
y_pred = rf.predict(X_test_vect)

# evaluate
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.35      0.25      0.29        24
           1       0.00      0.00      0.00         6
           2       0.38      0.77      0.51        22
           3       0.67      0.15      0.25        13

    accuracy                           0.38        65
   macro avg       0.35      0.29      0.26        65
weighted avg       0.39      0.38      0.33        65



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# knn
knn = KNeighborsClassifier()
knn.fit(X_train_vect, y_train)

# predict
y_pred = knn.predict(X_test_vect)

# evaluate
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.67      0.08      0.15        24
           1       0.00      0.00      0.00         6
           2       0.36      1.00      0.53        22
           3       1.00      0.08      0.14        13

    accuracy                           0.38        65
   macro avg       0.51      0.29      0.21        65
weighted avg       0.57      0.38      0.26        65



  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
# export test data for LLM evaluation
test_data = pd.DataFrame({'text': X_test, 'subject': y_test})
test_data.to_csv('../data/subject_test_data2.csv', index=False)

--
### Objective

In [5]:
objectives = ['Quality', 'Legality', 'Effects', 'Methods', 'Combination of Substances', 'Mental Health',
          'N/A', 'Other', 'Overdose', 'Nurturant Support & Morality', 'Withdrawal', 'Safety', 'Relapse']

# create a column for each topic
obj = pd.DataFrame(columns=objectives)

for t in objectives:
    obj[t] = dat['objective'].apply(lambda x: 1 if t in x else 0)

obj = obj.fillna(0)

# add the text column
obj['Text'] = dat['text']

In [3]:
# # sample data
# dat = {'Symptoms':[1,0,0,0,0],
#        'Guidance':[0,1,0,0,0],
#        'Withdrawal': [0,0,1,0,0],
#        'Recovery':[0,0,0,1,0],
#        'Relapse':[0,0,0,0,1],
#        'Text':['I am feeling very depressed today',
#                'I am feeling very depressed today',
#                'I am feeling very depressed today',
#                'I am feeling very depressed today',
#                'I am feeling very depressed today']}
# dat = pd.DataFrame(dat)

In [6]:


# Split the data into features (text) and labels (topics)
X = obj['Text']
y = obj.drop(columns=['Text'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Vectorize the text data (you can choose between CountVectorizer or TfidfVectorizer)
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1,3))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Build and train the Random Forest Classifier
rf_classifier = MultiOutputClassifier(RandomForestClassifier(n_estimators=10, random_state=42))
rf_classifier.fit(X_train_vec, y_train)
rf_predictions = rf_classifier.predict(X_test_vec)

# Build and train the K-Nearest Neighbors Classifier
knn_classifier = MultiOutputClassifier(KNeighborsClassifier(n_neighbors=5))
knn_classifier.fit(X_train_vec, y_train)
knn_predictions = knn_classifier.predict(X_test_vec)

# Evaluate the models
print("Random Forest Classifier Metrics:")
print(classification_report(y_test, rf_predictions))

Random Forest Classifier Metrics:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.00      0.00      0.00         1
           2       0.60      0.12      0.19        26
           3       0.00      0.00      0.00         0
           4       1.00      0.08      0.14        13
           5       0.00      0.00      0.00        12
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00         8
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00         8
          11       0.00      0.00      0.00         6
          12       0.00      0.00      0.00         5

   micro avg       0.67      0.04      0.07       101
   macro avg       0.12      0.01      0.03       101
weighted avg       0.28      0.04      0.07       101
 samples avg       0.06      0.03      0.04   

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
print("\nK-Nearest Neighbors Classifier Metrics:")
print(classification_report(y_test, knn_predictions))


K-Nearest Neighbors Classifier Metrics:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.00      0.00      0.00         1
           2       0.80      0.31      0.44        26
           3       0.00      0.00      0.00         0
           4       0.50      0.31      0.38        13
           5       0.33      0.08      0.13        12
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00         8
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00         8
          11       0.00      0.00      0.00         6
          12       0.00      0.00      0.00         5

   micro avg       0.46      0.13      0.20       101
   macro avg       0.13      0.05      0.07       101
weighted avg       0.31      0.13      0.18       101
 samples avg       0.18      0.15      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# get all labels
labels = list(y.columns)

y_test2 = y_test.apply(lambda x: ','.join(x.index[x==1]), axis=1)

In [10]:
# export test data for LLM evaluation
test_data = pd.DataFrame({'text': X_test, 'objective': y_test2})
test_data.to_csv('../data/objective_test_data2.csv', index=False)
# test_data = pd.DataFrame({'text': X_test})
# test_data.to_csv('../data/objective_test_data2.csv', index=False)


In [17]:
# check if test set 2 is a subset of test set 1

ct1 = pd.read_csv('../data/connection_test_data.csv')
ct2 = pd.read_csv('../data/connection_test_data2.csv')

compare = ct1.merge(ct2, on=['text', 'connection'], how='left', indicator=True)

compare['_merge'].value_counts()


both          67
left_only     16
right_only     0
Name: _merge, dtype: int64

In [18]:
# subject
st1 = pd.read_csv('../data/subject_test_data.csv')
st2 = pd.read_csv('../data/subject_test_data2.csv')

compare = st1.merge(st2, on=['text', 'subject'], how='left', indicator=True)

compare['_merge'].value_counts()

both          67
left_only     14
right_only     0
Name: _merge, dtype: int64

---

## LLM evaluations

In [28]:
connection_test = pd.read_csv('../data/connection_test_data_LLM.csv')
subject_test = pd.read_csv('../data/subject_test_data_LLM.csv')

In [29]:
# subset the data to the 65 in ct2
connection_test = connection_test[connection_test['text'].isin(ct2['text'])]

In [30]:
# make labels consistent
# if label contains 'inquisition', change to 'inquisition' 
# connection_test.columns
cols = ['Q1_1_instruct_gpt3',
       'Q1_1_instruct_gpt4', 'Q1_1_example_gpt3', 'Q1_1_example_gpt4']

# if label contains 'inquisition', change to 'inquisition'
for c in cols:
    connection_test[c] = connection_test[c].apply(lambda x: 'Inquisition' if 'inquisition' in x else 'Disclosure')

In [31]:
connection_test.to_csv('../data/connection_test_data_LLM2.csv', index=False)


In [34]:

# connection is ground truth
# for Q1_1_instruct_gpt3
connection_test['Q1_1_instruct_gpt3'].value_counts()
# get confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(connection_test['connection'], connection_test['Q1_1_instruct_gpt3'])
# get classification report
from sklearn.metrics import classification_report
print(classification_report(connection_test['connection'], connection_test['Q1_1_instruct_gpt3']))



              precision    recall  f1-score   support

  Disclosure       0.59      0.87      0.70        31
 Inquisition       0.79      0.44      0.57        34

    accuracy                           0.65        65
   macro avg       0.69      0.66      0.63        65
weighted avg       0.69      0.65      0.63        65



In [35]:
# for Q1_1_instruct_gpt4
confusion_matrix(connection_test['connection'], connection_test['Q1_1_instruct_gpt4'])
print(classification_report(connection_test['connection'], connection_test['Q1_1_instruct_gpt4']))

              precision    recall  f1-score   support

  Disclosure       0.80      0.77      0.79        31
 Inquisition       0.80      0.82      0.81        34

    accuracy                           0.80        65
   macro avg       0.80      0.80      0.80        65
weighted avg       0.80      0.80      0.80        65



In [36]:
print(classification_report(connection_test['connection'], connection_test['Q1_1_example_gpt3']))

              precision    recall  f1-score   support

  Disclosure       0.57      0.87      0.69        31
 Inquisition       0.78      0.41      0.54        34

    accuracy                           0.63        65
   macro avg       0.68      0.64      0.62        65
weighted avg       0.68      0.63      0.61        65



In [37]:
print(classification_report(connection_test['connection'], connection_test['Q1_1_example_gpt4']))

              precision    recall  f1-score   support

  Disclosure       0.81      0.84      0.83        31
 Inquisition       0.85      0.82      0.84        34

    accuracy                           0.83        65
   macro avg       0.83      0.83      0.83        65
weighted avg       0.83      0.83      0.83        65



--

#### subject llm

In [38]:
subject_test = pd.read_csv('../data/subject_test_data_LLM.csv')
# subset the data to the 65 in st2
subject_test = subject_test[subject_test['text'].isin(st2['text'])]

In [41]:
# make labels consistent
# if label contains 'dependency', change to 'dependency'
cols = ['Q1_2_instruct_gpt3',  
         'Q1_2_instruct_gpt4', 'Q1_2_example_gpt3', 'Q1_2_example_gpt4']

# fill NaNs with 'N/A'
subject_test = subject_test.fillna('N/A')

for c in cols:
    subject_test[c] = subject_test[c].apply(lambda x: 'Dependency' if 'dependency' in x else x)
    subject_test[c] = subject_test[c].apply(lambda x: 'Recovery' if 'recovery' in x else x)
    subject_test[c] = subject_test[c].apply(lambda x: 'Other' if 'other' in x else x)
    subject_test[c] = subject_test[c].apply(lambda x: 'N/A' if 'n/a' in x else x)

# encode
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
subject_test['Q1_2_instruct_gpt3'] = le.fit_transform(subject_test['Q1_2_instruct_gpt3'])
subject_test['Q1_2_instruct_gpt4'] = le.fit_transform(subject_test['Q1_2_instruct_gpt4'])
subject_test['Q1_2_example_gpt3'] = le.fit_transform(subject_test['Q1_2_example_gpt3'])
subject_test['Q1_2_example_gpt4'] = le.fit_transform(subject_test['Q1_2_example_gpt4'])


In [45]:
# get clssification report
print("Q1_2_instruct_gpt3 \n",classification_report(subject_test['subject'], subject_test['Q1_2_instruct_gpt3']))
print("Q1_2_instruct_gpt4 \n", classification_report(subject_test['subject'], subject_test['Q1_2_instruct_gpt4']))
print("Q1_2_example_gpt3 \n", classification_report(subject_test['subject'], subject_test['Q1_2_example_gpt3']))
print("Q1_2_example_gpt4 \n", classification_report(subject_test['subject'], subject_test['Q1_2_example_gpt4']))


Q1_2_instruct_gpt3 
               precision    recall  f1-score   support

           0       0.27      0.12      0.17        24
           1       0.11      1.00      0.20         6
           2       1.00      0.05      0.09        22
           3       0.00      0.00      0.00        15

    accuracy                           0.15        67
   macro avg       0.35      0.29      0.11        67
weighted avg       0.44      0.15      0.11        67

Q1_2_instruct_gpt4 
               precision    recall  f1-score   support

           0       0.52      0.71      0.60        24
           1       1.00      0.17      0.29         6
           2       0.50      0.59      0.54        22
           3       0.71      0.33      0.45        15

    accuracy                           0.54        67
   macro avg       0.68      0.45      0.47        67
weighted avg       0.60      0.54      0.52        67

Q1_2_example_gpt3 
               precision    recall  f1-score   support

           0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


--

#### Objective

In [21]:
# objective
objective_test = pd.read_csv('../data/objective_test_data_LLM.csv')
# join with objective_test2
objective_test2 = pd.read_csv('../data/objective_test_data2.csv')

objective_test = objective_test.merge(objective_test2, on='text', how='left')

In [22]:
# make labels consistent
objective_test = objective_test.fillna('N/A')
cols = ['Q1_3_instruct_gpt3',  
         'Q1_3_instruct_gpt4', 'Q1_3_example_gpt3', 'Q1_3_example_gpt4']

objectives = ['Quality', 'Legality', 'Effects', 'Methods', 'Combination of Substances', 'Mental Health',
          'N/A', 'Other', 'Overdose', 'Nurturant Support & Morality', 'Withdrawal', 'Safety', 'Relapse']
# mapping = 1: Quality
#     2: Legality
#     3- Safety
#     4- Mental health
#     5- Combination
#     6- Method
#     7- Effects
#     8- Nurturant Support & Morality
#     9- Overdose
#     10- Withdrawal
#     11- Relapse
#     12- Other
#     13- N/A

for c in cols:
    for row in range(len(objective_test)):
        # if not float
        if type(objective_test[c][row]) != float:
            objective_test[c] = objective_test[c].apply(lambda x: 'Quality' if 'quality' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Legality' if 'legality' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Effects' if 'effects' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Effects' if 'effect' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Methods' if 'methods' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Methods' if 'method' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Combination of Substances' if 'combination of substances' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Combination of Substances' if 'combination' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Mental Health' if 'mental health' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'N/A' if 'n/a' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Other' if 'other' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Overdose' if 'overdose' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Nurturant Support & Morality' if 'nurturant support & morality' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Withdrawal' if 'withdrawal' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Safety' if 'safety' in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Relapse' if 'relapse' in x else x)
        else:
            objective_test[c] = objective_test[c].apply(lambda x: 'Quality' if 1 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Legality' if 2 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Safety' if 3 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Mental Health' if 4 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Combination of Substances' if 5 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Methods' if 6 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Effects' if 7 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Nurturant Support & Morality' if 8 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Overdose' if 9 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Withdrawal' if 10 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Relapse' if 11 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'Other' if 12 in x else x)
            objective_test[c] = objective_test[c].apply(lambda x: 'N/A' if 13 in x else x)



In [24]:
# get index 8
objective_test['Q1_3_instruct_gpt4'][8]

'1, 2, 3, 4, 7, 8, 9, 10, 11'

In [26]:
# one of the cells is a string of integers
# convert manually
objective_test['Q1_3_instruct_gpt4'][objective_test['Q1_3_instruct_gpt4'] == '1, 2, 3, 4, 7, 8, 9, 10, 11'] = 'Quality,Legality,Safety,Mental Health,Effects,Nurturant Support & Morality,Overdose,Withdrawal,Relapse'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  objective_test['Q1_3_instruct_gpt4'][objective_test['Q1_3_instruct_gpt4'] == '1, 2, 3, 4, 7, 8, 9, 10, 11'] = 'Quality,Legality,Safety,Mental Health,Effects,Nurturant Support & Morality,Overdose,Withdrawal,Relapse'


In [29]:
# order the labels
objective_test['Q1_3_instruct_gpt3'] = objective_test['Q1_3_instruct_gpt3'].apply(lambda x: ','.join(sorted(x.split(','))))
objective_test['Q1_3_instruct_gpt4'] = objective_test['Q1_3_instruct_gpt4'].apply(lambda x: ','.join(sorted(x.split(','))))
objective_test['Q1_3_example_gpt3'] = objective_test['Q1_3_example_gpt3'].apply(lambda x: ','.join(sorted(x.split(','))))
objective_test['Q1_3_example_gpt4'] = objective_test['Q1_3_example_gpt4'].apply(lambda x: ','.join(sorted(x.split(','))))
objective_test['objective'] = objective_test['objective'].apply(lambda x: ','.join(sorted(x.split(','))))

In [32]:
# get classification report
print("Q1_3_instruct_gpt3 \n",classification_report(objective_test['objective'], objective_test['Q1_3_instruct_gpt3']))
print("Q1_3_instruct_gpt4 \n", classification_report(objective_test['objective'], objective_test['Q1_3_instruct_gpt4']))
print("Q1_3_example_gpt3 \n", classification_report(objective_test['objective'], objective_test['Q1_3_example_gpt3']))
print("Q1_3_example_gpt4 \n", classification_report(objective_test['objective'], objective_test['Q1_3_example_gpt4']))

Q1_3_instruct_gpt3 
                                                                      precision    recall  f1-score   support

                                          Combination of Substances       0.00      0.00      0.00         7
                                  Combination of Substances,Effects       0.00      0.00      0.00         2
           Combination of Substances,Effects,Mental Health,Overdose       0.00      0.00      0.00         1
Combination of Substances,Effects,Mental Health,Overdose,Withdrawal       0.00      0.00      0.00         2
                          Combination of Substances,Effects,Relapse       0.00      0.00      0.00         1
    Combination of Substances,Nurturant Support & Morality,Overdose       0.00      0.00      0.00         1
                                                            Effects       0.00      0.00      0.00        14
                           Effects,Legality,Overdose,Quality,Safety       0.00      0.00      0.00        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [18]:
# export nb to html
!jupyter nbconvert --to html make_baseline.ipynb


[NbConvertApp] Converting notebook make_baseline.ipynb to html
[NbConvertApp] Writing 622865 bytes to make_baseline.html
