#### This notebook contains code written by Dr. Rebeckah Fussell that has been modified by Rachel Merrill. ####
##### See https://github.com/rkfussell/NLP_quantum_classical_measurement/blob/main/All_data_model.ipynb?short_path=2151e13 for original code #####

In [1]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, balanced_accuracy_score, f1_score, roc_auc_score
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel

import calibration_fns as cal

## Prepping to rebuild the model

In [11]:
# Locate the prepped data relative to the directory the notebook was launched from.
cwd = os.getcwd()
data_folder = f"{cwd}/2025_spring_data_prepped/"  # trailing slash: file names are appended directly

In [3]:
# Show the full contents of every cell when a DataFrame is displayed (no column truncation).
pd.options.display.max_colwidth = None

In [4]:
#Define the model class
class Classifier(nn.Module):
    """BERT encoder followed by a small feed-forward head that emits class logits.

    Args:
        num_classes: Number of logits produced per input sequence.
        freeze_bert: When true, gradients are disabled for every BERT
            parameter so that only the feed-forward head stays trainable.
    """

    def __init__(self, num_classes=2, freeze_bert = False):
        super().__init__()

        bert_hidden_size = 768   # width of the BERT hidden states fed to the head
        head_hidden_size = 200   # width of the head's single hidden layer

        # Pretrained encoder; built before the head so construction order is unchanged.
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # Keep the Linear -> ReLU -> Linear layout: state dicts address these
        # layers by their position inside the Sequential.
        self.classifier = nn.Sequential(
            nn.Linear(bert_hidden_size, head_hidden_size),
            nn.ReLU(),
            nn.Linear(head_hidden_size, num_classes),
        )

        if freeze_bert:
            # Stop gradient tracking for all encoder weights.
            for weight in self.bert.parameters():
                weight.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """Return the logits computed from the first-token (`[CLS]`) hidden state."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # encoded[0] is the last hidden state; index 0 along dim 1 is the `[CLS]` position.
        cls_embedding = encoded[0][:, 0, :]
        return self.classifier(cls_embedding)

## Loading the model

In [6]:
#Define new models
# One independent classifier per code (P, L, O); the trained weights are loaded in a later cell.
model_P, model_L, model_O = (Classifier() for _ in range(3))

In [7]:
# Put all three models in inference mode rather than training mode.
# The final call is the cell's last expression, so its return value (the
# module itself) is what the notebook echoes as the cell output.
model_P.eval()
model_L.eval()
model_O.eval()

Classifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af

In [8]:
# Load the saved state dictionaries. map_location="cpu" remaps the stored
# tensors onto the CPU because we are loading on a CPU. weights_only=True
# restricts unpickling to tensors and plain containers, so a tampered
# checkpoint file cannot execute arbitrary code while it is being loaded.
model_P.load_state_dict(torch.load("modelP.pth", map_location="cpu", weights_only=True))
model_L.load_state_dict(torch.load("modelL.pth", map_location="cpu", weights_only=True))
model_O.load_state_dict(torch.load("modelO.pth", map_location="cpu", weights_only=True))

<All keys matched successfully>

In [10]:
# Load the tokenizer from the 'my_tokenizer/' path.
# NOTE(review): presumably this is the tokenizer saved alongside the trained models - confirm.
# NOTE(review): this import sits mid-notebook; consider moving it to the imports cell at the top.
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('my_tokenizer/')

In [13]:
# Read the four prepped Excel files from data_folder into DataFrames.
new_data_sources = pd.read_excel(f"{data_folder}pre_sources.xlsx")
new_data_morebetter = pd.read_excel(f"{data_folder}pre_morebetter.xlsx")
new_data_generic = pd.read_excel(f"{data_folder}pre_generic.xlsx")
new_data_twoStudents = pd.read_excel(f"{data_folder}pre_twoStudents.xlsx")

In [14]:
#removes "extra" whitespace the model will get confused by, standardizes the text
def text_preprocessing_simple(text):
    """Collapse every run of whitespace to a single space and trim both ends.

    Args:
        text: The text to normalize.

    Returns:
        The normalized string. Anything that is not a ``str`` (for example
        ``None`` or a float NaN) is returned unchanged, which keeps the
        best-effort behavior callers rely on without hiding real errors
        behind a bare ``except``.
    """
    if not isinstance(text, str):
        return text
    # Requires the `re` module from the notebook's import cell.
    return re.sub(r'\s+', ' ', text).strip()

In [15]:
def preprocessing_for_bert(data, tokenizer, max_len):
    """Tokenize every entry of `data` into fixed-length id and mask tensors.

    Each entry is converted with ``str()``, cleaned by
    ``text_preprocessing_simple`` and encoded with ``tokenizer.encode_plus``,
    which adds the special tokens and pads/truncates to `max_len`.

    @param    data: Iterable of texts to be processed.
    @param    tokenizer: Object exposing ``encode_plus``.
    @param    max_len (int): Length every encoded sequence is padded/truncated to.
    @return   input_ids (torch.Tensor): Token ids, one row per entry of `data`.
    @return   attention_masks (torch.Tensor): Attention masks, one row per entry,
                  marking which tokens the model should attend to.
    """
    encodings = [
        tokenizer.encode_plus(
            text=text_preprocessing_simple(str(sent)),  # stringify, then normalize
            add_special_tokens=True,       # add `[CLS]` and `[SEP]`
            max_length=max_len,            # target length for truncation/padding
            padding='max_length',          # pad up to max_length
            return_attention_mask=True,    # include the attention mask
            truncation=True,
        )
        for sent in data
    ]

    # Stack the per-sentence lists into 2-D tensors.
    input_ids = torch.tensor([enc.get('input_ids') for enc in encodings])
    attention_masks = torch.tensor([enc.get('attention_mask') for enc in encodings])
    return input_ids, attention_masks

In [27]:
#Define Dataset class
class Dataset(Dataset):
    """Tokenized dataset built from the "Input" column of a DataFrame.

    Each item is ``(input_ids, attention_mask, text)``: the two tensors that
    ``preprocessing_for_bert`` produced for that row plus the original cell
    value.

    NOTE(review): the class name shadows the ``torch.utils.data.Dataset`` it
    inherits from. The name is kept because later cells construct
    ``Dataset(...)``, but re-running this cell makes the class derive from
    its own previous definition.

    Args:
        df: DataFrame with an "Input" column.
        tokenizer: Tokenizer forwarded to ``preprocessing_for_bert``.
        max_len: Length every sequence is padded/truncated to.
    """

    def __init__(self, df, tokenizer, max_len):
        # Honour the caller's max_len rather than a notebook-level MAX_LEN
        # global, so the class works regardless of the order cells were run in.
        self.data, self.masks = preprocessing_for_bert(df["Input"], tokenizer, max_len)
        # Reset to a 0..n-1 index so lookups in __getitem__ line up with the tensor rows.
        self.texts = df["Input"].reset_index(drop=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.masks[idx], self.texts[idx]

In [21]:
# Token length passed to Dataset(...) when the datasets are built below.
MAX_LEN = 424 #change this number based on what the trained model says

In [28]:
# For each response table: tokenize it into a dataset, then wrap it in a
# loader that yields batches of 16 in the original row order.
new_dataset_sources = Dataset(new_data_sources, tokenizer, MAX_LEN)
new_loader_sources = DataLoader(new_dataset_sources, batch_size=16)

new_dataset_morebetter = Dataset(new_data_morebetter, tokenizer, MAX_LEN)
new_loader_morebetter = DataLoader(new_dataset_morebetter, batch_size=16)

new_dataset_generic = Dataset(new_data_generic, tokenizer, MAX_LEN)
new_loader_generic = DataLoader(new_dataset_generic, batch_size=16)

new_dataset_twoStudents = Dataset(new_data_twoStudents, tokenizer, MAX_LEN)
new_loader_twoStudents = DataLoader(new_dataset_twoStudents, batch_size=16)

In [23]:
#defines a function to run the model
def apply_model_to_data_set(model, dataloader):
    """Run `model` over every batch of `dataloader` without tracking gradients.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns a logits tensor.
        dataloader: Iterable yielding ``(input_ids, attention_mask, texts)``
            batches; the texts are ignored here.

    Returns:
        ``(logits, probs, preds)`` as NumPy arrays: the concatenated logits,
        their softmax over dim 1, and their argmax over dim 1.
    """
    model.eval()
    model = model.to(device)  # `device` is the notebook-level torch.device

    batch_logits = []
    with torch.no_grad():
        for input_ids, attention_mask, _texts in dataloader:
            batch_logits.append(model(input_ids.to(device), attention_mask.to(device)))

    # Join the per-batch results into one tensor before post-processing.
    stacked = torch.cat(batch_logits, dim=0)
    probabilities = F.softmax(stacked, dim=1).cpu().numpy()
    predictions = torch.argmax(stacked, dim=1).flatten().cpu().numpy()
    return stacked.cpu().numpy(), probabilities, predictions

In [25]:
# Prefer the first CUDA GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cpu


## Running the model

In [31]:
# Run the L model on each of the four new datasets, keeping logits,
# softmax probabilities and predicted classes.
sources_logits_L, sources_probs_L, sources_preds_L = apply_model_to_data_set(
    model_L, new_loader_sources
)
morebetter_logits_L, morebetter_probs_L, morebetter_preds_L = apply_model_to_data_set(
    model_L, new_loader_morebetter
)
generic_logits_L, generic_probs_L, generic_preds_L = apply_model_to_data_set(
    model_L, new_loader_generic
)
twoStudents_logits_L, twoStudents_probs_L, twoStudents_preds_L = apply_model_to_data_set(
    model_L, new_loader_twoStudents
)

In [32]:
# Run the P model on each of the four new datasets, keeping logits,
# softmax probabilities and predicted classes.
sources_logits_P, sources_probs_P, sources_preds_P = apply_model_to_data_set(
    model_P, new_loader_sources
)
morebetter_logits_P, morebetter_probs_P, morebetter_preds_P = apply_model_to_data_set(
    model_P, new_loader_morebetter
)
generic_logits_P, generic_probs_P, generic_preds_P = apply_model_to_data_set(
    model_P, new_loader_generic
)
twoStudents_logits_P, twoStudents_probs_P, twoStudents_preds_P = apply_model_to_data_set(
    model_P, new_loader_twoStudents
)


In [33]:
# Run the O model on each of the four new datasets, keeping logits,
# softmax probabilities and predicted classes.
sources_logits_O, sources_probs_O, sources_preds_O = apply_model_to_data_set(
    model_O, new_loader_sources
)
morebetter_logits_O, morebetter_probs_O, morebetter_preds_O = apply_model_to_data_set(
    model_O, new_loader_morebetter
)
generic_logits_O, generic_probs_O, generic_preds_O = apply_model_to_data_set(
    model_O, new_loader_generic
)
twoStudents_logits_O, twoStudents_probs_O, twoStudents_preds_O = apply_model_to_data_set(
    model_O, new_loader_twoStudents
)

## Saving the outputs

In [43]:
# Attach each model's predicted class index (argmax of its logits) to the
# Sources frame, in a column named after the code. This mutates the frame in place.
new_data_sources["P"] = sources_preds_P
new_data_sources["L"] = sources_preds_L
new_data_sources["O"] = sources_preds_O

In [45]:
# Attach each model's predicted class index (argmax of its logits) to the
# More/Better frame, in a column named after the code. This mutates the frame in place.
new_data_morebetter["P"] = morebetter_preds_P
new_data_morebetter["L"] = morebetter_preds_L
new_data_morebetter["O"] = morebetter_preds_O

In [46]:
# Attach each model's predicted class index (argmax of its logits) to the
# Generic frame, in a column named after the code. This mutates the frame in place.
new_data_generic["P"] = generic_preds_P
new_data_generic["L"] = generic_preds_L
new_data_generic["O"] = generic_preds_O

In [47]:
# Attach each model's predicted class index (argmax of its logits) to the
# Two Students frame, in a column named after the code. This mutates the frame in place.
new_data_twoStudents["P"] = twoStudents_preds_P
new_data_twoStudents["L"] = twoStudents_preds_L
new_data_twoStudents["O"] = twoStudents_preds_O

In [49]:
# Write the coded frames (original columns plus P, L, O predictions) to Excel.
# Create the output folder first so a fresh checkout does not fail on a
# missing directory; exist_ok=True makes this a no-op when it already exists.
os.makedirs("BERT_coded_pre_data_S25", exist_ok=True)
new_data_sources.to_excel("BERT_coded_pre_data_S25/sources_pre_S25.xlsx")
new_data_morebetter.to_excel("BERT_coded_pre_data_S25/morebetter_pre_S25.xlsx")
new_data_generic.to_excel("BERT_coded_pre_data_S25/generic_pre_S25.xlsx")
new_data_twoStudents.to_excel("BERT_coded_pre_data_S25/twoStudents_pre_S25.xlsx")

In [None]:
#need to add code to convert preds to labels, add 3 columns to the dataframes (P,L,O) and save to csv
#This code already exists locally on the BEEAR Mac

## Testing the Model Performance: Calibration Curves

We use a hand-coded subset of the new dataset to check how well the models perform on this data.

In [None]:
# Set up plots and view distribution of C(x)
# NOTE(review): probs_val, all_labels_val, val_dataset_sources_L and df_val are
# not defined anywhere in the visible notebook, so this cell raises NameError
# on a fresh kernel. It also builds df_new_test but plots df_val - confirm
# which frame was intended, or remove the cell if it is a leftover.
fig, axs = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_new_test = pd.DataFrame(data = {"probs": probs_val[:,1], "gt_label":all_labels_val, "text":list(val_dataset_sources_L.texts.values)})
axs.hist(df_val["probs"], bins = 20)

In [None]:
# Load the test data used for the performance checks below.
# NOTE(review): presumably this is the hand-coded subset described above. The
# Dataset class reads an "Input" column from each frame - confirm the CSVs
# provide it along with the ground-truth labels.
test_data_sources = pd.read_csv("test_data_sources.csv")
test_data_morebetter = pd.read_csv("test_data_morebetter.csv")
test_data_generic = pd.read_csv("test_data_generic.csv")
test_data_twoStudents = pd.read_csv("test_data_twoStudents.csv")

In [None]:
# For each test table: tokenize it into a dataset, then wrap it in a loader
# that yields batches of 16 in the original row order.
test_dataset_sources = Dataset(test_data_sources, tokenizer, MAX_LEN)
test_loader_sources = DataLoader(test_dataset_sources, batch_size=16)

test_dataset_morebetter = Dataset(test_data_morebetter, tokenizer, MAX_LEN)
test_loader_morebetter = DataLoader(test_dataset_morebetter, batch_size=16)

test_dataset_generic = Dataset(test_data_generic, tokenizer, MAX_LEN)
test_loader_generic = DataLoader(test_dataset_generic, batch_size=16)

test_dataset_twoStudents = Dataset(test_data_twoStudents, tokenizer, MAX_LEN)
test_loader_twoStudents = DataLoader(test_dataset_twoStudents, batch_size=16)

In [None]:
def apply_model_to_test_set(model, dataloader):
    """Run `model` over a labelled loader and collect logits, labels, probabilities and predictions.

    NOTE(review): every batch is unpacked into four items
    ``(input_ids, attention_mask, texts, labels)``. The ``Dataset`` class
    defined earlier in this notebook yields only three items per row, so
    loaders built from it will fail on the unpack - confirm that the test
    loaders come from a dataset that also returns labels.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns a logits tensor.
        dataloader: Iterable yielding 4-item batches as described above; the
            texts are ignored here and the labels must be tensors.

    Returns:
        ``(logits, labels, probs, preds)`` as NumPy arrays: concatenated
        logits, concatenated labels, softmax over dim 1, argmax over dim 1.
    """
    model.eval()
    model = model.to(device)  # `device` is the notebook-level torch.device

    batch_logits, batch_labels = [], []
    with torch.no_grad():
        for input_ids, attention_mask, _texts, labels in dataloader:
            batch_logits.append(model(input_ids.to(device), attention_mask.to(device)))
            batch_labels.append(labels)

    # Join the per-batch results into single tensors before post-processing.
    stacked = torch.cat(batch_logits, dim=0)
    gold = torch.cat(batch_labels, dim = 0).cpu().numpy()
    probabilities = F.softmax(stacked, dim=1).cpu().numpy()
    predictions = torch.argmax(stacked, dim=1).flatten().cpu().numpy()
    return stacked.cpu().numpy(), gold, probabilities, predictions

In [None]:
# Run every model on every test set. All rows use apply_model_to_test_set,
# which returns four values (logits, labels, probs, preds);
# apply_model_to_data_set returns only three and cannot be unpacked this way.
# NOTE(review): the P, L and O runs of a prompt share one loader, so their
# *_labels_test_* arrays are identical unless the loader yields code-specific
# labels - confirm each model is scored against its own ground truth.

(sources_logits_test_L, sources_labels_test_L, sources_probs_test_L, sources_preds_test_L) = apply_model_to_test_set(model_L, test_loader_sources)
(morebetter_logits_test_L, morebetter_labels_test_L, morebetter_probs_test_L, morebetter_preds_test_L) = apply_model_to_test_set(model_L, test_loader_morebetter)
(generic_logits_test_L, generic_labels_test_L, generic_probs_test_L, generic_preds_test_L) = apply_model_to_test_set(model_L, test_loader_generic)
(twoStudents_logits_test_L, twoStudents_labels_test_L, twoStudents_probs_test_L, twoStudents_preds_test_L) = apply_model_to_test_set(model_L, test_loader_twoStudents)

(sources_logits_test_P, sources_labels_test_P, sources_probs_test_P, sources_preds_test_P) = apply_model_to_test_set(model_P, test_loader_sources)
(morebetter_logits_test_P, morebetter_labels_test_P, morebetter_probs_test_P, morebetter_preds_test_P) = apply_model_to_test_set(model_P, test_loader_morebetter)
(generic_logits_test_P, generic_labels_test_P, generic_probs_test_P, generic_preds_test_P) = apply_model_to_test_set(model_P, test_loader_generic)
(twoStudents_logits_test_P, twoStudents_labels_test_P, twoStudents_probs_test_P, twoStudents_preds_test_P) = apply_model_to_test_set(model_P, test_loader_twoStudents)

(sources_logits_test_O, sources_labels_test_O, sources_probs_test_O, sources_preds_test_O) = apply_model_to_test_set(model_O, test_loader_sources)
(morebetter_logits_test_O, morebetter_labels_test_O, morebetter_probs_test_O, morebetter_preds_test_O) = apply_model_to_test_set(model_O, test_loader_morebetter)
(generic_logits_test_O, generic_labels_test_O, generic_probs_test_O, generic_preds_test_O) = apply_model_to_test_set(model_O, test_loader_generic)
(twoStudents_logits_test_O, twoStudents_labels_test_O, twoStudents_probs_test_O, twoStudents_preds_test_O) = apply_model_to_test_set(model_O, test_loader_twoStudents)

In [None]:
# Distribution of the predicted class-1 probability C(x) on the Sources test
# set, one histogram per model (L, P, O). Each frame also carries the labels
# and raw text because the calibration cells consume it later.
fig1, axs1 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_sources_L = pd.DataFrame(data = {"probs": sources_probs_test_L[:,1], "gt_label":sources_labels_test_L, "text":list(test_dataset_sources.texts.values)})
axs1.set_title("Sources: L model")
axs1.hist(df_sources_L["probs"], bins = 20)

fig2, axs2 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_sources_P = pd.DataFrame(data = {"probs": sources_probs_test_P[:,1], "gt_label":sources_labels_test_P, "text":list(test_dataset_sources.texts.values)})
axs2.set_title("Sources: P model")
axs2.hist(df_sources_P["probs"], bins = 20)  # plot the P model's own probabilities

fig3, axs3 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_sources_O = pd.DataFrame(data = {"probs": sources_probs_test_O[:,1], "gt_label":sources_labels_test_O, "text":list(test_dataset_sources.texts.values)})
axs3.set_title("Sources: O model")
axs3.hist(df_sources_O["probs"], bins = 20)  # plot the O model's own probabilities

In [None]:
# Distribution of the predicted class-1 probability C(x) on the More/Better
# test set, one histogram per model (L, P, O). Each frame also carries the
# labels and raw text because the calibration cells consume it later.
fig1, axs1 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_morebetter_L = pd.DataFrame(data = {"probs": morebetter_probs_test_L[:,1], "gt_label":morebetter_labels_test_L, "text":list(test_dataset_morebetter.texts.values)})
axs1.set_title("More/Better: L model")
axs1.hist(df_morebetter_L["probs"], bins = 20)

fig2, axs2 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_morebetter_P = pd.DataFrame(data = {"probs": morebetter_probs_test_P[:,1], "gt_label":morebetter_labels_test_P, "text":list(test_dataset_morebetter.texts.values)})
axs2.set_title("More/Better: P model")
axs2.hist(df_morebetter_P["probs"], bins = 20)  # plot the P model's own probabilities

fig3, axs3 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_morebetter_O = pd.DataFrame(data = {"probs": morebetter_probs_test_O[:,1], "gt_label":morebetter_labels_test_O, "text":list(test_dataset_morebetter.texts.values)})
axs3.set_title("More/Better: O model")
axs3.hist(df_morebetter_O["probs"], bins = 20)  # plot the O model's own probabilities

In [None]:
# Distribution of the predicted class-1 probability C(x) on the Generic test
# set, one histogram per model (L, P, O). Each frame also carries the labels
# and raw text because the calibration cells consume it later.
fig1, axs1 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_generic_L = pd.DataFrame(data = {"probs": generic_probs_test_L[:,1], "gt_label":generic_labels_test_L, "text":list(test_dataset_generic.texts.values)})
axs1.set_title("Generic: L model")
axs1.hist(df_generic_L["probs"], bins = 20)

fig2, axs2 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_generic_P = pd.DataFrame(data = {"probs": generic_probs_test_P[:,1], "gt_label":generic_labels_test_P, "text":list(test_dataset_generic.texts.values)})
axs2.set_title("Generic: P model")
axs2.hist(df_generic_P["probs"], bins = 20)  # plot the P model's own probabilities

fig3, axs3 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_generic_O = pd.DataFrame(data = {"probs": generic_probs_test_O[:,1], "gt_label":generic_labels_test_O, "text":list(test_dataset_generic.texts.values)})
axs3.set_title("Generic: O model")
axs3.hist(df_generic_O["probs"], bins = 20)  # plot the O model's own probabilities

In [None]:
# Distribution of the predicted class-1 probability C(x) on the Two Students
# test set, one histogram per model (L, P, O). Each frame also carries the
# labels and raw text because the calibration cells consume it later.
fig1, axs1 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_twoStudents_L = pd.DataFrame(data = {"probs": twoStudents_probs_test_L[:,1], "gt_label":twoStudents_labels_test_L, "text":list(test_dataset_twoStudents.texts.values)})
axs1.set_title("Two Students: L model")
axs1.hist(df_twoStudents_L["probs"], bins = 20)

fig2, axs2 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_twoStudents_P = pd.DataFrame(data = {"probs": twoStudents_probs_test_P[:,1], "gt_label":twoStudents_labels_test_P, "text":list(test_dataset_twoStudents.texts.values)})
axs2.set_title("Two Students: P model")
axs2.hist(df_twoStudents_P["probs"], bins = 20)  # plot the P model's own probabilities

fig3, axs3 = plt.subplots(1, 1, sharey=True, tight_layout=True)
df_twoStudents_O = pd.DataFrame(data = {"probs": twoStudents_probs_test_O[:,1], "gt_label":twoStudents_labels_test_O, "text":list(test_dataset_twoStudents.texts.values)})
axs3.set_title("Two Students: O model")
axs3.hist(df_twoStudents_O["probs"], bins = 20)  # plot the O model's own probabilities

In [None]:
# Binned calibration curves (10 bins) for Sources, one per model (L, P, O).
binned_cal_curve1 = cal.generate_calibration_curve_binned(df_sources_L, num_bin = 10, binary = True)
binned_cal_curve1.plot(show_diagonal=True, filled = False)

binned_cal_curve2 = cal.generate_calibration_curve_binned(df_sources_P, num_bin = 10, binary = True)
binned_cal_curve2.plot(show_diagonal=True, filled = False)

# The O curve gets its own name so the P curve above is not overwritten.
binned_cal_curve3 = cal.generate_calibration_curve_binned(df_sources_O, num_bin = 10, binary = True)
binned_cal_curve3.plot(show_diagonal=True, filled = False)

In [None]:
# Binned calibration curves (10 bins) for More/Better, one per model (L, P, O).
binned_cal_curve1 = cal.generate_calibration_curve_binned(df_morebetter_L, num_bin = 10, binary = True)
binned_cal_curve1.plot(show_diagonal=True, filled = False)

binned_cal_curve2 = cal.generate_calibration_curve_binned(df_morebetter_P, num_bin = 10, binary = True)
binned_cal_curve2.plot(show_diagonal=True, filled = False)

# The O curve gets its own name so the P curve above is not overwritten.
binned_cal_curve3 = cal.generate_calibration_curve_binned(df_morebetter_O, num_bin = 10, binary = True)
binned_cal_curve3.plot(show_diagonal=True, filled = False)

In [None]:
# Binned calibration curves (10 bins) for Generic, one per model (L, P, O).
binned_cal_curve1 = cal.generate_calibration_curve_binned(df_generic_L, num_bin = 10, binary = True)
binned_cal_curve1.plot(show_diagonal=True, filled = False)

binned_cal_curve2 = cal.generate_calibration_curve_binned(df_generic_P, num_bin = 10, binary = True)
binned_cal_curve2.plot(show_diagonal=True, filled = False)

# The O curve gets its own name so the P curve above is not overwritten.
binned_cal_curve3 = cal.generate_calibration_curve_binned(df_generic_O, num_bin = 10, binary = True)
binned_cal_curve3.plot(show_diagonal=True, filled = False)

In [None]:
# Binned calibration curves (10 bins) for Two Students, one per model (L, P, O).
binned_cal_curve1 = cal.generate_calibration_curve_binned(df_twoStudents_L, num_bin = 10, binary = True)
binned_cal_curve1.plot(show_diagonal=True, filled = False)

binned_cal_curve2 = cal.generate_calibration_curve_binned(df_twoStudents_P, num_bin = 10, binary = True)
binned_cal_curve2.plot(show_diagonal=True, filled = False)

# The O curve gets its own name so the P curve above is not overwritten.
binned_cal_curve3 = cal.generate_calibration_curve_binned(df_twoStudents_O, num_bin = 10, binary = True)
binned_cal_curve3.plot(show_diagonal=True, filled = False)

In [None]:
# Get platt calibration curves for Sources and save as png files
# One curve per model (L, P, O). plt.figure() opens a fresh current figure
# before each curve is drawn, and plt.savefig then writes that current figure.
plt.figure()
platt_cal_curve1S = cal.generate_calibration_curve_platt(df_sources_L, binary = True)
platt_cal_curve1S.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_sources_L.png", bbox_inches='tight')
#plt.savefig("cal_curve.pdf", bbox_inches='tight')

plt.figure()
platt_cal_curve2S = cal.generate_calibration_curve_platt(df_sources_P, binary = True)
platt_cal_curve2S.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_sources_P.png", bbox_inches='tight')

plt.figure()
platt_cal_curve3S = cal.generate_calibration_curve_platt(df_sources_O, binary = True)
platt_cal_curve3S.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_sources_O.png", bbox_inches='tight')

In [None]:
# Get platt calibration curves for More/Better and save as png files
# One curve per model (L, P, O). plt.figure() opens a fresh current figure
# before each curve is drawn, and plt.savefig then writes that current figure.
plt.figure()
platt_cal_curve1M = cal.generate_calibration_curve_platt(df_morebetter_L, binary = True)
platt_cal_curve1M.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_morebetter_L.png", bbox_inches='tight')
#plt.savefig("cal_curve.pdf", bbox_inches='tight')

plt.figure()
platt_cal_curve2M = cal.generate_calibration_curve_platt(df_morebetter_P, binary = True)
platt_cal_curve2M.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_morebetter_P.png", bbox_inches='tight')

plt.figure()
platt_cal_curve3M = cal.generate_calibration_curve_platt(df_morebetter_O, binary = True)
platt_cal_curve3M.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_morebetter_O.png", bbox_inches='tight')

In [None]:
# Get platt calibration curves for Generic and save as png files
# One curve per model (L, P, O). plt.figure() opens a fresh current figure
# before each curve is drawn, and plt.savefig then writes that current figure.
plt.figure()
platt_cal_curve1G = cal.generate_calibration_curve_platt(df_generic_L, binary = True)
platt_cal_curve1G.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_generic_L.png", bbox_inches='tight')
#plt.savefig("cal_curve.pdf", bbox_inches='tight')

plt.figure()
platt_cal_curve2G = cal.generate_calibration_curve_platt(df_generic_P, binary = True)
platt_cal_curve2G.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_generic_P.png", bbox_inches='tight')

plt.figure()
platt_cal_curve3G = cal.generate_calibration_curve_platt(df_generic_O, binary = True)
platt_cal_curve3G.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_generic_O.png", bbox_inches='tight')

In [None]:
# Get platt calibration curves for Two Students and save as png files
# One curve per model (L, P, O). plt.figure() opens a fresh current figure
# before each curve is drawn, and plt.savefig then writes that current figure.
plt.figure()
platt_cal_curve1T = cal.generate_calibration_curve_platt(df_twoStudents_L, binary = True)
platt_cal_curve1T.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_twoStudents_L.png", bbox_inches='tight')
#plt.savefig("cal_curve.pdf", bbox_inches='tight')

plt.figure()
platt_cal_curve2T = cal.generate_calibration_curve_platt(df_twoStudents_P, binary = True)
platt_cal_curve2T.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_twoStudents_P.png", bbox_inches='tight')

plt.figure()
platt_cal_curve3T = cal.generate_calibration_curve_platt(df_twoStudents_O, binary = True)
platt_cal_curve3T.plot(show_diagonal=True, filled = False)
plt.savefig("cal_curve_twoStudents_O.png", bbox_inches='tight')

## Calculating Performance Metrics: Accuracy, Balanced Accuracy and AUC

In [None]:
# Sources metrics: one row per code (P, L, O) holding accuracy, balanced
# accuracy and ROC AUC. AUC is scored on the class-1 probability column.
sources_performance = pd.DataFrame(
    [
        {
            "Code": code,
            "Accuracy": accuracy_score(labels, preds),
            "Balanced accuracy": balanced_accuracy_score(labels, preds),
            "AUC": roc_auc_score(labels, positive_probs),
        }
        for code, labels, preds, positive_probs in (
            ("P", sources_labels_test_P, sources_preds_test_P, sources_probs_test_P[:,1]),
            ("L", sources_labels_test_L, sources_preds_test_L, sources_probs_test_L[:,1]),
            ("O", sources_labels_test_O, sources_preds_test_O, sources_probs_test_O[:,1]),
        )
    ]
)

In [None]:
# Display the Sources metrics rounded to two decimal places.
sources_performance.round(2)

In [None]:
# More/Better metrics: one row per code (P, L, O) holding accuracy, balanced
# accuracy and ROC AUC. AUC is scored on the class-1 probability column.
morebetter_performance = pd.DataFrame(
    [
        {
            "Code": code,
            "Accuracy": accuracy_score(labels, preds),
            "Balanced accuracy": balanced_accuracy_score(labels, preds),
            "AUC": roc_auc_score(labels, positive_probs),
        }
        for code, labels, preds, positive_probs in (
            ("P", morebetter_labels_test_P, morebetter_preds_test_P, morebetter_probs_test_P[:,1]),
            ("L", morebetter_labels_test_L, morebetter_preds_test_L, morebetter_probs_test_L[:,1]),
            ("O", morebetter_labels_test_O, morebetter_preds_test_O, morebetter_probs_test_O[:,1]),
        )
    ]
)

In [None]:
# Display the More/Better metrics rounded to two decimal places.
morebetter_performance.round(2)

In [None]:
# Generic metrics: one row per code (P, L, O) holding accuracy, balanced
# accuracy and ROC AUC. AUC is scored on the class-1 probability column.
generic_performance = pd.DataFrame(
    [
        {
            "Code": code,
            "Accuracy": accuracy_score(labels, preds),
            "Balanced accuracy": balanced_accuracy_score(labels, preds),
            "AUC": roc_auc_score(labels, positive_probs),
        }
        for code, labels, preds, positive_probs in (
            ("P", generic_labels_test_P, generic_preds_test_P, generic_probs_test_P[:,1]),
            ("L", generic_labels_test_L, generic_preds_test_L, generic_probs_test_L[:,1]),
            ("O", generic_labels_test_O, generic_preds_test_O, generic_probs_test_O[:,1]),
        )
    ]
)

In [None]:
# Display the Generic metrics rounded to two decimal places.
generic_performance.round(2)

In [None]:
# Two Students metrics: one row per code (P, L, O) holding accuracy, balanced
# accuracy and ROC AUC. AUC is scored on the class-1 probability column.
twoStudents_performance = pd.DataFrame(
    [
        {
            "Code": code,
            "Accuracy": accuracy_score(labels, preds),
            "Balanced accuracy": balanced_accuracy_score(labels, preds),
            "AUC": roc_auc_score(labels, positive_probs),
        }
        for code, labels, preds, positive_probs in (
            ("P", twoStudents_labels_test_P, twoStudents_preds_test_P, twoStudents_probs_test_P[:,1]),
            ("L", twoStudents_labels_test_L, twoStudents_preds_test_L, twoStudents_probs_test_L[:,1]),
            ("O", twoStudents_labels_test_O, twoStudents_preds_test_O, twoStudents_probs_test_O[:,1]),
        )
    ]
)

In [None]:
# Display the Two Students metrics rounded to two decimal places.
twoStudents_performance.round(2)