# Train Blending (BERT cased, DistilBERT uncased, DeBERTa)

The BERT models with the best F1 scores on the HS_C dataset are used.

In [1]:
# Automates the tabulation of results into Google Sheets

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from googleapiclient.discovery import build

import os

# Project root. Defaults to the original local checkout; set the THESIS_ROOT
# environment variable to run the notebook elsewhere without editing this cell.
new_directory = os.environ.get('THESIS_ROOT', '/Users/levan/ATENEO MASTERAL/Thesis')

# Change the current working directory so the relative key-file path below resolves
os.chdir(new_directory)

# Use creds to create a client to interact with the Google Drive API
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('thesis-432315-12daec8d1ff6.json', scope)

# Sheets API v4 service object; the "SAVE TO GOOGLE SHEET" cells call it to write metrics
service = build('sheets', 'v4', credentials=creds)

# gspread client authorised with the same service-account credentials
client = gspread.authorize(creds)

# ID of the spreadsheet that receives the result rows
spreadsheet_id = '13Fk5oXX9B_mdHmNpMKQMy29y9iiHWrgQCa4hUTiQKD0'

## Load Dataset

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AutoConfig
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn.functional as F

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_curve, f1_score, confusion_matrix, precision_score, recall_score, accuracy_score


import os

# Development directory. Defaults to the original local path; set the
# THESIS_DEV_DIR environment variable to run from a different checkout.
new_directory = os.environ.get('THESIS_DEV_DIR', '/Users/levan/ATENEO MASTERAL/Thesis/Development')

# Change the current working directory so the relative corpus/model paths resolve
os.chdir(new_directory)

# Adjust file paths to local system
file_path = 'Corpus/FiReCS/FiReCS_data_b.csv'
df = pd.read_csv(file_path)

# Split the data: 70% (holdout_df) is used to fit the meta-classifiers and
# 30% (test_df) to evaluate them; the seed keeps the split reproducible.
holdout_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

## Load Models and Tokenizers

In [3]:
def load_model_and_tokenizer(model_path, tokenizer_path, base_model):
    """Load a 3-label sequence classifier together with its tokenizer.

    Args:
        model_path: Location handed to
            ``AutoModelForSequenceClassification.from_pretrained``.
        tokenizer_path: Location handed to ``AutoTokenizer.from_pretrained``.
        base_model: Identifier whose ``AutoConfig`` (with ``num_labels=3``)
            is used to configure the model.

    Returns:
        A ``(model, tokenizer)`` tuple; the model is in evaluation mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    # The configuration is taken from the base model, not from ``model_path``
    three_label_config = AutoConfig.from_pretrained(base_model, num_labels=3)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_path, config=three_label_config
    )

    # eval() switches to evaluation mode and returns the module itself
    return classifier.eval(), tokenizer

# Each entry: (display name, fine-tuned checkpoint directory, base model id).
# The checkpoint directory serves as both the model path and the tokenizer path.
# NOTE(review): four base models are registered here, while the notebook title
# lists three (BERT cased, DistilBERT uncased, DeBERTa) — confirm that
# 'BERT uncased' is meant to be part of the blend.
model_info = {
    name: {
        'model_path': checkpoint_dir,
        'tokenizer_path': checkpoint_dir,
        'base_model': base_model_id,
    }
    for name, checkpoint_dir, base_model_id in [
        ('BERT cased',
         'BERT models/1 FiReCS/fire_bert-base-cased-finetuned',
         'google-bert/bert-base-cased'),
        ('DistilBERT uncased',
         'BERT models/1 FiReCS/fire_distilbert-base-uncased-finetuned',
         'distilbert/distilbert-base-uncased'),
        ('DeBERTa',
         'BERT models/1 FiReCS/fire_DeBERTa-finetuned',
         'microsoft/deberta-v3-base'),
        ('BERT uncased',
         'BERT models/1 FiReCS/fire_bert-base-uncased-finetuned',
         'google-bert/bert-base-uncased'),
    ]
}

# name -> (model, tokenizer); the dict keys match the loader's parameter names
models_and_tokenizers = {
    name: load_model_and_tokenizer(**info)
    for name, info in model_info.items()
}

## Apply Tokenization

In [4]:
def texts_to_dataloader(texts, tokenizer, batch_size=32):
    """Tokenize ``texts`` and wrap the encodings in a ``DataLoader``.

    Args:
        texts: List of strings to encode.
        tokenizer: Callable invoked as ``tokenizer(texts, truncation=True,
            padding=True, max_length=512, return_tensors="pt")``. Its result
            must expose ``'input_ids'`` and ``'attention_mask'`` tensors.
        batch_size: Number of examples per batch.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` that yields
        ``(input_ids, attention_mask)`` batches in input order (no shuffling
        is requested).
    """
    # All texts are encoded in a single call; tensors stay on the CPU and the
    # caller moves each batch to its device.
    encodings = tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")

    dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'])
    return DataLoader(dataset, batch_size=batch_size)

## Perform Blending

In [5]:
%load_ext memory_profiler

In [6]:
%%memit

def get_meta_features(model, tokenizer, df, device):
    """Return the softmax class probabilities of ``model`` for every row of ``df``.

    Args:
        model: Classifier whose output exposes ``.logits``.
        tokenizer: Tokenizer forwarded to ``texts_to_dataloader``.
        df: DataFrame with a ``'review'`` column holding the input texts.
        device: Torch device the model and the batches are moved to.

    Returns:
        NumPy array with one row per review, formed by concatenating the
        per-batch probability arrays in order.
    """
    loader = texts_to_dataloader(df['review'].tolist(), tokenizer, batch_size=32)

    model.to(device)
    model.eval()

    batch_probabilities = []
    # Inference only: no gradients are needed
    with torch.no_grad():
        for batch in loader:
            ids, mask = (tensor.to(device) for tensor in batch)
            logits = model(input_ids=ids, attention_mask=mask).logits
            batch_probabilities.append(F.softmax(logits, dim=1).cpu().numpy())
    return np.concatenate(batch_probabilities)

# Use the "mps" backend when it is available, otherwise fall back to the CPU
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

# One probability block per base model, stacked column-wise, for the holdout split
holdout_meta_features = np.hstack([
    get_meta_features(model, tokenizer, holdout_df, device)
    for model, tokenizer in models_and_tokenizers.values()
])

# Same construction for the test split
test_meta_features = np.hstack([
    get_meta_features(model, tokenizer, test_df, device)
    for model, tokenizer in models_and_tokenizers.values()
])

peak memory: 2414.98 MiB, increment: 76.89 MiB


In [7]:
# Sanity check: mean predicted probability per class for each base model on the test split
for name in models_and_tokenizers:
    model, tokenizer = models_and_tokenizers[name]
    print(f"Checking base model predictions for {name}...")
    base_model_preds = get_meta_features(model, tokenizer, test_df, device)
    print("Predictions distribution:", base_model_preds.mean(axis=0))

Checking base model predictions for BERT cased...
Predictions distribution: [0.34362876 0.36963773 0.28673354]
Checking base model predictions for DistilBERT uncased...
Predictions distribution: [0.33207488 0.33896688 0.32895863]
Checking base model predictions for DeBERTa...
Predictions distribution: [0.33203426 0.3266011  0.34136456]
Checking base model predictions for BERT uncased...
Predictions distribution: [0.32610813 0.3632681  0.3106237 ]


### Train LR

In [8]:
%%memit
# Fit the logistic-regression meta-classifier on the holdout meta-features
# (fit() returns the estimator itself)
lr_meta_classifier = LogisticRegression().fit(holdout_meta_features, holdout_df['label'].values)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


peak memory: 938.94 MiB, increment: 1.52 MiB


### Train NB

In [9]:
%%memit
# Fit the Gaussian naive-Bayes meta-classifier on the holdout meta-features
# (fit() returns the estimator itself)
nb_meta_classifier = GaussianNB().fit(holdout_meta_features, holdout_df['label'].values)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

peak memory: 939.00 MiB, increment: 0.03 MiB


### Train DT

In [10]:
%%memit
# Train the DT meta-classifier.
# random_state is fixed (same seed as the train/test split) so the fitted tree,
# and therefore the reported metrics, are reproducible across runs.
dt_meta_classifier = DecisionTreeClassifier(max_depth=None, random_state=42)
dt_meta_classifier.fit(holdout_meta_features, holdout_df['label'].values)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


peak memory: 925.59 MiB, increment: 0.30 MiB


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


### Train SVM

In [11]:
%%memit
# Train the SVM meta-classifier.
# probability=True fits the probability estimates with an internal, randomised
# cross-validation; random_state is fixed (same seed as the train/test split)
# so predict_proba, and the thresholds derived from it, are reproducible.
svm_meta_classifier = SVC(kernel='linear', probability=True, random_state=42)
svm_meta_classifier.fit(holdout_meta_features, holdout_df['label'].values)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


peak memory: 935.47 MiB, increment: 9.88 MiB


## Evaluate Models

### LR

In [12]:
# Hard-label predictions of the LR meta-classifier on the test meta-features
lr_final_predictions = lr_meta_classifier.predict(test_meta_features)

# Evaluate against the test labels (macro-averaged precision, recall and F1)
test_labels = test_df['label'].values
accuracy = accuracy_score(test_labels, lr_final_predictions)
precision = precision_score(test_labels, lr_final_predictions, average='macro')
recall = recall_score(test_labels, lr_final_predictions, average='macro')
f1 = f1_score(test_labels, lr_final_predictions, average='macro')
conf_matrix = confusion_matrix(test_labels, lr_final_predictions)

# One print call, one item per output line
print(
    "Logistic Regression",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"Ensemble accuracy: {accuracy:.4f}",
    f"F1 Score: {f1:.4f}",
    "Confusion Matrix:",
    conf_matrix,
    sep="\n",
)


Logistic Regression
Precision: 0.8183
Recall: 0.8159
Ensemble accuracy: 0.8143
F1 Score: 0.8170
Confusion Matrix:
[[392  82   1]
 [ 77 370  44]
 [  5  54 391]]


In [13]:
# Class-membership probabilities of the LR meta-classifier on the test split
probabilities = lr_meta_classifier.predict_proba(test_meta_features)

# For each class (one-vs-rest), pick the probability threshold with the highest F1.
# NOTE(review): the thresholds are tuned on the same test split that is scored
# below, so the test metrics printed by this cell are optimistic.
lr_thresh = []
for i in range(probabilities.shape[1]):  # Iterate over each class
    precision, recall, thresholds = precision_recall_curve(test_df['label'].values == i, probabilities[:, i])
    # precision/recall have one more entry than thresholds (a final point with
    # no threshold); drop it so every index maps onto `thresholds`.
    precision, recall = precision[:-1], recall[:-1]
    denominator = precision + recall
    # F1 is taken as 0 where precision + recall == 0, instead of producing 0/0 = NaN
    f1_scores = np.divide(2 * precision * recall, denominator,
                          out=np.zeros_like(denominator), where=denominator > 0)
    optimal_idx = np.argmax(f1_scores)
    lr_thresh.append(thresholds[optimal_idx])

# Apply the optimized thresholds: zero out probabilities below their class
# threshold, then take the arg-max per row. Rows in which no class reaches its
# threshold are all zeros and therefore resolve to class 0.
adjusted_predictions = np.where(probabilities >= np.asarray(lr_thresh), probabilities, 0).argmax(axis=1)

# Evaluate
adjusted_precision = precision_score(test_df['label'].values, adjusted_predictions, average='macro')
adjusted_recall = recall_score(test_df['label'].values, adjusted_predictions, average='macro')
adjusted_accuracy = accuracy_score(test_df['label'].values, adjusted_predictions)
adjusted_f1 = f1_score(test_df['label'].values, adjusted_predictions, average='macro')
conf_matrix = confusion_matrix(test_df['label'].values, adjusted_predictions)

print("Optimized thresholds for each class:", lr_thresh)
print("Logistic Regression")
print(f"Precision: {adjusted_precision:.4f}")
print(f"Recall: {adjusted_recall:.4f}")
print(f"Ensemble accuracy: {adjusted_accuracy:.4f}")
print(f"F1 Score: {adjusted_f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

Optimized thresholds for each class: [0.26148041323664567, 0.4295520067946583, 0.3475124870580583]
Logistic Regression
Precision: 0.8188
Recall: 0.8166
Ensemble accuracy: 0.8150
F1 Score: 0.817611841945257
Confusion Matrix:
[[393  81   1]
 [ 77 370  44]
 [  6  53 391]]


In [14]:
# SAVE TO GOOGLE SHEET

# Target cells on sheet '4-1'
range_name = '4-1!B8:E8'

# The adjusted_* metrics currently in the kernel, as percentages with two
# decimals, in the order precision, recall, accuracy, F1
values = [[
    f"{metric * 100:.2f}"
    for metric in (adjusted_precision, adjusted_recall, adjusted_accuracy, adjusted_f1)
]]

# Request body expected by the Sheets values API
body = {'values': values}

# Write the row through the Sheets API
result = (
    service.spreadsheets()
    .values()
    .update(
        spreadsheetId=spreadsheet_id,
        range=range_name,
        valueInputOption='USER_ENTERED',
        body=body,
    )
    .execute()
)

print('Updated cells count:', result.get('updatedCells'))

Updated cells count: 4


### NB

In [15]:
# Hard-label predictions of the NB meta-classifier on the test meta-features
nb_final_predictions = nb_meta_classifier.predict(test_meta_features)

# Evaluate (macro-averaged precision, recall and F1)
precision = precision_score(test_df['label'].values, nb_final_predictions, average='macro')
recall = recall_score(test_df['label'].values, nb_final_predictions, average='macro')
accuracy = accuracy_score(test_df['label'].values, nb_final_predictions)
f1 = f1_score(test_df['label'].values, nb_final_predictions, average='macro')
conf_matrix = confusion_matrix(test_df['label'].values, nb_final_predictions)

print("Naive Bayes")
print(f"Precision: {precision:.4f}")
# Recall uses the same 4-decimal format as the other metrics
print(f"Recall: {recall:.4f}")
print(f"Ensemble accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

Naive Bayes
Precision: 0.8095
Recall: 0.8082884513841743
Ensemble accuracy: 0.8065
F1 Score: 0.8087
Confusion Matrix:
[[399  74   2]
 [ 86 357  48]
 [  4  60 386]]


In [16]:
# Class-membership probabilities of the NB meta-classifier on the test split
probabilities = nb_meta_classifier.predict_proba(test_meta_features)

# For each class (one-vs-rest), pick the probability threshold with the highest F1.
# NOTE(review): the thresholds are tuned on the same test split that is scored
# below, so the test metrics printed by this cell are optimistic.
nb_thresh = []
for i in range(probabilities.shape[1]):  # Iterate over each class
    precision, recall, thresholds = precision_recall_curve(test_df['label'].values == i, probabilities[:, i])
    # precision/recall have one more entry than thresholds (a final point with
    # no threshold); drop it so every index maps onto `thresholds`.
    precision, recall = precision[:-1], recall[:-1]
    denominator = precision + recall
    # F1 is taken as 0 where precision + recall == 0, instead of producing 0/0 = NaN
    f1_scores = np.divide(2 * precision * recall, denominator,
                          out=np.zeros_like(denominator), where=denominator > 0)
    optimal_idx = np.argmax(f1_scores)
    nb_thresh.append(thresholds[optimal_idx])

# Apply the optimized thresholds: zero out probabilities below their class
# threshold, then take the arg-max per row. Rows in which no class reaches its
# threshold are all zeros and therefore resolve to class 0.
adjusted_predictions = np.where(probabilities >= np.asarray(nb_thresh), probabilities, 0).argmax(axis=1)

# Evaluate
adjusted_precision = precision_score(test_df['label'].values, adjusted_predictions, average='macro')
adjusted_recall = recall_score(test_df['label'].values, adjusted_predictions, average='macro')
adjusted_accuracy = accuracy_score(test_df['label'].values, adjusted_predictions)
adjusted_f1 = f1_score(test_df['label'].values, adjusted_predictions, average='macro')
conf_matrix = confusion_matrix(test_df['label'].values, adjusted_predictions)

print("Optimized thresholds for each class:", nb_thresh)
# Label corrected: this cell reports the naive-Bayes meta-classifier
print("Naive Bayes")
print(f"Precision: {adjusted_precision:.4f}")
print(f"Recall: {adjusted_recall:.4f}")
print(f"Ensemble accuracy: {adjusted_accuracy:.4f}")
print(f"F1 Score: {adjusted_f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

Optimized thresholds for each class: [0.16598397872861453, 0.0026890821420721377, 0.0006331427528930969]
Logistic Regression
Precision: 0.8095
Recall: 0.8083
Ensemble accuracy: 0.8065
F1 Score: 0.8087400782309007
Confusion Matrix:
[[399  74   2]
 [ 86 357  48]
 [  4  60 386]]


  f1_scores = 2 * (precision * recall) / (precision + recall)


In [17]:
# SAVE TO GOOGLE SHEET

# Target cells on sheet '4-1'
range_name = '4-1!B9:E9'

# The adjusted_* metrics currently in the kernel, as percentages with two
# decimals, in the order precision, recall, accuracy, F1
values = [[
    f"{metric * 100:.2f}"
    for metric in (adjusted_precision, adjusted_recall, adjusted_accuracy, adjusted_f1)
]]

# Request body expected by the Sheets values API
body = {'values': values}

# Write the row through the Sheets API
result = (
    service.spreadsheets()
    .values()
    .update(
        spreadsheetId=spreadsheet_id,
        range=range_name,
        valueInputOption='USER_ENTERED',
        body=body,
    )
    .execute()
)

print('Updated cells count:', result.get('updatedCells'))

Updated cells count: 4


### DT

In [18]:
# Hard-label predictions of the DT meta-classifier on the test meta-features
dt_final_predictions = dt_meta_classifier.predict(test_meta_features)

# Evaluate against the test labels (macro-averaged precision, recall and F1)
test_labels = test_df['label'].values
accuracy = accuracy_score(test_labels, dt_final_predictions)
precision = precision_score(test_labels, dt_final_predictions, average='macro')
recall = recall_score(test_labels, dt_final_predictions, average='macro')
f1 = f1_score(test_labels, dt_final_predictions, average='macro')
conf_matrix = confusion_matrix(test_labels, dt_final_predictions)

# One print call, one item per output line
print(
    "Decision Tree",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"Ensemble accuracy: {accuracy:.4f}",
    f"F1 Score: {f1:.4f}",
    "Confusion Matrix:",
    conf_matrix,
    sep="\n",
)

Decision Tree
Precision: 0.7512
Recall: 0.7521
Ensemble accuracy: 0.7493
F1 Score: 0.7516
Confusion Matrix:
[[364 105   6]
 [105 318  68]
 [ 10  61 379]]


In [19]:
# Class-membership probabilities of the DT meta-classifier on the test split
probabilities = dt_meta_classifier.predict_proba(test_meta_features)

# For each class (one-vs-rest), pick the probability threshold with the highest F1.
# NOTE(review): the thresholds are tuned on the same test split that is scored
# below, so the test metrics printed by this cell are optimistic.
dt_thresh = []
for i in range(probabilities.shape[1]):  # Iterate over each class
    precision, recall, thresholds = precision_recall_curve(test_df['label'].values == i, probabilities[:, i])
    # precision/recall have one more entry than thresholds (a final point with
    # no threshold); drop it so every index maps onto `thresholds`.
    precision, recall = precision[:-1], recall[:-1]
    denominator = precision + recall
    # F1 is taken as 0 where precision + recall == 0, instead of producing 0/0 = NaN
    f1_scores = np.divide(2 * precision * recall, denominator,
                          out=np.zeros_like(denominator), where=denominator > 0)
    optimal_idx = np.argmax(f1_scores)
    dt_thresh.append(thresholds[optimal_idx])

# Apply the optimized thresholds: zero out probabilities below their class
# threshold, then take the arg-max per row. Rows in which no class reaches its
# threshold are all zeros and therefore resolve to class 0.
adjusted_predictions = np.where(probabilities >= np.asarray(dt_thresh), probabilities, 0).argmax(axis=1)

# Evaluate
adjusted_precision = precision_score(test_df['label'].values, adjusted_predictions, average='macro')
adjusted_recall = recall_score(test_df['label'].values, adjusted_predictions, average='macro')
adjusted_accuracy = accuracy_score(test_df['label'].values, adjusted_predictions)
adjusted_f1 = f1_score(test_df['label'].values, adjusted_predictions, average='macro')
conf_matrix = confusion_matrix(test_df['label'].values, adjusted_predictions)

print("Optimized thresholds for each class:", dt_thresh)
# Label corrected: this cell reports the decision-tree meta-classifier
print("Decision Tree")
print(f"Precision: {adjusted_precision:.4f}")
print(f"Recall: {adjusted_recall:.4f}")
print(f"Ensemble accuracy: {adjusted_accuracy:.4f}")
print(f"F1 Score: {adjusted_f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

Optimized thresholds for each class: [1.0, 1.0, 1.0]
Logistic Regression
Precision: 0.7512
Recall: 0.7521
Ensemble accuracy: 0.7493
F1 Score: 0.7516115198080972
Confusion Matrix:
[[364 105   6]
 [105 318  68]
 [ 10  61 379]]


In [20]:
# SAVE TO GOOGLE SHEET

# Target cells on sheet '4-1'
range_name = '4-1!B10:E10'

# The adjusted_* metrics currently in the kernel, as percentages with two
# decimals, in the order precision, recall, accuracy, F1
values = [[
    f"{metric * 100:.2f}"
    for metric in (adjusted_precision, adjusted_recall, adjusted_accuracy, adjusted_f1)
]]

# Request body expected by the Sheets values API
body = {'values': values}

# Write the row through the Sheets API
result = (
    service.spreadsheets()
    .values()
    .update(
        spreadsheetId=spreadsheet_id,
        range=range_name,
        valueInputOption='USER_ENTERED',
        body=body,
    )
    .execute()
)

print('Updated cells count:', result.get('updatedCells'))

Updated cells count: 4


### SVM

In [21]:
# Hard-label predictions of the SVM meta-classifier on the test meta-features
svm_final_predictions = svm_meta_classifier.predict(test_meta_features)

# Evaluate against the test labels (macro-averaged precision, recall and F1)
test_labels = test_df['label'].values
accuracy = accuracy_score(test_labels, svm_final_predictions)
precision = precision_score(test_labels, svm_final_predictions, average='macro')
recall = recall_score(test_labels, svm_final_predictions, average='macro')
f1 = f1_score(test_labels, svm_final_predictions, average='macro')
conf_matrix = confusion_matrix(test_labels, svm_final_predictions)

# One print call, one item per output line
print(
    "SVM",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"Ensemble accuracy: {accuracy:.4f}",
    f"F1 Score: {f1:.4f}",
    "Confusion Matrix:",
    conf_matrix,
    sep="\n",
)

SVM
Precision: 0.8137
Recall: 0.8094
Ensemble accuracy: 0.8079
F1 Score: 0.8112
Confusion Matrix:
[[390  84   1]
 [ 83 369  39]
 [  5  60 385]]


In [22]:
# Class-membership probabilities of the SVM meta-classifier on the test split
probabilities = svm_meta_classifier.predict_proba(test_meta_features)

# For each class (one-vs-rest), pick the probability threshold with the highest F1.
# NOTE(review): the thresholds are tuned on the same test split that is scored
# below, so the test metrics printed by this cell are optimistic.
svm_thresh = []
for i in range(probabilities.shape[1]):  # Iterate over each class
    precision, recall, thresholds = precision_recall_curve(test_df['label'].values == i, probabilities[:, i])
    # precision/recall have one more entry than thresholds (a final point with
    # no threshold); drop it so every index maps onto `thresholds`.
    precision, recall = precision[:-1], recall[:-1]
    denominator = precision + recall
    # F1 is taken as 0 where precision + recall == 0, instead of producing 0/0 = NaN
    f1_scores = np.divide(2 * precision * recall, denominator,
                          out=np.zeros_like(denominator), where=denominator > 0)
    optimal_idx = np.argmax(f1_scores)
    svm_thresh.append(thresholds[optimal_idx])

# Apply the optimized thresholds: zero out probabilities below their class
# threshold, then take the arg-max per row. Rows in which no class reaches its
# threshold are all zeros and therefore resolve to class 0.
adjusted_predictions = np.where(probabilities >= np.asarray(svm_thresh), probabilities, 0).argmax(axis=1)

# Evaluate
adjusted_precision = precision_score(test_df['label'].values, adjusted_predictions, average='macro')
adjusted_recall = recall_score(test_df['label'].values, adjusted_predictions, average='macro')
adjusted_accuracy = accuracy_score(test_df['label'].values, adjusted_predictions)
adjusted_f1 = f1_score(test_df['label'].values, adjusted_predictions, average='macro')
conf_matrix = confusion_matrix(test_df['label'].values, adjusted_predictions)

print("Optimized thresholds for each class:", svm_thresh)
# Label corrected: this cell reports the SVM meta-classifier
print("SVM")
print(f"Precision: {adjusted_precision:.4f}")
print(f"Recall: {adjusted_recall:.4f}")
print(f"Ensemble accuracy: {adjusted_accuracy:.4f}")
print(f"F1 Score: {adjusted_f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

Optimized thresholds for each class: [0.2635124920545949, 0.4195320505311737, 0.32984560886007913]
Logistic Regression
Precision: 0.8154
Recall: 0.8124
Ensemble accuracy: 0.8107
F1 Score: 0.8137269284864223
Confusion Matrix:
[[388  86   1]
 [ 80 369  42]
 [  5  54 391]]


In [23]:
# SAVE TO GOOGLE SHEET

# Target cells on sheet '4-1'
range_name = '4-1!B11:E11'

# The adjusted_* metrics currently in the kernel, as percentages with two
# decimals, in the order precision, recall, accuracy, F1
values = [[
    f"{metric * 100:.2f}"
    for metric in (adjusted_precision, adjusted_recall, adjusted_accuracy, adjusted_f1)
]]

# Request body expected by the Sheets values API
body = {'values': values}

# Write the row through the Sheets API
result = (
    service.spreadsheets()
    .values()
    .update(
        spreadsheetId=spreadsheet_id,
        range=range_name,
        valueInputOption='USER_ENTERED',
        body=body,
    )
    .execute()
)

print('Updated cells count:', result.get('updatedCells'))

Updated cells count: 4


# Validate on Data C

## Load Data C Dataset

In [24]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Read the Data C CSV (path is relative to the current working directory)
validation_file_path = 'Corpus/FiReCS/FiReCS_data_c.csv'
validation_df = pd.read_csv(filepath_or_buffer=validation_file_path)

## Perform Blending

In [25]:
%%memit

# Meta-features for the validation set: one probability block per base model,
# stacked column-wise
validation_meta_features = np.hstack([
    get_meta_features(model, tokenizer, validation_df, device)
    for model, tokenizer in models_and_tokenizers.values()
])

# Hard-label predictions from each fitted meta-classifier (LR, NB, DT, SVM)
(
    lr_validation_predictions,
    nb_validation_predictions,
    dt_validation_predictions,
    svm_validation_predictions,
) = (
    meta_classifier.predict(validation_meta_features)
    for meta_classifier in (
        lr_meta_classifier,
        nb_meta_classifier,
        dt_meta_classifier,
        svm_meta_classifier,
    )
)

# Ground-truth labels of the validation set
true_labels = validation_df['label'].values

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


peak memory: 1220.41 MiB, increment: 120.78 MiB


## Validate Models

### LR

In [26]:
# Score the LR predictions against the validation labels (macro averages)
conf_matrix = confusion_matrix(y_true=true_labels, y_pred=lr_validation_predictions)
f1 = f1_score(y_true=true_labels, y_pred=lr_validation_predictions, average='macro')
recall = recall_score(y_true=true_labels, y_pred=lr_validation_predictions, average='macro')
precision = precision_score(y_true=true_labels, y_pred=lr_validation_predictions, average='macro')
accuracy = accuracy_score(y_true=true_labels, y_pred=lr_validation_predictions)

# Report: one item per output line
print(
    "Logistic Regression",
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)
print("Confusion Matrix:\n", conf_matrix)

Logistic Regression
Accuracy: 0.8036
Precision: 0.8078
Recall: 0.8047
F1 Score: 0.8060
Confusion Matrix:
 [[278  62   1]
 [ 50 272  42]
 [  4  47 293]]


#### Optimized LR

In [27]:
lr_validation_probabilities = lr_meta_classifier.predict_proba(validation_meta_features)

# Zero out probabilities below their class threshold (lr_thresh) and take the
# arg-max per row; rows with no class at or above its threshold resolve to class 0.
# NOTE: this rebinds lr_validation_predictions, replacing the plain predict()
# output computed in the blending cell.
lr_validation_predictions = np.where(
    lr_validation_probabilities >= np.asarray(lr_thresh),
    lr_validation_probabilities,
    0,
).argmax(axis=1)

# Score the thresholded predictions on the validation set
adjusted_accuracy = accuracy_score(true_labels, lr_validation_predictions)
adjusted_precision = precision_score(true_labels, lr_validation_predictions, average='macro')
adjusted_recall = recall_score(true_labels, lr_validation_predictions, average='macro')
adjusted_f1 = f1_score(true_labels, lr_validation_predictions, average='macro')
adjusted_conf_matrix = confusion_matrix(true_labels, lr_validation_predictions)

# Report: one item per output line
print(
    "Validation Results",
    f"Precision: {adjusted_precision:.4f}",
    f"Recall: {adjusted_recall:.4f}",
    f"Accuracy: {adjusted_accuracy:.4f}",
    f"F1 Score: {adjusted_f1:.4f}",
    "Confusion Matrix:",
    adjusted_conf_matrix,
    sep="\n",
)


Validation Results
Precision: 0.8085
Recall: 0.8057
Accuracy: 0.8046
F1 Score: 0.8069
Confusion Matrix:
[[278  62   1]
 [ 50 272  42]
 [  4  46 294]]


In [28]:
# SAVE TO GOOGLE SHEET

# Target cells on sheet '4-1'
range_name = '4-1!F8:I8'

# The adjusted_* metrics currently in the kernel, as percentages with two
# decimals, in the order precision, recall, accuracy, F1
values = [[
    f"{metric * 100:.2f}"
    for metric in (adjusted_precision, adjusted_recall, adjusted_accuracy, adjusted_f1)
]]

# Request body expected by the Sheets values API
body = {'values': values}

# Write the row through the Sheets API
result = (
    service.spreadsheets()
    .values()
    .update(
        spreadsheetId=spreadsheet_id,
        range=range_name,
        valueInputOption='USER_ENTERED',
        body=body,
    )
    .execute()
)

print('Updated cells count:', result.get('updatedCells'))

Updated cells count: 4


#### Save Confusion Matrix

In [29]:
import altair as alt
import altair_saver
import pandas as pd
from sklearn.metrics import confusion_matrix

# Confusion matrix of the current LR validation predictions
cm = confusion_matrix(true_labels, lr_validation_predictions)

# Display names for the matrix rows/columns
# (assumes label ids 0/1/2 correspond to these names in this order — TODO confirm)
class_names = ['Negative', 'Neutral', 'Positive']

# Long format: one row per (true class, predicted class) cell
cm_df = (
    pd.DataFrame(cm, index=class_names, columns=class_names)
    .reset_index()
    .melt(id_vars='index')
)
cm_df.columns = ['True', 'Predicted', 'Count']

# Ordered categoricals keep the class order fixed
cm_df = cm_df.assign(**{
    column: pd.Categorical(cm_df[column], categories=class_names, ordered=True)
    for column in ('True', 'Predicted')
})

# Heatmap layer: predicted class on x, true class on y, colour = count
heatmap = (
    alt.Chart(cm_df)
    .properties(width=400, height=300, title='OF_Using_FIRE 4B-1 LR')
    .mark_rect()
    .encode(
        alt.X('Predicted:O', sort=class_names),
        alt.Y('True:O', sort=class_names),
        color='Count:Q',
        tooltip=['True', 'Predicted', 'Count'],
    )
)

# Count labels: white text on cells above half of the largest count, black otherwise
text = heatmap.mark_text(align='center', baseline='middle', fontSize=12).encode(
    text='Count:Q',
    color=alt.condition(
        alt.datum.Count > cm.max() / 2,
        alt.value('white'),
        alt.value('black'),
    ),
)

# Layer the labels on top of the heatmap
final_chart = alt.layer(heatmap, text)

# Display the plot
final_chart.show()

In [30]:

# Specify the folder path (relative to the current working directory)
folder_path = os.path.expanduser('Results/Ensemble Model Results/On FireCS dataset/OF Using FIRE ENSEMBLE/Blending/')

# Create the folder if it does not exist yet, so saving does not fail on a fresh checkout
os.makedirs(folder_path, exist_ok=True)

# Save the plot using vl-convert
file_path_png = os.path.join(folder_path, 'OF_Using_FIRE 4B-1 LR.png')
final_chart.save(file_path_png)

print(f"Plot saved to {file_path_png}")

Plot saved to Results/Ensemble Model Results/On FireCS dataset/OF Using FIRE ENSEMBLE/Blending/OF_Using_FIRE 4B-1 LR.png


### NB

In [31]:
# Score the NB predictions against the validation labels (macro averages)
conf_matrix = confusion_matrix(y_true=true_labels, y_pred=nb_validation_predictions)
f1 = f1_score(y_true=true_labels, y_pred=nb_validation_predictions, average='macro')
recall = recall_score(y_true=true_labels, y_pred=nb_validation_predictions, average='macro')
precision = precision_score(y_true=true_labels, y_pred=nb_validation_predictions, average='macro')
accuracy = accuracy_score(y_true=true_labels, y_pred=nb_validation_predictions)

# Report: one item per output line
print(
    "Naive Bayes",
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)
print("Confusion Matrix:\n", conf_matrix)

Naive Bayes
Accuracy: 0.8046
Precision: 0.8078
Recall: 0.8059
F1 Score: 0.8068
Confusion Matrix:
 [[279  61   1]
 [ 54 269  41]
 [  3  45 296]]


#### Optimized NB

In [32]:
nb_validation_probabilities = nb_meta_classifier.predict_proba(validation_meta_features)

# Zero out probabilities below their class threshold (nb_thresh) and take the
# arg-max per row; rows with no class at or above its threshold resolve to class 0.
# NOTE: this rebinds nb_validation_predictions, replacing the plain predict()
# output computed in the blending cell.
nb_validation_predictions = np.where(
    nb_validation_probabilities >= np.asarray(nb_thresh),
    nb_validation_probabilities,
    0,
).argmax(axis=1)

# Score the thresholded predictions on the validation set
adjusted_accuracy = accuracy_score(true_labels, nb_validation_predictions)
adjusted_precision = precision_score(true_labels, nb_validation_predictions, average='macro')
adjusted_recall = recall_score(true_labels, nb_validation_predictions, average='macro')
adjusted_f1 = f1_score(true_labels, nb_validation_predictions, average='macro')
adjusted_conf_matrix = confusion_matrix(true_labels, nb_validation_predictions)

# Report: one item per output line
print(
    "Validation Results",
    f"Precision: {adjusted_precision:.4f}",
    f"Recall: {adjusted_recall:.4f}",
    f"Accuracy: {adjusted_accuracy:.4f}",
    f"F1 Score: {adjusted_f1:.4f}",
    "Confusion Matrix:",
    adjusted_conf_matrix,
    sep="\n",
)


Validation Results
Precision: 0.8078
Recall: 0.8059
Accuracy: 0.8046
F1 Score: 0.8068
Confusion Matrix:
[[279  61   1]
 [ 54 269  41]
 [  3  45 296]]


In [33]:
# SAVE TO GOOGLE SHEET

# Target cells on sheet '4-1'
range_name = '4-1!F9:I9'

# The adjusted_* metrics currently in the kernel, as percentages with two
# decimals, in the order precision, recall, accuracy, F1
values = [[
    f"{metric * 100:.2f}"
    for metric in (adjusted_precision, adjusted_recall, adjusted_accuracy, adjusted_f1)
]]

# Request body expected by the Sheets values API
body = {'values': values}

# Write the row through the Sheets API
result = (
    service.spreadsheets()
    .values()
    .update(
        spreadsheetId=spreadsheet_id,
        range=range_name,
        valueInputOption='USER_ENTERED',
        body=body,
    )
    .execute()
)

print('Updated cells count:', result.get('updatedCells'))

Updated cells count: 4


#### Save Confusion Matrix

In [34]:
import altair as alt
import pandas as pd
from sklearn.metrics import confusion_matrix

# Confusion matrix of the current NB validation predictions
cm = confusion_matrix(true_labels, nb_validation_predictions)

# Display names for the matrix rows/columns
# (assumes label indices 0/1/2 correspond to this order — TODO confirm against the label encoding)
class_names = ['Negative', 'Neutral', 'Positive']

# Long-form table for Altair: one row per matrix cell.
# The frame's index (matrix rows) becomes 'True', its columns become 'Predicted'.
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
cm_df = cm_df.reset_index().melt(id_vars='index')
cm_df.columns = ['True', 'Predicted', 'Count']

# Turn both label columns into ordered categoricals that follow class_names
for axis_column in ('True', 'Predicted'):
    cm_df[axis_column] = pd.Categorical(cm_df[axis_column], categories=class_names, ordered=True)

# Heatmap layer: one rectangle per (Predicted, True) pair, coloured by Count
heatmap = (
    alt.Chart(cm_df)
    .mark_rect()
    .encode(
        x=alt.X('Predicted:O', sort=class_names),
        y=alt.Y('True:O', sort=class_names),
        color='Count:Q',
        tooltip=['True', 'Predicted', 'Count'],
    )
    .properties(width=400, height=300, title='OF_Using_FIRE 4B-1 NB')
)

# Label layer: the count printed in each cell; white text when the count exceeds
# half of the largest cell, black otherwise
text = heatmap.mark_text(align='center', baseline='middle', fontSize=12).encode(
    text='Count:Q',
    color=alt.condition(
        alt.datum.Count > cm.max() / 2,
        alt.value('white'),
        alt.value('black'),
    ),
)

# Layer the labels over the heatmap
final_chart = heatmap + text

# Display the plot
final_chart.show()

In [35]:
# Output folder for the blending plots, relative to the current working directory.
# The path contains no '~', so os.path.expanduser() was a no-op and is dropped.
folder_path = 'Results/Ensemble Model Results/On FireCS dataset/OF Using FIRE ENSEMBLE/Blending/'

# Create the folder (and any missing parents) so the save below does not fail
# when the results tree has not been created yet; harmless if it already exists
os.makedirs(folder_path, exist_ok=True)

# Save the plot using vl-convert
file_path_png = os.path.join(folder_path, 'OF_Using_FIRE 4B-1 NB.png')
final_chart.save(file_path_png)

print(f"Plot saved to {file_path_png}")

Plot saved to Results/Ensemble Model Results/On FireCS dataset/OF Using FIRE ENSEMBLE/Blending/OF_Using_FIRE 4B-1 NB.png


### DT

In [36]:
# Score the current Decision Tree validation predictions:
# confusion matrix, plain accuracy, and macro-averaged precision / recall / F1
conf_matrix = confusion_matrix(true_labels, dt_validation_predictions)
accuracy = accuracy_score(true_labels, dt_validation_predictions)
precision, recall, f1 = (
    macro_scorer(true_labels, dt_validation_predictions, average='macro')
    for macro_scorer in (precision_score, recall_score, f1_score)
)

# Print the metrics, one per line
print(
    "Decision Tree",
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)
print("Confusion Matrix:\n", conf_matrix)

Decision Tree
Accuracy: 0.7464
Precision: 0.7479
Recall: 0.7482
F1 Score: 0.7480
Confusion Matrix:
 [[260  68  13]
 [ 73 239  52]
 [  4  56 284]]


#### Optimized DT

In [37]:
# Per-class probability estimates from the DT meta-classifier for the validation meta-features
dt_validation_probabilities = dt_meta_classifier.predict_proba(validation_meta_features)

# Apply the per-class thresholds: a probability below its class threshold is replaced by 0,
# then the largest remaining entry in each row decides the predicted class index.
# A row in which no class clears its threshold becomes all zeros, so argmax returns index 0.
# NOTE: this rebinds dt_validation_predictions, which the baseline "DT" metrics cell also
# reads; re-running that cell afterwards would score these thresholded predictions instead.
dt_validation_predictions = np.where(
    dt_validation_probabilities
    >= np.array([dt_thresh[j] for j in range(dt_validation_probabilities.shape[1])]),
    dt_validation_probabilities,
    0,
).argmax(axis=1)

# Score the thresholded predictions against the validation labels (macro-averaged
# precision / recall / F1, plain accuracy, and the confusion matrix)
adjusted_conf_matrix = confusion_matrix(true_labels, dt_validation_predictions)
adjusted_accuracy = accuracy_score(true_labels, dt_validation_predictions)
adjusted_precision, adjusted_recall, adjusted_f1 = (
    macro_scorer(true_labels, dt_validation_predictions, average='macro')
    for macro_scorer in (precision_score, recall_score, f1_score)
)

# Report everything in one call; sep="\n" puts each item on its own line
print(
    "Validation Results",
    f"Precision: {adjusted_precision:.4f}",
    f"Recall: {adjusted_recall:.4f}",
    f"Accuracy: {adjusted_accuracy:.4f}",
    f"F1 Score: {adjusted_f1:.4f}",
    "Confusion Matrix:",
    adjusted_conf_matrix,
    sep="\n",
)


Validation Results
Precision: 0.7479
Recall: 0.7482
Accuracy: 0.7464
F1 Score: 0.7480
Confusion Matrix:
[[260  68  13]
 [ 73 239  52]
 [  4  56 284]]


In [38]:
# SAVE TO GOOGLE SHEET

# A1-notation target: cells F10..I10 on the tab named '4-1'
range_name = '4-1!F10:I10'

# One row holding precision, recall, accuracy and F1 as percentages with two decimals.
# NOTE: the adjusted_* globals hold whatever the most recently executed evaluation
# cell assigned, so run this right after the matching evaluation cell.
values = [[
    f"{metric * 100:.2f}"
    for metric in (adjusted_precision, adjusted_recall, adjusted_accuracy, adjusted_f1)
]]

# Request payload expected by values().update()
body = dict(values=values)

# Build the update request, then send it
update_request = service.spreadsheets().values().update(
    spreadsheetId=spreadsheet_id,
    range=range_name,
    valueInputOption='USER_ENTERED',
    body=body,
)
result = update_request.execute()

print('Updated cells count:', result.get('updatedCells'))

Updated cells count: 4


#### Save Confusion Matrix

In [39]:
import altair as alt
import pandas as pd
from sklearn.metrics import confusion_matrix

# Confusion matrix of the current DT validation predictions
cm = confusion_matrix(true_labels, dt_validation_predictions)

# Display names for the matrix rows/columns
# (assumes label indices 0/1/2 correspond to this order — TODO confirm against the label encoding)
class_names = ['Negative', 'Neutral', 'Positive']

# Long-form table for Altair: one row per matrix cell.
# The frame's index (matrix rows) becomes 'True', its columns become 'Predicted'.
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
cm_df = cm_df.reset_index().melt(id_vars='index')
cm_df.columns = ['True', 'Predicted', 'Count']

# Turn both label columns into ordered categoricals that follow class_names
for axis_column in ('True', 'Predicted'):
    cm_df[axis_column] = pd.Categorical(cm_df[axis_column], categories=class_names, ordered=True)

# Heatmap layer: one rectangle per (Predicted, True) pair, coloured by Count
heatmap = (
    alt.Chart(cm_df)
    .mark_rect()
    .encode(
        x=alt.X('Predicted:O', sort=class_names),
        y=alt.Y('True:O', sort=class_names),
        color='Count:Q',
        tooltip=['True', 'Predicted', 'Count'],
    )
    .properties(width=400, height=300, title='OF_Using_FIRE 4B-1 DT')
)

# Label layer: the count printed in each cell; white text when the count exceeds
# half of the largest cell, black otherwise
text = heatmap.mark_text(align='center', baseline='middle', fontSize=12).encode(
    text='Count:Q',
    color=alt.condition(
        alt.datum.Count > cm.max() / 2,
        alt.value('white'),
        alt.value('black'),
    ),
)

# Layer the labels over the heatmap
final_chart = heatmap + text

# Display the plot
final_chart.show()

In [40]:
# Output folder for the blending plots, relative to the current working directory.
# The path contains no '~', so os.path.expanduser() was a no-op and is dropped.
folder_path = 'Results/Ensemble Model Results/On FireCS dataset/OF Using FIRE ENSEMBLE/Blending/'

# Create the folder (and any missing parents) so the save below does not fail
# when the results tree has not been created yet; harmless if it already exists
os.makedirs(folder_path, exist_ok=True)

# Save the plot using vl-convert
file_path_png = os.path.join(folder_path, 'OF_Using_FIRE 4B-1 DT.png')
final_chart.save(file_path_png)

print(f"Plot saved to {file_path_png}")

Plot saved to Results/Ensemble Model Results/On FireCS dataset/OF Using FIRE ENSEMBLE/Blending/OF_Using_FIRE 4B-1 DT.png


### SVM

In [41]:
# Score the current SVM validation predictions:
# confusion matrix, plain accuracy, and macro-averaged precision / recall / F1
conf_matrix = confusion_matrix(true_labels, svm_validation_predictions)
accuracy = accuracy_score(true_labels, svm_validation_predictions)
precision, recall, f1 = (
    macro_scorer(true_labels, svm_validation_predictions, average='macro')
    for macro_scorer in (precision_score, recall_score, f1_score)
)

# Print the metrics, one per line
print(
    "SVM",
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)
print("Confusion Matrix:\n", conf_matrix)

SVM
Accuracy: 0.8027
Precision: 0.8077
Recall: 0.8036
F1 Score: 0.8053
Confusion Matrix:
 [[275  65   1]
 [ 51 274  39]
 [  4  47 293]]


#### Optimized SVM

In [42]:
# Per-class probability estimates from the SVM meta-classifier for the validation meta-features
svm_validation_probabilities = svm_meta_classifier.predict_proba(validation_meta_features)

# Apply the per-class thresholds: a probability below its class threshold is replaced by 0,
# then the largest remaining entry in each row decides the predicted class index.
# A row in which no class clears its threshold becomes all zeros, so argmax returns index 0.
# NOTE: this rebinds svm_validation_predictions, which the baseline "SVM" metrics cell also
# reads; re-running that cell afterwards would score these thresholded predictions instead.
svm_validation_predictions = np.where(
    svm_validation_probabilities
    >= np.array([svm_thresh[j] for j in range(svm_validation_probabilities.shape[1])]),
    svm_validation_probabilities,
    0,
).argmax(axis=1)

# Score the thresholded predictions against the validation labels (macro-averaged
# precision / recall / F1, plain accuracy, and the confusion matrix)
adjusted_conf_matrix = confusion_matrix(true_labels, svm_validation_predictions)
adjusted_accuracy = accuracy_score(true_labels, svm_validation_predictions)
adjusted_precision, adjusted_recall, adjusted_f1 = (
    macro_scorer(true_labels, svm_validation_predictions, average='macro')
    for macro_scorer in (precision_score, recall_score, f1_score)
)

# Report everything in one call; sep="\n" puts each item on its own line
print(
    "Validation Results",
    f"Precision: {adjusted_precision:.4f}",
    f"Recall: {adjusted_recall:.4f}",
    f"Accuracy: {adjusted_accuracy:.4f}",
    f"F1 Score: {adjusted_f1:.4f}",
    "Confusion Matrix:",
    adjusted_conf_matrix,
    sep="\n",
)


Validation Results
Precision: 0.8051
Recall: 0.8018
Accuracy: 0.8008
F1 Score: 0.8032
Confusion Matrix:
[[274  65   2]
 [ 51 272  41]
 [  4  46 294]]


In [43]:
# SAVE TO GOOGLE SHEET

# A1-notation target: cells F11..I11 on the tab named '4-1'
range_name = '4-1!F11:I11'

# One row holding precision, recall, accuracy and F1 as percentages with two decimals.
# NOTE: the adjusted_* globals hold whatever the most recently executed evaluation
# cell assigned, so run this right after the matching evaluation cell.
values = [[
    f"{metric * 100:.2f}"
    for metric in (adjusted_precision, adjusted_recall, adjusted_accuracy, adjusted_f1)
]]

# Request payload expected by values().update()
body = dict(values=values)

# Build the update request, then send it
update_request = service.spreadsheets().values().update(
    spreadsheetId=spreadsheet_id,
    range=range_name,
    valueInputOption='USER_ENTERED',
    body=body,
)
result = update_request.execute()

print('Updated cells count:', result.get('updatedCells'))

Updated cells count: 4


#### Save Confusion Matrix

In [44]:
import altair as alt
import pandas as pd
from sklearn.metrics import confusion_matrix

# Confusion matrix of the current SVM validation predictions
cm = confusion_matrix(true_labels, svm_validation_predictions)

# Display names for the matrix rows/columns
# (assumes label indices 0/1/2 correspond to this order — TODO confirm against the label encoding)
class_names = ['Negative', 'Neutral', 'Positive']

# Long-form table for Altair: one row per matrix cell.
# The frame's index (matrix rows) becomes 'True', its columns become 'Predicted'.
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
cm_df = cm_df.reset_index().melt(id_vars='index')
cm_df.columns = ['True', 'Predicted', 'Count']

# Turn both label columns into ordered categoricals that follow class_names
for axis_column in ('True', 'Predicted'):
    cm_df[axis_column] = pd.Categorical(cm_df[axis_column], categories=class_names, ordered=True)

# Heatmap layer: one rectangle per (Predicted, True) pair, coloured by Count
heatmap = (
    alt.Chart(cm_df)
    .mark_rect()
    .encode(
        x=alt.X('Predicted:O', sort=class_names),
        y=alt.Y('True:O', sort=class_names),
        color='Count:Q',
        tooltip=['True', 'Predicted', 'Count'],
    )
    .properties(width=400, height=300, title='OF_Using_FIRE 4B-1 SVM')
)

# Label layer: the count printed in each cell; white text when the count exceeds
# half of the largest cell, black otherwise
text = heatmap.mark_text(align='center', baseline='middle', fontSize=12).encode(
    text='Count:Q',
    color=alt.condition(
        alt.datum.Count > cm.max() / 2,
        alt.value('white'),
        alt.value('black'),
    ),
)

# Layer the labels over the heatmap
final_chart = heatmap + text

# Display the plot
final_chart.show()

In [45]:
# Output folder for the blending plots, relative to the current working directory.
# The path contains no '~', so os.path.expanduser() was a no-op and is dropped.
folder_path = 'Results/Ensemble Model Results/On FireCS dataset/OF Using FIRE ENSEMBLE/Blending/'

# Create the folder (and any missing parents) so the save below does not fail
# when the results tree has not been created yet; harmless if it already exists
os.makedirs(folder_path, exist_ok=True)

# Save the plot using vl-convert
file_path_png = os.path.join(folder_path, 'OF_Using_FIRE 4B-1 SVM.png')
final_chart.save(file_path_png)

print(f"Plot saved to {file_path_png}")

Plot saved to Results/Ensemble Model Results/On FireCS dataset/OF Using FIRE ENSEMBLE/Blending/OF_Using_FIRE 4B-1 SVM.png
