In [1]:
!nvidia-smi

Sun Aug 18 02:35:20 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          On  |   00000000:C2:00.0 Off |                    0 |
| N/A   42C    P0             73W /  300W |   30778MiB /  81920MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
pip install torch transformers


Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch
from transformers import AutoTokenizer, AutoModel
import pandas as pd
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the model and tokenizer, and move the model to the GPU
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).to('cuda')

# Define cosine similarity function
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Function to get embeddings on GPU
def get_embeddings(texts, batch_size=32):
    embeddings = []
    with torch.no_grad():
        for i in range(0, len(texts), batch_size):
            batch = texts[i:i + batch_size]
            tokens = tokenizer(batch, padding=True, truncation=True, return_tensors='pt').to('cuda')
            outputs = model(**tokens)
            batch_embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
            embeddings.extend(batch_embeddings)
    return embeddings

# Function to calculate embeddings for all texts
def calculate_embeddings(df):
    df['untrained_embedding'] = get_embeddings(df['untrained_prediction'].tolist())
    df['trained_embedding'] = get_embeddings(df['trained_prediction'].tolist())
    df['answer_embedding'] = get_embeddings(df['answer'].tolist())
    return df



In [5]:
# Load the dataset
df = pd.read_csv('predictions.csv')

# Calculate embeddings and store them in the DataFrame
df = calculate_embeddings(df)


In [6]:
# Function to calculate precision, recall, F1 score, and accuracy
def calculate_metrics(predictions, answers, threshold=0.8):
    similarity_scores = [
        cosine_similarity(pred_embedding, ans_embedding)
        for pred_embedding, ans_embedding in zip(predictions, answers)
    ]
    
    y_true = np.ones(len(answers))  # Ground truth: all answers are positive
    y_pred = np.array([1 if score >= threshold else 0 for score in similarity_scores])

    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    
    return precision, recall, f1, accuracy

# Calculate metrics for untrained predictions
untrained_metrics = calculate_metrics(df['untrained_embedding'], df['answer_embedding'], threshold=0.8)
print(f'Untrained Model - Precision: {untrained_metrics[0]:.2%}, Recall: {untrained_metrics[1]:.2%}, F1 Score: {untrained_metrics[2]:.2%}, Accuracy: {untrained_metrics[3]:.2%}')

# Calculate metrics for trained predictions
trained_metrics = calculate_metrics(df['trained_embedding'], df['answer_embedding'], threshold=0.8)
print(f'Trained Model - Precision: {trained_metrics[0]:.2%}, Recall: {trained_metrics[1]:.2%}, F1 Score: {trained_metrics[2]:.2%}, Accuracy: {trained_metrics[3]:.2%}')


Untrained Model - Precision: 100.00%, Recall: 53.00%, F1 Score: 69.28%, Accuracy: 53.00%
Trained Model - Precision: 100.00%, Recall: 71.00%, F1 Score: 83.04%, Accuracy: 71.00%
