# Shared-Private Architecture

In [1]:
# Importing from the src directory
import sys
sys.path.append('./src')

# Import necessary modules and functions
import torch
from torch.utils.data import DataLoader
from data import HumorDataset, sharedprivate_load_and_split_data
from models import SharedPrivateModel
from training import sharedprivate_train_model, sharedprivate_evaluate_model

# Device configuration for MPS compatibility
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")

# Parameters
MAX_LENGTH = 64
BATCH_SIZE = 32  # Adjusted for memory considerations
EPOCHS = 4
LEARNING_RATE = 0.00005  # 0.00005
NUM_LABELS = 2  # Humorous or not

# Paths to models
SHARED_MODEL_PATH = './models/bert-classification'  # Path to your saved shared BERT model
PRIVATE_MODEL_PATH = './models/bert-mlm'            # Path to your pre-trained BERT-MLM model
TOKENIZER_NAME = 'bert-base-uncased'                # Or the name of your tokenizer

# Path to dataset CSV file
CSV_FILE_PATH = './data/shared_private_dataset.csv'

# Load and split data
print("Loading and splitting data...")
train_df, val_df, test_df, humor_type_to_idx = sharedprivate_load_and_split_data(CSV_FILE_PATH)

# List of humor types
humor_types = list(humor_type_to_idx.keys())
print(f"Humor types: {humor_types}")
print(f"Humor type to index mapping: {humor_type_to_idx}")

2024-11-21 21:31:29.549877: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using device: mps
Loading and splitting data...
Humor types: ['body punchlines', 'news headlines', 'puns', 'storylines']
Humor type to index mapping: {'body punchlines': 0, 'news headlines': 1, 'puns': 2, 'storylines': 3}


In [2]:
# Creating datasets and dataloaders
print("Creating datasets and dataloaders...")
train_dataset = HumorDataset(train_df, TOKENIZER_NAME, MAX_LENGTH)
val_dataset = HumorDataset(val_df, TOKENIZER_NAME, MAX_LENGTH)
test_dataset = HumorDataset(test_df, TOKENIZER_NAME, MAX_LENGTH)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Instead of trying to display the datasets, print their lengths
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")


Creating datasets and dataloaders...
Number of training samples: 142553
Number of validation samples: 30547
Number of test samples: 30547


In [3]:
# Initialize the model
print("Initializing the shared-private model...")
model = SharedPrivateModel(
    shared_model_path=SHARED_MODEL_PATH,
    private_model_path=PRIVATE_MODEL_PATH,
    num_labels=NUM_LABELS,
    humor_types=humor_types
)

# Train the model
print("Starting training...")
model = sharedprivate_train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    device=device,
    save_dir='./models/shared_private_model'
)

# Save the trained model
print("Saving the model...")
torch.save(model.state_dict(), './models/shared_private_model.pt')
print("Model training complete and saved.")

Some weights of BertModel were not initialized from the model checkpoint at ./models/bert-mlm and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertModel were not initialized from the model checkpoint at ./models/bert-mlm and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertModel were not initialized from the model checkpoint at ./models/bert-mlm and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Initializing the shared-private model...


Some weights of BertModel were not initialized from the model checkpoint at ./models/bert-mlm and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training...
Training on device: mps


Epoch 1/4: 100%|██████████████████████████| 4455/4455 [1:21:56<00:00,  1.10s/it]


Epoch 1/4 - Average Training Loss: 0.1305
Validation Metrics - Accuracy: 0.9545, Precision: 0.9546, Recall: 0.9545, F1-Score: 0.9545
Saving checkpoint to: ./models/shared_private_model/shared_private_epoch_1.pt


RuntimeError: Parent directory ./models/shared_private_model does not exist.

In [None]:
# metrics =  sharedprivate_evaluate_model(model, test_loader, device)

# print('Evaluation Metrics: \n', metrics)

#                                                                                                                             # 