# Task 4
This serves as a template which will guide you through the implementation of this task. It is advised to first read the whole template and get a sense of the overall structure of the code before trying to fill in any of the TODO gaps.
This is the Jupyter notebook version of the template. For the Python file version, please refer to the file `template_solution.py`.

First, we import necessary libraries:

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertTokenizer, DistilBertModel
from transformers import Trainer, TrainingArguments, AdamW 
from sklearn.metrics import mean_squared_error as mse

  from .autonotebook import tqdm as notebook_tqdm





Depending on your approach, you might need to adapt the structure of this template or parts not marked by TODOs.
It is not necessary to completely follow this template. Feel free to add more code and delete any parts that are not required.

In [3]:
# Probe for CUDA once: the flag is echoed and also selects the compute device.
cuda_available = torch.cuda.is_available()
print(cuda_available)
DEVICE = torch.device("cuda:0") if cuda_available else torch.device("cpu")

# Training hyper-parameters.
BATCH_SIZE = 4  # samples per step; tune against training performance and available memory
NUM_EPOCHS = 5  # number of full passes over the training loader

# Load the training table and the test table from the working directory.
train_val = pd.read_csv("train.csv")
test_val = pd.read_csv("test_no_score.csv")

True


In [4]:
# def print_gpu_info():
#     if torch.cuda.is_available():
#         device_count = torch.cuda.device_count()
#         print(f"Number of available GPUs: {device_count}")
#         for i in range(device_count):
#             device_name = torch.cuda.get_device_name(i)
#             device = torch.device(f'cuda:{i}')
#             print(f"GPU {i}: {device_name}")
#         # Print the GPU that is currently in use
#         current_device = torch.cuda.current_device()
#         print(f"Current GPU: {torch.cuda.get_device_name(current_device)}")
#     else:
#         print("CUDA is not available. No GPU found.")

# # Call the function to print GPU info
# print_gpu_info()


In [17]:
class ReviewDataset(Dataset):
    """Map-style dataset that tokenizes (title, sentence) pairs from a DataFrame.

    Every item is a dict holding ``input_ids``, ``attention_mask`` and a float
    ``score`` tensor. Rows without a ``score`` column get a score of ``0.0``.
    """

    def __init__(self, data, tokenizer, max_len):
        """Keep references to the table, the tokenizer and the sequence length.

        Args:
            data: pandas DataFrame with ``title`` and ``sentence`` columns and,
                optionally, a ``score`` column.
            tokenizer: Callable invoked as ``tokenizer(title, sentence, ...)``
                that returns a mapping with ``input_ids`` and
                ``attention_mask`` tensors carrying a leading batch axis.
            max_len: Value forwarded to the tokenizer as ``max_length``.
        """
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the underlying table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize the row at *position* ``idx`` and return it as tensors.

        Args:
            idx: Zero-based row position, as produced by a DataLoader sampler.

        Returns:
            Dict with ``input_ids``, ``attention_mask`` (batch axis removed)
            and ``score`` (0-d float tensor).
        """
        # Samplers yield positions 0..len-1, so look the row up by position.
        # Label-based `.loc` breaks as soon as the frame's index is not the
        # default RangeIndex (e.g. after filtering or a train/val split).
        row = self.data.iloc[idx]
        title = str(row['title'])
        sentence = str(row['sentence'])
        score = row.get('score', 0.0)

        inputs = self.tokenizer(
            title,
            sentence,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
        )

        # squeeze(0) removes only the leading batch axis; a bare squeeze()
        # would also collapse the sequence axis when max_len == 1.
        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'score': torch.tensor(score, dtype=torch.float),
        }

In [18]:
# Token length passed to the datasets; the tokenizer is asked to pad/truncate to it.
max_len = 512
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

train_dataset = ReviewDataset(train_val, tokenizer, max_len)
test_dataset = ReviewDataset(test_val, tokenizer, max_len)

# Settings common to both loaders; only the shuffling differs between them.
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=0, pin_memory=True)

# Training batches are reshuffled every epoch; test batches keep file order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)
# Additional code if needed



In [7]:
class MyModule(nn.Module):
    """Scalar regression head stacked on a pretrained encoder.

    The encoder's hidden state at sequence position 0 is passed through
    dropout and a single linear layer, giving one value per batch element.
    """

    def __init__(self, premodel):
        """Wrap ``premodel`` and create the dropout and linear output layers.

        Args:
            premodel: Encoder exposing ``config.hidden_size`` and returning an
                object with a ``last_hidden_state`` attribute when called.
        """
        super().__init__()
        self.premodel = premodel
        hidden_dim = self.premodel.config.hidden_size
        self.fc1 = nn.Linear(hidden_dim, 1)
        self.dropout = nn.Dropout(p=0.1)

    def forward(self, input_ids, attention_mask):
        """Return one predicted score per batch element.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.

        Returns:
            Output of the linear layer with axis 1 squeezed away.
        """
        encoded = self.premodel(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state of the first sequence position as the summary vector.
        first_token = encoded.last_hidden_state[:, 0]
        projected = self.fc1(self.dropout(first_token))
        return projected.squeeze(1)

In [None]:
# Build the regressor on top of the pretrained DistilBERT encoder.
premodel = DistilBertModel.from_pretrained('distilbert-base-uncased')

model = MyModule(premodel)
# Resume from the checkpoint. The file name matches, including letter case,
# the one this notebook writes with torch.save, so the load also works on
# case-sensitive filesystems. map_location lets a checkpoint saved on a GPU
# be restored on a CPU-only machine.
model.load_state_dict(torch.load("Das_Model_Ceci.pth", map_location=DEVICE))
model.to(DEVICE)

# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and
# weight_decay are spelled out to mirror that class's defaults, so the update
# rule stays the same.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-6, eps=1e-6, weight_decay=0.0)
criterion = nn.MSELoss()
# The learning rate is multiplied by 0.1 every 5 scheduler steps (one step per
# epoch), so the decay only affects training when NUM_EPOCHS exceeds 5.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# ---- Training ----
for epoch in range(NUM_EPOCHS):
    print('Epoch :', epoch+1, '/',NUM_EPOCHS)

    # Switching to training mode once per epoch is enough; nothing inside the
    # batch loop changes the mode.
    model.train()
    train_loss = 0.0

    for batch in tqdm(train_loader, total=len(train_loader)):
        ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        score = batch['score'].to(DEVICE)

        optimizer.zero_grad()

        # Forward pass
        train_scores = model(input_ids=ids, attention_mask=attention_mask)
        loss = criterion(train_scores, score)
        train_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    scheduler.step()
    # Report the mean per-batch loss for this epoch.
    print(f"Epoch {epoch+1}, Training Loss: {train_loss / len(train_loader)}")

# ---- Inference on the test loader ----
model.eval()
predictions = []

with torch.no_grad():
    for batch in tqdm(test_loader, total=len(test_loader)):
        ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)

        outputs = model(input_ids=ids, attention_mask=attention_mask)

        # Move to host memory and flatten the batch into the running list.
        predictions.extend(outputs.cpu().numpy())

# One prediction per line, in test-loader order (the loader is not shuffled).
with open("result1.txt", "w") as f:
    f.writelines(f"{val}\n" for val in predictions)
            

In [21]:
# Persist only the model's state dictionary (its parameters and buffers).
checkpoint_path = "Das_Model_Ceci.pth"
torch.save(model.state_dict(), checkpoint_path)