In [None]:
import pandas as pd
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import matplotlib.pyplot as plt
import random
import os
import torch.nn as nn
# Activation Function
import torch.nn.functional as AF



# custom dataset class for our image data
class BookCoverDataset(Dataset):
    def __init__(self, csv_file_x,csv_file_y, root_dir, transforms=None):
        
        dataX= pd.read_csv(csv_file_x)
        dataY= pd.read_csv(csv_file_y)
        self.annotations=pd.merge(dataX, dataY, on='Id')
        self.root_dir=root_dir
        self.transforms=transforms
        self.length=len(self.annotations)
        
    def __len__(self):
        return self.length
    
    def __getitem__(self,index):
        imgPath=os.path.join(self.root_dir, self.annotations.iloc[index,1])
        image=Image.open(imgPath)
        y=torch.tensor(int(self.annotations.iloc[index,-1]))
        if self.transforms:
            image=self.transforms(image)
        return (image,y)


def train(ep=5):
    # Training loop
    for epoch in range(ep):
        for i, (img,target) in enumerate(train_loader):
            img=img.to(device)
            target=target.to(device)

            out=model(img)
            loss=lossFunction(out,target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if(i%100==0):
                print(loss.item())
        print(f'epoch Finished: {loss.item()}')

def save(fileName):    
    #saving the model
    torch.save(model.state_dict(), fileName)  

def load(fileName):
    #loading the model
    state_dict = torch.load(fileName)
    model.load_state_dict(state_dict)

def training_accuracy():
    #Training accuracy    
    correct=0
    with torch.no_grad():
        for i, (img,target) in enumerate(train_loader):
            img=img.to(device)
            target=target.to(device)
            out=model(img)
    #         print(out)
            for j in range(len(out)):
    #             print(f'{torch.argmax(out[j])} : {target[j]}')
                if torch.argmax(out[j]) == target[j]:
                    correct+=1

    print(correct)
    print(correct*100/len(training_data))

def test_accuracy():
    #Test accuracy
    correct=0
    with torch.no_grad():
        for i, (img,target) in enumerate(test_loader):
            img=img.to(device)
            target=target.to(device)
            out=model(img)
    #         print(out)
            for j in range(len(out)):
    #             print(f'{torch.argmax(out[j])} : {target[j]}')
                if torch.argmax(out[j]) == target[j]:
                    correct+=1

    print(correct)
    print(correct*100/len(testing_data))

    

batch_size=32

#calculated mean and std of the entire data separately
mean=torch.tensor([0.5482, 0.5109, 0.4749])
std=torch.tensor([0.2526, 0.2428, 0.2356])

# Defining a custom transform pipeline
train_transform=transforms.Compose([
    transforms.RandomInvert(0.6),
    transforms.RandomSolarize(0.6),
    transforms.ToTensor(),
    transforms.ColorJitter(),
    transforms.Normalize(mean=mean,std=std),
])

test_transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean,std=std), 
])

#Getting GPU support if available
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    
    
    
# initializing cutom dataset
testing_data=BookCoverDataset(csv_file_x='/kaggle/input/col774-2022/non_comp_test_x.csv',
                               csv_file_y='/kaggle/input/col774-2022/non_comp_test_y.csv',
                               root_dir='/kaggle/input/col774-2022/images/images',
                              transforms=test_transform)

training_data=BookCoverDataset(csv_file_x='/kaggle/input/col774-2022/train_x.csv',
                               csv_file_y='/kaggle/input/col774-2022/train_y.csv',
                               root_dir='/kaggle/input/col774-2022/images/images',
                               transforms=train_transform)


# Dataloader for training dataset
train_loader=DataLoader(dataset=training_data, batch_size=batch_size,shuffle=True)
# Dataloader for testing dataset
test_loader=DataLoader(dataset=testing_data, batch_size=batch_size,shuffle=True)


   
model=torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
num_fters=model.fc.in_features
model.fc=nn.Linear(num_fters, 30)
# # freezing first 3 layers in RESNET
# child_no=0
# for child in model.children():
#     child_no+=1
#     if child_no<4:
#         for param in child.parameters():
#             param.requires_grad=False
# load('cnn_with_dropout_and_regularization2')  
model=model.to(device)
lossFunction=nn.CrossEntropyLoss()
# optimizer=torch.optim.SGD(model.parameters(), lr=0.0001)
optimizer=torch.optim.Adam(model.parameters(), lr=0.0001)    
 
train(ep=7)
save('cnn_with_dropout_and_regularization4')
training_accuracy()    
test_accuracy()

# 9627
# 28.149122807017545
# 1362
# 23.894736842105264


In [None]:
from transformers import BertTokenizer
from transformers import BertModel
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import re
from torch import nn
import torch
import os
from PIL import Image


# custom dataset class for our image data
class BookTitleDataset(Dataset):
    def __init__(self, csv_file_x,csv_file_y,  tokenizer, max_title_len):
        
        dataX= pd.read_csv(csv_file_x)
        dataY= pd.read_csv(csv_file_y)
        data=pd.merge(dataX, dataY, on='Id')
        del dataX
        del dataY
        title=data.iloc[:,2]
        self.x=title
        self.y=data.iloc[:,-1]
#         self.vocab=vocab
        self.tokenizer=tokenizer
        self.length=len(self.y)
        self.max_title_len=max_title_len
        
    def __len__(self):
        return self.length
    
    def __getitem__(self,index): 
        x=self.x[index]
        y=self.y[index]
        x=tokenizer(x,padding='max_length', max_length = self.max_title_len, truncation=True,
                                return_tensors="pt")
        return (x,y)


class BertClassifier(nn.Module):

    def __init__(self, dropout=0.3):

        super(BertClassifier, self).__init__()

        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 30)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer
    


# training
def train(ep=5):
    for epoch in range(ep):
        for i, (img,target) in enumerate(train_loader):
    #         print(img.size())
            target=target.to(device)
            mask = img['attention_mask'].to(device)
            input_id = img['input_ids'].squeeze(1).to(device)

            out = model(input_id, mask)
    #         out=model(img)
            loss=lossFunction(out,target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if(i%25==0):
                print(loss.item())
    #             print(out)
        print(f'epoch Finished: {loss.item()}')

def save(name):
    #saving the model
    torch.save(model.state_dict(), name)  

def load(name):
    #loading the model
    state_dict = torch.load(name)
    model.load_state_dict(state_dict)

def training_accuracy():
    #Training accuracy    
    correct=0
    with torch.no_grad():
        for i, (img,target) in enumerate(train_loader):
            target=target.to(device)
            mask = img['attention_mask'].to(device)
            input_id = img['input_ids'].squeeze(1).to(device)

            out = model(input_id, mask)
    #         print(out)
            for j in range(len(out)):
    #             print(f'{torch.argmax(out[j])} : {target[j]}')
                if torch.argmax(out[j]) == target[j]:
                    correct+=1

    print(correct)
    print(correct*100/len(training_data))

def test_accuracy(): 
    #Test accuracy
    correct=0
    with torch.no_grad():
        for i, (img,target) in enumerate(test_loader):
            target=target.to(device)
            mask = img['attention_mask'].to(device)
            input_id = img['input_ids'].squeeze(1).to(device)

            out = model(input_id, mask)
    #         print(out)
            for j in range(len(out)):
    #             print(f'{torch.argmax(out[j])} : {target[j]}')
                if torch.argmax(out[j]) == target[j]:
                    correct+=1

    print(correct)
    print(correct*100/len(testing_data))


def predictions(root_dir, path, fileName):
    dataX= pd.read_csv(path)
    id=(dataX.iloc[:,0]).tolist()
    predi=[]
    title=dataX.iloc[:,2]
    for i in range(len(title)):
        image=tokenizer(title[i],padding='max_length', max_length = 61, truncation=True,
                                return_tensors="pt")
#         image=image.to(device)
        
        mask = image['attention_mask'].to(device)
        input_id = image['input_ids'].squeeze(1).to(device)

        out = model(input_id, mask)
        predi.append(torch.argmax(out).item())
    data={'Id':id,'Genre':predi}
    output=pd.DataFrame(data)
    output.to_csv(fileName+'.csv', index=None)
#         print(prediction)
        

tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
learning_rate=5e-7
batch_size=32    
    
training_data=BookTitleDataset(csv_file_x='/kaggle/input/col774-2022/train_x.csv',
                               csv_file_y='/kaggle/input/col774-2022/train_y.csv',
                               tokenizer=tokenizer, max_title_len=61)

testing_data=BookTitleDataset(csv_file_x='/kaggle/input/col774-2022/non_comp_test_x.csv',
                               csv_file_y='/kaggle/input/col774-2022/non_comp_test_y.csv',
                               tokenizer=tokenizer, max_title_len=61)

train_loader=DataLoader(dataset=training_data, batch_size=batch_size,shuffle=True)
test_loader=DataLoader(dataset=testing_data, batch_size=batch_size,shuffle=True)
    


device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model=BertClassifier()
lossFunction=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)
load('ver3')
model=model.to(device)
train(ep=5)
test_accuracy()
predictions(root_dir='/kaggle/input/col774-2022/images/images',path='/kaggle/input/col774-2022/comp_test_x.csv',fileName='file2')
save('ver4')

# 21097
# 61.687134502923975
# 3109
# 54.54385964912281


In [None]:
import pandas as pd
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import matplotlib.pyplot as plt
import random
import os
import torch.nn as nn
# Activation Function
import torch.nn.functional as AF
from transformers import BertTokenizer
from transformers import BertModel
import re



# custom dataset class for our image data
class BookTitleDataset(Dataset):
    def __init__(self, csv_file_x, csv_file_y, tokenizer, max_title_len, root_dir, transforms=None):
        
        dataX= pd.read_csv(csv_file_x)
        dataY= pd.read_csv(csv_file_y)
        data=pd.merge(dataX, dataY, on='Id')
        del dataX
        del dataY
        title=data.iloc[:,2]
        self.x=title
        self.y=data.iloc[:,-1]
#         self.vocab=vocab
        self.tokenizer=tokenizer
        self.length=len(self.y)
        self.max_title_len=max_title_len
        self.annotations=data
        self.root_dir=root_dir
        self.transforms=transforms
        
    def __len__(self):
        return self.length
    
    def __getitem__(self,index): 
        imgPath=os.path.join(self.root_dir, self.annotations.iloc[index,1])
        image=Image.open(imgPath)
        if self.transforms:
            image=self.transforms(image)
        x=self.x[index]
        y=self.y[index]
        x=tokenizer(x,padding='max_length', max_length = self.max_title_len, truncation=True,
                                return_tensors="pt")
        return (image,x,y)
    


class BertClassifier(nn.Module):

    def __init__(self, dropout=0.3):

        super(BertClassifier, self).__init__()

        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 256)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer
    
        
    
    
class Res_Bert(nn.Module):
    def __init__(self):
        super(Res_Bert, self).__init__()
        #Bert classifire
        self.modelB = BertClassifier()
        
        model=torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
        num_fters=model.fc.in_features
        model.fc=nn.Linear(num_fters, 256)
        #ResNet
        self.modelA = model
        self.classifier = nn.Linear(512, 30)
        
    def forward(self, image, mask, input_id):
        x1 = self.modelA(image)
        x2 = self.modelB(input_id, mask)
        x = torch.cat((x1, x2), dim=1)
        x = self.classifier(AF.relu(x))
        return x

    
def train(ep=5):
    # Training loop
    for epoch in range(ep):
        for i, (img, embed, target) in enumerate(train_loader):
            img=img.to(device)
            mask = embed['attention_mask'].to(device)
            input_id = embed['input_ids'].squeeze(1).to(device)
            target=target.to(device)

            out=model(img, mask, input_id)
            loss=lossFunction(out,target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if(i%100==0):
                print(loss.item())
        print(f'epoch Finished: {loss.item()}')
        
        
def save(name):
    #saving the model
    torch.save(model.state_dict(), name)  

def load(name):
    #loading the model
    state_dict = torch.load(name)
    model.load_state_dict(state_dict)

def training_accuracy():
    #Training accuracy    
    correct=0
    with torch.no_grad():
        for i, (img, embed, target) in enumerate(train_loader):
            img=img.to(device)
            mask = embed['attention_mask'].to(device)
            input_id = embed['input_ids'].squeeze(1).to(device)
            target=target.to(device)

            out=model(img, mask, input_id)
    #         print(out)
            for j in range(len(out)):
    #             print(f'{torch.argmax(out[j])} : {target[j]}')
                if torch.argmax(out[j]) == target[j]:
                    correct+=1

    print(correct)
    print(correct*100/len(training_data))

def test_accuracy(): 
    #Test accuracy
    correct=0
    with torch.no_grad():
        for i, (img, embed, target) in enumerate(test_loader):
            img=img.to(device)
            mask = embed['attention_mask'].to(device)
            input_id = embed['input_ids'].squeeze(1).to(device)
            target=target.to(device)

            out=model(img, mask, input_id)
    #         print(out)
            for j in range(len(out)):
    #             print(f'{torch.argmax(out[j])} : {target[j]}')
                if torch.argmax(out[j]) == target[j]:
                    correct+=1

    print(correct)
    print(correct*100/len(testing_data))


def predictions(root_dir, path, fileName):
    dataX= pd.read_csv(path)
    id=(dataX.iloc[:,0]).tolist()
    predi=[]
    title=dataX.iloc[:,2]
    img_names=dataX.iloc[:,1]
    for i in range(len(title)):
        imgPath=os.path.join(root_dir, img_names[i])
        image=Image.open(imgPath)
        image=test_transform(image)
        image=image.to(device)
        embed=tokenizer(title[i],padding='max_length', max_length = 61, truncation=True,
                                return_tensors="pt")
#         
        mask = embed['attention_mask'].to(device)
        input_id = embed['input_ids'].squeeze(1).to(device)

        out=model(image.unsqueeze(0), mask, input_id)
        predi.append(torch.argmax(out).item())
    data={'Id':id,'Genre':predi}
    output=pd.DataFrame(data)
    output.to_csv(fileName+'.csv', index=None)

    
    
    
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
learning_rate=5e-7
batch_size=32  

#calculated mean and std of the entire data separately
mean=torch.tensor([0.5482, 0.5109, 0.4749])
std=torch.tensor([0.2526, 0.2428, 0.2356])

# Defining a custom transform pipeline
train_transform=transforms.Compose([
    transforms.RandomInvert(0.6),
    transforms.RandomSolarize(0.6),
    transforms.ToTensor(),
    transforms.ColorJitter(),
    transforms.Normalize(mean=mean,std=std),
])

test_transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean,std=std), 
])

training_data=BookTitleDataset(csv_file_x='/kaggle/input/col774-2022/train_x.csv',
                               csv_file_y='/kaggle/input/col774-2022/train_y.csv',
                               root_dir='/kaggle/input/col774-2022/images/images',
                               tokenizer=tokenizer, max_title_len=61, transforms=train_transform)

testing_data=BookTitleDataset(csv_file_x='/kaggle/input/col774-2022/non_comp_test_x.csv',
                              csv_file_y='/kaggle/input/col774-2022/non_comp_test_y.csv',
                              root_dir='/kaggle/input/col774-2022/images/images',
                              tokenizer=tokenizer, max_title_len=61, transforms=test_transform)


train_loader=DataLoader(dataset=training_data, batch_size=batch_size,shuffle=True)
test_loader=DataLoader(dataset=testing_data, batch_size=batch_size,shuffle=True)    
    
    
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model=Res_Bert()
load('ver3')
model=model.to(device)
lossFunction=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)

train(ep=1)
save('ver4')
# load('ver1')
training_accuracy()
test_accuracy()
# predictions(root_dir='/kaggle/input/col774-2022/images/images',path='/kaggle/input/col774-2022/comp_test_x.csv',fileName='file2')

# 28856
# 84.37426900584795
# 3254
# 57.08771929824562

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


0.520939826965332
0.39590370655059814
0.36988842487335205
0.8525664210319519
0.5642260909080505
0.3386771082878113
0.7660030126571655
0.5333539247512817
0.31317538022994995
0.42892882227897644
0.5142131447792053
epoch Finished: 0.49554121494293213


In [4]:
import pandas as pd
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import matplotlib.pyplot as plt
import random
import os
import torch.nn as nn
import torch.nn.functional as AF
from transformers import BertTokenizer, BertModel
from sklearn.metrics import confusion_matrix, classification_report
from tqdm import tqdm  # Import tqdm for progress tracking
import re

# Set random seed for reproducibility
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Custom dataset class for our image data
class BookTitleDataset(Dataset):
    def __init__(self, csv_file_x, csv_file_y, tokenizer, max_title_len, root_dir, transforms=None):
        dataX = pd.read_csv(csv_file_x)
        dataY = pd.read_csv(csv_file_y)
        data = pd.merge(dataX, dataY, on='Id')
        del dataX
        del dataY
        title = data.iloc[:, 2]
        self.x = title
        self.y = data.iloc[:, -1]
        self.tokenizer = tokenizer
        self.length = len(self.y)
        self.max_title_len = max_title_len
        self.annotations = data
        self.root_dir = root_dir
        self.transforms = transforms
        
    def __len__(self):
        return self.length
    
    def __getitem__(self, index): 
        imgPath = os.path.join(self.root_dir, self.annotations.iloc[index, 1])
        image = Image.open(imgPath)
        if self.transforms:
            image = self.transforms(image)
        x = self.x[index]
        y = self.y[index]
        x = self.tokenizer(x, padding='max_length', max_length=self.max_title_len, truncation=True,
                           return_tensors="pt")
        return (image, x, y)

class BertClassifier(nn.Module):
    def __init__(self, dropout=0.3):
        super(BertClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 256)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)
        return final_layer

class Res_Bert(nn.Module):
    def __init__(self):
        super(Res_Bert, self).__init__()
        self.modelB = BertClassifier()
        model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
        num_fters = model.fc.in_features
        model.fc = nn.Linear(num_fters, 256)
        self.modelA = model
        self.classifier = nn.Linear(512, 30)

    def forward(self, image, mask, input_id):
        x1 = self.modelA(image)
        x2 = self.modelB(input_id, mask)
        x = torch.cat((x1, x2), dim=1)
        x = self.classifier(AF.relu(x))
        return x

def train(ep=5):
    # Training loop
    for epoch in range(ep):
        model.train()
        pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{ep}")
        for i, (img, embed, target) in enumerate(pbar):
            img = img.to(device)
            mask = embed['attention_mask'].to(device)
            input_id = embed['input_ids'].squeeze(1).to(device)
            target = target.to(device)

            out = model(img, mask, input_id)
            loss = lossFunction(out, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            pbar.set_postfix({'loss': loss.item()})

        print(f'Epoch {epoch + 1} finished with loss: {loss.item()}')

def save(name):
    # Saving the model
    torch.save(model.state_dict(), name)  

def load(name):
    # Loading the model
    state_dict = torch.load(name)
    model.load_state_dict(state_dict)

def calculate_metrics(loader):
    all_preds = []
    all_targets = []
    model.eval()
    with torch.no_grad():
        for img, embed, target in tqdm(loader, desc="Calculating Metrics"):
            img = img.to(device)
            mask = embed['attention_mask'].to(device)
            input_id = embed['input_ids'].squeeze(1).to(device)
            target = target.to(device)

            out = model(img, mask, input_id)
            all_preds.extend(torch.argmax(out, dim=1).cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    return np.array(all_targets), np.array(all_preds)

def print_metrics(y_true, y_pred):
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

# Predictions function
def predictions(root_dir, path, fileName):
    dataX = pd.read_csv(path)
    id = (dataX.iloc[:, 0]).tolist()
    predi = []
    title = dataX.iloc[:, 2]
    img_names = dataX.iloc[:, 1]
    for i in range(len(title)):
        imgPath = os.path.join(root_dir, img_names[i])
        image = Image.open(imgPath)
        image = test_transform(image)
        image = image.to(device)
        embed = tokenizer(title[i], padding='max_length', max_length=61, truncation=True,
                          return_tensors="pt")
        
        mask = embed['attention_mask'].to(device)
        input_id = embed['input_ids'].squeeze(1).to(device)

        out = model(image.unsqueeze(0), mask, input_id)
        predi.append(torch.argmax(out).item())
    data = {'Id': id, 'Genre': predi}
    output = pd.DataFrame(data)
    output.to_csv(fileName + '.csv', index=None)

# Setup
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
learning_rate = 5e-7
batch_size = 32  

# Calculated mean and std of the entire data separately
mean = torch.tensor([0.5482, 0.5109, 0.4749])
std = torch.tensor([0.2526, 0.2428, 0.2356])

# Defining a custom transform pipeline
train_transform = transforms.Compose([
    transforms.RandomInvert(0.6),
    transforms.RandomSolarize(0.6),
    transforms.ToTensor(),
    transforms.ColorJitter(),
    transforms.Normalize(mean=mean, std=std),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std), 
])

training_data = BookTitleDataset(csv_file_x='/kaggle/input/col774-2022/train_x.csv',
                                  csv_file_y='/kaggle/input/col774-2022/train_y.csv',
                                  root_dir='/kaggle/input/col774-2022/images/images',
                                  tokenizer=tokenizer, max_title_len=61, transforms=train_transform)

testing_data = BookTitleDataset(csv_file_x='/kaggle/input/col774-2022/non_comp_test_x.csv',
                                 csv_file_y='/kaggle/input/col774-2022/non_comp_test_y.csv',
                                 root_dir='/kaggle/input/col774-2022/images/images',
                                 tokenizer=tokenizer, max_title_len=61, transforms=test_transform)

train_loader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=testing_data, batch_size=batch_size, shuffle=True)    

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Res_Bert()
load('ver4')
model = model.to(device)
lossFunction = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train(ep=10)
save('ver5')

# Calculate training and testing accuracy and metrics
y_true_train, y_pred_train = calculate_metrics(train_loader)
print("Training Metrics:")
print_metrics(y_true_train, y_pred_train)

y_true_test, y_pred_test = calculate_metrics(test_loader)
print("Testing Metrics:")
print_metrics(y_true_test, y_pred_test)

# Predictions
# predictions(root_dir='/kaggle/input/col774-2022/images/images', path='/kaggle/input/col774-2022/comp_test_x.csv', fileName='file2')


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0
Epoch 1/10: 100%|██████████| 1069/1069 [10:21<00:00,  1.72it/s, loss=2.55]


Epoch 1 finished with loss: 2.5458555221557617


Epoch 2/10: 100%|██████████| 1069/1069 [09:49<00:00,  1.81it/s, loss=2.34]


Epoch 2 finished with loss: 2.3439829349517822


Epoch 3/10: 100%|██████████| 1069/1069 [09:51<00:00,  1.81it/s, loss=2.04]


Epoch 3 finished with loss: 2.040240526199341


Epoch 4/10: 100%|██████████| 1069/1069 [09:49<00:00,  1.81it/s, loss=2.21]


Epoch 4 finished with loss: 2.210505962371826


Epoch 5/10: 100%|██████████| 1069/1069 [09:54<00:00,  1.80it/s, loss=2.39]


Epoch 5 finished with loss: 2.3855602741241455


Epoch 6/10: 100%|██████████| 1069/1069 [09:58<00:00,  1.79it/s, loss=2.28]


Epoch 6 finished with loss: 2.2802858352661133


Epoch 8/10: 100%|██████████| 1069/1069 [09:53<00:00,  1.80it/s, loss=2.22]


Epoch 8 finished with loss: 2.2200396060943604


Epoch 9/10: 100%|██████████| 1069/1069 [09:47<00:00,  1.82it/s, loss=2.14]


Epoch 9 finished with loss: 2.1417338848114014


Epoch 10/10: 100%|██████████| 1069/1069 [09:47<00:00,  1.82it/s, loss=2.32]


Epoch 10 finished with loss: 2.315274953842163


Calculating Metrics: 100%|██████████| 1069/1069 [04:38<00:00,  3.84it/s]


Training Metrics:
Confusion Matrix:
[[ 903   17    0    8    4    3   25    2    7    0    3    5   17    1
     2   17   12    0   37    0    6    0    9   45    2    1    3    4
     1    0]
 [  11  666    8   25    9   14   43   12    7    2   12   38   22    6
     0   11    4    4    2    0   12    1   82   12   10    9  100   14
     8   11]
 [   0    3  690   18   18    1    1    4    1    1   11    2    0    4
     0    1    2    7    0    3    5    1    4    0   76    5    7   85
     3  195]
 [   0   14   25  774    7    1    0    7    1    1    9    1    1    6
     1    0    1    4    0    0   15    2    4    6  108   28   39   27
     7   27]
 [   0    0   18    1  675    2    0   25    4    7   45    0    3   21
    13    1    2  227    0    6    5    0    0    2   17    8    1   18
     4   28]
 [  10    5    1    0    6  850    1   20    6  109   10    4   39    2
     3    0   23    1   23    0    2    0    6   22    1    0    1    3
     1    4]
 [ 116   39    2    3 

Calculating Metrics: 100%|██████████| 179/179 [00:50<00:00,  3.51it/s]

Testing Metrics:
Confusion Matrix:
[[125   6   0   1   0   1   4   1   1   0   0   2   4   0   0   1   2   0
    2   1   0   0   1   6   1   0   1   2   0   0]
 [  4 105   0   0   1   4   8   6   1   0   3   6   3   1   1   2   0   2
    1   1   1   0  10   4   1   1  17   1   2   3]
 [  0   1  91   3   7   0   0   2   0   0   1   1   1   1   0   0   0   0
    0   0   1   0   1   0  15   1   2  17   2  37]
 [  0   4  13 131   1   2   0   4   1   1   0   2   0   0   0   0   0   3
    0   1   5   0   4   1  24   2   6   6   0   5]
 [  0   1   1   2  95   0   1   4   0   1  11   0   0   4   1   0   0  45
    0   1   0   1   2   1   1   3   1   5   0   3]
 [  0   3   0   0   2 106   0   2   0  12   3   2   5   0   3   0   2   0
    6   0   0   0   2   0   1   0   1   0   0   0]
 [ 20   9   0   1   3   5 103   1   0   0   1   1  21   1   0   4   1   0
    4   0   0   0   2   8   4   0   5   0  13   2]
 [  0   3   5   2   5   3   0  85   1  16  26   3   1   2   0   0   1   1
    2   2   0   


