# Preliminaries

## Install and import libraries 

In [1]:
import torch
import torch.nn.functional as F
import os
import ast
import csv
import pandas as pd

## Set computation engine

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


## Connect to drive

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#Load precomputed text features 

In [4]:
ProportionTrainVal=0.7

Xtrain_item_name = torch.load('/content/drive/MyDrive/data_rakuten/Xtrain_item_name.pt')
Xtrain_item_caption = torch.load('/content/drive/MyDrive/data_rakuten/Xtrain_item_caption.pt')
Ytrain_label = torch.load('/content/drive/MyDrive/data_rakuten/Ytrain_label.pt')
X_features=torch.cat((Xtrain_item_name,Xtrain_item_caption),0)

In [10]:
X_features.shape

torch.Size([1536, 212120])

In [11]:
class TextDataset(torch.utils.data.Dataset):

    def __init__(self,features,labels):
      #Load pre-computed tensors
      self.features=features
      self.labels=labels
        
    def __len__(self):
        return self.features.shape[1]

    def __getitem__(self, idx):

        return  self.features[:,idx],self.labels[:,idx]

trainSet= TextDataset(X_features[:,:int(X_features.shape[1]*ProportionTrainVal)],Ytrain_label[:,:int(X_features.shape[1]*ProportionTrainVal)])
trainLoader = torch.utils.data.DataLoader(trainSet, batch_size=64,shuffle=True, num_workers=2)

valSet= TextDataset(X_features[:,int(X_features.shape[1]*ProportionTrainVal):],Ytrain_label[:,int(X_features.shape[1]*ProportionTrainVal):])
valLoader = torch.utils.data.DataLoader(valSet, batch_size=64,shuffle=False, num_workers=2)

In [12]:
len(valSet)+len(trainSet)==X_features.shape[1]

True

# Our prediction model : SVM approach

In [None]:
from sklearn import svm
X = X_features.permute(1,0) #(n_samples, n_features)
Y = Ytrain_label[1,:] #n_samples
clf = svm.SVC()
clf.fit(X, Y)

In [None]:
len_val  = X_features.shape[1]-int(X_features.shape[1]*ProportionTrainVal)
len_test = int(X_features.shape[1]*ProportionTrainVal)

correct=0
for i in range(len_val):
  predict=clf.predict([[2., 2.]])
  if predict==Y_val[i]

# Our prediction model : Neural approach

## Create the model

In [18]:
class CustomModel(torch.nn.Module):

    def __init__(self):
        super(CustomModel, self).__init__()

        #self.dropout = torch.nn.Dropout(0.1)
        self.fc1 = torch.nn.Linear(1536, 256)
        self.fc2 = torch.nn.Linear(256, 128)
        self.fc3 = torch.nn.Linear(128, 19)


    def forward(self, text_features):
        text_features = F.relu(self.fc1(text_features))
        text_features = F.relu(self.fc2(text_features))
        #text_features = self.dropout(text_features)
        logits = self.fc3(text_features)

        return logits

model=CustomModel()
model.to(device)

CustomModel(
  (fc1): Linear(in_features=1536, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=19, bias=True)
)

## Train the model

In [19]:
nbr_labels_positive = torch.tensor([25673,71831,34014,33338,2383,8303,21697,28814,8353,12597,25017,10378,24582,10355,23583,12911,3325,51751,14534]) #number of labels
nbr_labels_negative = nbr_labels_positive.sum()*torch.ones(19)-nbr_labels_positive
coeffs = nbr_labels_negative/nbr_labels_positive    #coefficients for each label
coeffs = coeffs.to(device)

In [33]:
criterion = torch.nn.BCEWithLogitsLoss()#pos_weight=coeffs)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.15)

# Training
def train(epoch):
    print('\nEpoch: %d' % epoch)
    model.train()
    train_loss = 0

    for batch_idx, (inputs, targets) in enumerate(trainLoader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        if batch_idx%1000==0:
            trainLosses.append(loss.item())
            print('{:.0f}%|Train Loss: {:.5f} '.format(100*batch_idx/(len(trainLoader)+1),train_loss/(batch_idx+1)))
            print(outputs[0,:])


In [34]:
# training loop
trainLosses=[]
valLosses=[]

for epoch in range(3):
    train(epoch)
    scheduler.step()


Epoch: 0
0%|Train Loss: 1.07146 
tensor([0.7884, 0.9888, 0.8672, 0.8713, 0.6270, 0.7547, 0.7336, 0.7955, 0.5812,
        0.7130, 0.7642, 0.7638, 0.7653, 0.7120, 0.8583, 0.7213, 0.6513, 0.8184,
        0.7024], device='cuda:0', grad_fn=<SliceBackward>)
43%|Train Loss: 0.36020 
tensor([-1.9891, -0.6564, -1.6878, -1.7131, -4.3292, -3.1809, -2.2548, -1.8336,
        -3.2177, -2.7821, -2.0273, -3.1289, -2.0397, -3.0156, -2.1152, -2.6439,
        -4.0370, -1.1298, -2.6116], device='cuda:0', grad_fn=<SliceBackward>)
86%|Train Loss: 0.33284 
tensor([-1.9113, -0.6657, -1.6700, -1.6163, -4.4200, -3.1741, -2.2264, -1.8679,
        -3.2757, -2.7379, -2.0501, -2.9814, -2.0541, -2.8872, -2.0796, -2.6945,
        -4.1356, -1.0946, -2.5884], device='cuda:0', grad_fn=<SliceBackward>)

Epoch: 1
0%|Train Loss: 0.37917 
tensor([-1.9864, -0.6620, -1.6536, -1.6972, -4.4353, -3.1245, -2.0964, -1.9068,
        -3.2231, -2.6978, -1.9854, -2.9531, -1.9966, -2.9559, -2.0557, -2.7608,
        -4.1524, -1.0927, -

KeyboardInterrupt: ignored

In [None]:
plt.plot(generator_losses)
plt.plot(discriminator_losses)
plt.title("Losses during training")
plt.xlabel('step')
plt.ylabel('Losses')
plt.legend(["generator_losses","discriminator_losses"])
plt.show()

In [22]:
#Save weights
model_file = "/content/drive/MyDrive/data_rakuten/textmodel.pth"
torch.save(model.state_dict(), model_file)

In [23]:
#Load weights
model_file = "/content/drive/MyDrive/data_rakuten/textmodel.pth"
state_dict = torch.load(model_file)
model.load_state_dict(state_dict)

<All keys matched successfully>

# Generate csv file for submission

In [24]:
class TestDataset(torch.utils.data.Dataset):

    def __init__(self,):

        self.Xtest_item_name = torch.load('/content/drive/MyDrive/data_rakuten/Xtest_item_name.pt')
        self.Xtest_item_caption = torch.load('/content/drive/MyDrive/data_rakuten/Xtest_item_caption.pt')
        
        

    def __len__(self):
        return self.Xtest_item_name.shape[1]


    #all this processing needs to be done here because the output of __getitem__ needs to have a fixed size to use a BS>1
    def __getitem__(self, idx):
 
        return  torch.cat((self.Xtest_item_name[:,idx] ,self.Xtest_item_caption[:,idx]),0)

testSet= TestDataset()
testLoader = torch.utils.data.DataLoader(testSet, batch_size=1,shuffle=False, num_workers=2)

In [25]:
inv_dico_labels={ 0: "Beige",1:"Black",2:"Blue",3:"Brown",4:"Burgundy",5:"Gold",6:"Green",7:"Grey",
                 8:"Khaki",9:"Multiple Colors",10:"Navy",11:"Orange",12:"Pink",
                 13:"Purple",14:"Red",15:"Silver",16:"Transparent",17:"White",18:"Yellow"}

model.eval()

#Write prediction in the submission.csv file

with open('/content/drive/MyDrive/data_rakuten/submission.csv', 'w') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=',')
    spamwriter.writerow([',color_tags,'])
    with torch.no_grad():
        for batch_idx, inputs in enumerate(testLoader):
            inputs = inputs.to(device)
            outputs = model(inputs)
            prediction=[]
            for indice,logits in enumerate(outputs.squeeze(0)):
                if logits>0: #put the tag if the proba is greater than 0.5
                    prediction.append(inv_dico_labels[indice]) 
            
            if len(prediction)>1:
                spamwriter.writerow(['{},"{}"'.format(batch_idx,prediction)])
            else:
                spamwriter.writerow(['{},{}'.format(batch_idx,prediction)])
            if batch_idx>300:
              break

In [26]:
outputs

tensor([[0.8643, 0.9288, 0.7990, 0.8939, 0.8603, 0.8654, 0.8418, 0.8375, 0.8138,
         0.7771, 0.8244, 0.8475, 0.8269, 0.8410, 0.8143, 0.7514, 0.7808, 0.9759,
         0.8648]], device='cuda:0')