In [1]:
!pip install -U sentence-transformers



In [2]:
# Mount Google Drive at /content/drive; the project directory entered later in this
# notebook lives on the mounted drive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
import os
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, IterableDataset
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

In [4]:
# Enter the project root so the relative 'data/...' paths and the local
# `fashion_recommendations` package resolve. The path is absolute (Drive is mounted at
# /content/drive above) so re-running this cell does not fail the way a path relative to
# the current working directory would.
os.chdir('/content/drive/My Drive/Colab Notebooks/Github/fashion-recommendations')

In [5]:
from fashion_recommendations.metrics.average_precision import mapk

In [6]:
# Run on the first CUDA GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [7]:
class RecommendationDatasetMultiLabel(IterableDataset):
    """Stream (customer index, input features, target distribution) examples from a TSV file.

    Each data row must hold eight tab-separated fields, in this order: customer index,
    comma-separated indices of previously purchased articles, comma-separated label
    article indices, FN, Active, age, comma-separated purchase counts (one per label
    index) and example age. The first line of the file is treated as a header.

    The file is opened anew on every iteration and closed when the pass ends, so the
    same dataset instance can be iterated more than once and no handle is leaked.

    Args:
        dataset_filepath: Path of the tab-separated file to stream.
        article_emb_bag: Callable (e.g. ``nn.EmbeddingBag``) mapping a ``(1, n)`` tensor of
            article indices to a pooled embedding.
        total_articles: Size of the target vector (number of distinct articles).
    """

    def __init__(self, dataset_filepath, article_emb_bag, total_articles):

        # Fail fast on an unreadable path, as before, but do not keep the handle open.
        with open(dataset_filepath, 'r'):
            pass

        self.dataset_filepath = dataset_filepath

        self.article_emb_bag = article_emb_bag

        self.total_articles = total_articles

    def process_label(self, label_str: str, num_purchases_str: str):
        """Turn label indices and purchase counts into a normalised target vector.

        Returns a float tensor of length ``total_articles`` holding each label's share of
        the purchase counts (all other entries are zero).
        """

        labels = torch.tensor([int(v) for v in label_str.split(',')])

        num_purchases = torch.tensor([float(v) for v in num_purchases_str.split(',')])

        target = torch.zeros(self.total_articles).scatter_(0, labels, num_purchases)

        target = target / target.sum()  # Normalise so the entries sum to 1

        return target

    def mean_historical_purchases_embedding(self, input_str: str):
        """Pool the embeddings of the comma-separated article indices into one flat vector."""

        indices = torch.tensor([int(v) for v in input_str.split(',')])

        # The bag expects a 2-D (bags, indices) input; a single bag is used here.
        mean_emb = self.article_emb_bag(indices.unsqueeze(0)).flatten()

        return mean_emb

    def process_numeric_features(self, fn: str, active: str, age: str, example_age: str):
        """Parse the four numeric fields into a float tensor ordered (fn, active, age, example_age)."""
        numeric_features_tensor = torch.tensor([
            float(fn),
            float(active),
            float(age),
            float(example_age)
        ])

        return numeric_features_tensor

    def parse_itr(self, dataset_itr):
        """Yield ``(customer_id_idx, inputs, label)`` for every non-blank line of ``dataset_itr``.

        ``inputs`` is the pooled purchase-history embedding followed by the four numeric
        features; ``label`` is the normalised target vector from ``process_label``.
        """

        for line in dataset_itr:

            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            if not line.strip():
                continue

            line_items = line.rstrip('\n').split('\t')

            customer_id_idx, article_id_idx_last10, article_id_idx_label, fn, active, age, num_purchases, example_age = line_items

            customer_id_idx = int(customer_id_idx)

            numeric_features_tensor = self.process_numeric_features(fn, active, age, example_age)

            mean_emb = self.mean_historical_purchases_embedding(article_id_idx_last10)

            label = self.process_label(article_id_idx_label, num_purchases)

            inputs = torch.concat((mean_emb, numeric_features_tensor))

            yield customer_id_idx, inputs, label

    def get_stream(self, dataset_itr):
        """Return the example generator for an iterable of raw data lines."""

        return self.parse_itr(dataset_itr)

    def __iter__(self):
        """Open the file, skip the header and yield every example; closes the file afterwards."""

        with open(self.dataset_filepath, 'r') as dataset_file:
            next(dataset_file, None)  # skip header (no error on an empty file)
            yield from self.get_stream(dataset_file)

In [8]:
# Article table; article_id is forced to text rather than being parsed as an integer.
articles_df = pd.read_csv(
    'data/articles_df_filt_with_counts_and_age_extended_for_submission.csv',
    dtype={'article_id': str},
)
print(articles_df.shape)
articles_df.head()

(104547, 3)


Unnamed: 0,article_id,detail_desc,article_id_idx
0,108775015,Jersey top with narrow shoulder straps.,0
1,108775044,Jersey top with narrow shoulder straps.,1
2,108775051,Jersey top with narrow shoulder straps.,2
3,110065001,"Microfibre T-shirt bra with underwired, moulde...",3
4,110065002,"Microfibre T-shirt bra with underwired, moulde...",4


In [9]:
# Customer table; its row count later sizes the user-embedding matrix of the model.
customers = pd.read_csv(
    'data/customers_filt_with_counts_and_age_extended_for_submission.csv'
)
print(customers.shape)
customers.head()

(63412, 5)


Unnamed: 0,customer_id,FN,Active,age,customer_id_idx
0,00039306476aaf41a07fed942884f16b30abfa83a2a8be...,0.0,0.0,0.27,0
1,0003e867a930d0d6842f923d6ba7c9b77aba33fe2a0fbf...,1.0,1.0,0.33,1
2,000493dd9fc463df1acc2081450c9e75ef8e87d5dd17ed...,0.0,0.0,0.29,2
3,00077dbd5c4a4991e092e63893ccf29294a9d5c46e8501...,0.0,0.0,0.23,3
4,000fb6e772c5d0023892065e659963da90b1866035558e...,1.0,1.0,0.42,4


### Create article EmbeddingBag

In [10]:
# Pretrained sentence encoder used only to embed the article descriptions
# (it yields 768-dimensional vectors for them). It is not the recommender being trained.
model = SentenceTransformer('all-distilroberta-v1')

Downloading:   0%|          | 0.00/737 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/9.86k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/653 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/15.7k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/329M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/333 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

In [11]:
# Plain Python list of description strings, in article-table row order.
description_column = articles_df['detail_desc']
article_descriptions = description_column.tolist()
article_descriptions[:5]

['Jersey top with narrow shoulder straps.',
 'Jersey top with narrow shoulder straps.',
 'Jersey top with narrow shoulder straps.',
 'Microfibre T-shirt bra with underwired, moulded, lightly padded cups that shape the bust and provide good support. Narrow adjustable shoulder straps and a narrow hook-and-eye fastening at the back. Without visible seams for greater comfort.',
 'Microfibre T-shirt bra with underwired, moulded, lightly padded cups that shape the bust and provide good support. Narrow adjustable shoulder straps and a narrow hook-and-eye fastening at the back. Without visible seams for greater comfort.']

In [12]:
model.encode(article_descriptions[0]).shape

(768,)

In [13]:
# Embed every article description in a single call (one vector per description).
embeddings = model.encode(article_descriptions)

In [14]:
# as_tensor converts the encoder output to a tensor and is a no-op when `embeddings`
# already is one, so re-running this cell neither warns nor makes a redundant copy.
embeddings = torch.as_tensor(embeddings)

In [15]:
embeddings.shape

torch.Size([104547, 768])

In [16]:
# Frozen lookup over the precomputed description embeddings. With 'mean' pooling (the
# library default, spelled out here) a bag of article indices maps to the average of
# their embedding rows.
ARTICLE_EMBEDDING_BAG = nn.EmbeddingBag.from_pretrained(
    embeddings,
    freeze=True,
    mode='mean',
)

### Model

In [17]:
class FashionRecV3(nn.Module):
    """Feed-forward recommender producing one logit per article.

    A learned per-customer embedding is concatenated with a precomputed input vector
    (pooled article embedding followed by numeric features) and passed through two
    hidden layers (2048 and 1024 units, ReLU + dropout) and a linear output layer.

    Args:
        user_embedding_dim: Width of the learned customer embedding.
        num_users: Number of rows in the customer embedding table. Defaults to
            ``len(customers)`` (the notebook-level customer table).
        num_articles: Number of output logits. Defaults to ``len(articles_df)``
            (the notebook-level article table).
        article_embedding_dim: Width of the pooled article embedding inside ``inputs``.
        num_numeric_features: Number of numeric features appended to ``inputs``.
    """

    def __init__(
        self,
        user_embedding_dim,
        num_users=None,
        num_articles=None,
        article_embedding_dim=768,
        num_numeric_features=4,
    ):
        super().__init__()

        # Fall back to the notebook globals only when sizes are not given explicitly,
        # so existing calls keep working while the class stays usable on its own.
        if num_users is None:
            num_users = len(customers)
        if num_articles is None:
            num_articles = len(articles_df)

        # Embeddings
        self.user_embeddings = nn.Embedding(num_embeddings=num_users, embedding_dim=user_embedding_dim)

        # Initialise embeddings
        torch.nn.init.xavier_uniform_(self.user_embeddings.weight)

        # FC layers
        self.fc_1 = nn.Linear(
            in_features=user_embedding_dim + article_embedding_dim + num_numeric_features,
            out_features=2048,
        )
        self.fc_2 = nn.Linear(in_features=2048, out_features=1024)
        self.fc_3 = nn.Linear(in_features=1024, out_features=num_articles)

        # Activation functions
        self.relu = nn.ReLU()

        # Dropout
        self.dropout = nn.Dropout(0.01)

    def forward(
        self,
        customer_id_idx,
        inputs
    ):
        """Return raw (unnormalised) article logits.

        Args:
            customer_id_idx: Integer tensor of customer indices, one per example.
            inputs: Float tensor with one row per example, concatenated to the customer
                embedding along dim 1.

        Returns:
            Tensor with one row per example and one column per article.
        """

        user_emb = self.user_embeddings(customer_id_idx)

        concatenated_features = torch.concat(
            [
                user_emb,
                inputs
            ],
            dim=1
        )

        x = self.fc_1(concatenated_features)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc_2(x)
        x = self.relu(x)
        x = self.dropout(x)

        # No activation on the output layer: the loss operates on logits.
        x = self.fc_3(x)

        return x

In [24]:
# Instantiate the recommender with 384-dimensional learned customer embeddings.
fashion_rec_v3 = FashionRecV3(user_embedding_dim=384)

In [25]:
# Move the model parameters to the selected device; the returned module is displayed
# as the cell output.
fashion_rec_v3.to(device)

FashionRecV3(
  (user_embeddings): Embedding(63412, 384)
  (fc_1): Linear(in_features=1156, out_features=2048, bias=True)
  (fc_2): Linear(in_features=2048, out_features=1024, bias=True)
  (fc_3): Linear(in_features=1024, out_features=104547, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.01, inplace=False)
)

In [26]:
# Examples per DataLoader batch; also used to compute the number of batches per pass.
BATCH_SIZE = 1024

In [27]:
# Row count of the training file, used to average the loss and to size the progress bars.
# Only the first column is read because nothing but the number of rows is needed.
total_training_examples = pd.read_csv(
    'data/final_train_set_with_counts_and_age_extended_for_submission.tsv',
    sep='\t',
    usecols=[0],
).shape[0]
# np.ceil returns a float; cast to int so tqdm shows "62/62" instead of "62/62.0".
total_batches = int(np.ceil(total_training_examples / BATCH_SIZE))
total_batches

62.0

In [58]:
# The dataset emits each target as a vector of purchase shares over all articles
# (normalised to sum to 1), so the loss is cross-entropy against those shares rather
# than against a single class index.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=fashion_rec_v3.parameters(), lr=0.0001)  # previously 0.001 (see the LR note after this cell)

200 epochs

LR: 0.001 -> 0.0001 @ 100 epochs

In [87]:
MAX_EPOCHS = 100
EVAL_EVERY = 50  # report the full-pass training loss after epoch 1 and every EVAL_EVERY epochs
TRAIN_SET_PATH = 'data/final_train_set_with_counts_and_age_extended_for_submission.tsv'

# Keep the loss history when this cell is re-run to continue training, but create it on a
# fresh kernel so Restart & Run All does not fail with a NameError.
training_losses = globals().get('training_losses', [])


def make_train_loader():
    """Return a DataLoader over a freshly constructed training dataset.

    A new dataset instance is built for every pass so that each pass starts reading
    from the top of the training file.
    """
    train_dataset = RecommendationDatasetMultiLabel(
        dataset_filepath=TRAIN_SET_PATH,
        article_emb_bag=ARTICLE_EMBEDDING_BAG,
        total_articles=len(articles_df),
    )
    return DataLoader(train_dataset, batch_size=BATCH_SIZE)


# Summed (not averaged) loss, so per-batch values can be added up and divided by the
# number of examples once at the end of the evaluation pass.
sum_criterion = nn.CrossEntropyLoss(reduction='sum')

for epoch in range(MAX_EPOCHS):

    # A previous cell may have left the model in eval mode (dropout disabled).
    fashion_rec_v3.train()

    for customer_id_idx, inputs, label in tqdm(make_train_loader(), total=int(total_batches)):

        customer_id_idx, inputs, label = customer_id_idx.to(device), inputs.to(device), label.to(device)

        optimizer.zero_grad()  # Set gradients to 0 otherwise will accumulate

        y_pred = fashion_rec_v3(customer_id_idx, inputs)

        loss = criterion(y_pred, label)

        loss.backward()

        optimizer.step()

    if (epoch + 1) % EVAL_EVERY == 0 or epoch == 0:

        # Compute train loss with dropout disabled and without building a graph:

        total_train_loss = 0.0

        fashion_rec_v3.eval()

        with torch.no_grad():
            for customer_id_idx, inputs, label in tqdm(make_train_loader(), total=int(total_batches)):

                customer_id_idx, inputs, label = customer_id_idx.to(device), inputs.to(device), label.to(device)

                y_pred = fashion_rec_v3(customer_id_idx, inputs)

                total_train_loss += sum_criterion(y_pred, label).item()

        fashion_rec_v3.train()

        mean_train_loss = total_train_loss / total_training_examples
        print(f"Training loss: {mean_train_loss}")
        training_losses.append(mean_train_loss)

100%|██████████| 62/62.0 [00:37<00:00,  1.67it/s]
100%|██████████| 62/62.0 [00:39<00:00,  1.57it/s]


Training loss: 0.858437976754141


100%|██████████| 62/62.0 [00:35<00:00,  1.73it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.73it/s]
100%|██████████| 62/62.0 [00:36<00:00,  1.72it/s]
100%|██████████| 62/62.0 [00:36<00:00,  1.72it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.74it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.72it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.73it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.73it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.73it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.74it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.72it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.73it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.74it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.74it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.74it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.74it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.74it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.74it/s]


Training loss: 0.8595750862015223


100%|██████████| 62/62.0 [00:35<00:00,  1.76it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.77it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.77it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.77it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.76it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.76it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.76it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.76it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.76it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.76it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.77it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.75it/s]
100%|██████████| 62/62.0 [00:35<00:00,  1.76it/s]


Training loss: 0.8557884232190132





In [88]:
training_losses

[8.545034589642931,
 1.8315276435472339,
 1.1120631957230098,
 1.0304431399133223,
 0.8757716021101339,
 0.8654603500332523,
 0.858437976754141,
 0.8595750862015223,
 0.8557884232190132]

### MAP@12

#### Train

In [89]:
# Rank articles for every training example (inference only: no gradients, no optimizer).
fashion_rec_v3.eval()

predictions = []

train_eval_dataset = RecommendationDatasetMultiLabel(
    dataset_filepath='data/final_train_set_with_counts_and_age_extended_for_submission.tsv',
    article_emb_bag=ARTICLE_EMBEDDING_BAG,
    total_articles=len(articles_df),
)

train_eval_loader = DataLoader(train_eval_dataset, batch_size=BATCH_SIZE)

with torch.no_grad():
    # The labels yielded by the dataset are not needed for ranking, so they stay on the CPU.
    for customer_id_idx, inputs, _ in tqdm(train_eval_loader, total=int(total_batches)):

        customer_id_idx, inputs = customer_id_idx.to(device), inputs.to(device)

        y_pred = fashion_rec_v3(customer_id_idx, inputs)

        # Move the indices off the GPU right away so the accumulated results do not
        # occupy device memory.
        # NOTE(review): only the first 12 should matter for MAP@12 if mapk truncates to k — confirm.
        predictions.append(y_pred.topk(1000, dim=1).indices.cpu())

100%|██████████| 62/62.0 [00:34<00:00,  1.79it/s]


In [90]:
# Join the per-batch index tensors into a single tensor with one row per example.
# This rebinds `predictions` from a list to a tensor, so do not re-run this cell on its own.
predictions = torch.concat(predictions)

In [91]:
predictions.shape

torch.Size([63412, 1000])

In [92]:
# Ground-truth label lists: one list of article indices per training row.
train_label_strings = pd.read_csv(
    'data/final_train_set_with_counts_and_age_extended_for_submission.tsv',
    sep='\t',
)['article_id_idx_label']
actuals = [[int(idx) for idx in label_string.split(',')] for label_string in train_label_strings]
len(actuals)

63412

In [93]:
# MAP@12 measured on the same file the model was trained on: it indicates how well the
# training data is fitted, not how well the model generalises.
mapk(actuals, predictions.tolist(), 12)

0.999269886902691

### Submission

In [94]:
# Number of batches in the submission input file, used for the progress bar. Only the
# first column is read because nothing but the row count is needed; the result is cast
# to int because np.ceil returns a float (tqdm would otherwise show "62/62.0").
total_sub_batches = int(np.ceil(
    pd.read_csv(
        'data/final_submission_inputs_with_counts_and_age_extended_for_submission.tsv',
        sep='\t',
        usecols=[0],
    ).shape[0] / BATCH_SIZE
))
total_sub_batches

62.0

In [95]:
# Top-12 article indices for every row of the submission input file (inference only).
fashion_rec_v3.eval()

predictions = []

dev_dataset = RecommendationDatasetMultiLabel(
    dataset_filepath='data/final_submission_inputs_with_counts_and_age_extended_for_submission.tsv',
    article_emb_bag=ARTICLE_EMBEDDING_BAG,
    total_articles=len(articles_df),
)

dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE)

with torch.no_grad():
    # The label column of this file is not used for prediction, so it is not moved to the device.
    for customer_id_idx, inputs, _ in tqdm(dev_loader, total=int(total_sub_batches)):

        customer_id_idx, inputs = customer_id_idx.to(device), inputs.to(device)

        y_pred = fashion_rec_v3(customer_id_idx, inputs)

        # Collect results on the CPU; they are converted to Python lists afterwards.
        predictions.append(y_pred.topk(12, dim=1).indices.cpu())

100%|██████████| 62/62.0 [00:34<00:00,  1.80it/s]


In [96]:
# Join the per-batch index tensors into a single tensor with one row per submission row.
# This rebinds `predictions` from a list to a tensor, so do not re-run this cell on its own.
predictions = torch.concat(predictions)

In [97]:
predictions.shape

torch.Size([63412, 12])

In [98]:
# Submission template: one row per customer with a placeholder prediction string.
sample_submission = pd.read_csv(
    'data/sample_submission.csv'
)
print(sample_submission.shape)
sample_submission.head()

(1371980, 2)


Unnamed: 0,customer_id,prediction
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,0706016001 0706016002 0372860001 0610776002 07...
1,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,0706016001 0706016002 0372860001 0610776002 07...
2,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0706016001 0706016002 0372860001 0610776002 07...
3,00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2...,0706016001 0706016002 0372860001 0610776002 07...
4,00006413d8573cd20ed7128e53b7b13819fe5cfc2d801f...,0706016001 0706016002 0372860001 0610776002 07...


In [99]:
# Lookup from the integer article index used by the model back to the article_id string.
article_idx_to_id = {
    article_idx: article_id
    for article_idx, article_id in zip(articles_df['article_id_idx'], articles_df['article_id'])
}

In [100]:
# Translate every predicted article index into its article_id string, row by row.
predictions_article_id = [
    [article_idx_to_id[article_idx] for article_idx in predicted_row]
    for predicted_row in predictions.tolist()
]
predictions_article_id[0]

['0624486001',
 '0573085042',
 '0759871025',
 '0624486064',
 '0685813040',
 '0679853030',
 '0715828013',
 '0714790020',
 '0624486069',
 '0874114002',
 '0865799006',
 '0835712003']

In [101]:
# Submission format: the predicted article ids of a row joined by single spaces.
predictions_strings = list(map(' '.join, predictions_article_id))
predictions_strings[0]

'0624486001 0573085042 0759871025 0624486064 0685813040 0679853030 0715828013 0714790020 0624486069 0874114002 0865799006 0835712003'

In [102]:
# The same file the predictions were generated from, loaded as a frame so the
# predictions can be attached to its rows.
submission_set_filt = pd.read_csv(
    'data/final_submission_inputs_with_counts_and_age_extended_for_submission.tsv',
    sep='\t',
)
print(submission_set_filt.shape)
submission_set_filt.head()

(63412, 8)


Unnamed: 0,customer_id_idx,article_id_idx_last10,article_id_idx_label,FN,Active,age,num_purchases,example_age
0,0,"74711,2861,92243,42086,42086,59253,53988,73222...",0,0.0,0.0,0.27,1,-0.05
1,1,"100157,102878,101898,102115,33833,27875,98220,...",0,1.0,1.0,0.33,1,-0.07
2,2,"61234,13023,91804,61236,17917,84492,61235,6961...",0,0.0,0.0,0.29,1,-0.06
3,3,"103079,104210,103080,4736,103285,104379,102244...",0,0.0,0.0,0.23,1,-0.06
4,4,"103984,76557,102870,99876,102870,99876,96449,9...",0,1.0,1.0,0.42,1,-0.06


In [103]:
len(predictions_strings)

63412

In [104]:
# Lookup from the integer customer index back to the original customer_id string.
customer_idx_to_id = {
    customer_idx: customer_id
    for customer_idx, customer_id in zip(customers['customer_id_idx'], customers['customer_id'])
}

In [105]:
# Attach the original customer_id to each row; an index missing from the lookup would
# become NaN here.
submission_set_filt['customer_id'] = submission_set_filt['customer_id_idx'].map(customer_idx_to_id)

In [106]:
# Positional assignment: correct only because the predictions were produced by streaming
# this same file in order (the DataLoader was created without shuffling).
submission_set_filt['prediction_model'] = predictions_strings

In [107]:
submission_set_filt.isnull().sum()

customer_id_idx          0
article_id_idx_last10    0
article_id_idx_label     0
FN                       0
Active                   0
age                      0
num_purchases            0
example_age              0
customer_id              0
prediction_model         0
dtype: int64

In [108]:
submission_set_filt.head()

Unnamed: 0,customer_id_idx,article_id_idx_last10,article_id_idx_label,FN,Active,age,num_purchases,example_age,customer_id,prediction_model
0,0,"74711,2861,92243,42086,42086,59253,53988,73222...",0,0.0,0.0,0.27,1,-0.05,00039306476aaf41a07fed942884f16b30abfa83a2a8be...,0624486001 0573085042 0759871025 0624486064 06...
1,1,"100157,102878,101898,102115,33833,27875,98220,...",0,1.0,1.0,0.33,1,-0.07,0003e867a930d0d6842f923d6ba7c9b77aba33fe2a0fbf...,0827487003 0673677023 0536139006 0897756003 06...
2,2,"61234,13023,91804,61236,17917,84492,61235,6961...",0,0.0,0.0,0.29,1,-0.06,000493dd9fc463df1acc2081450c9e75ef8e87d5dd17ed...,0757926001 0788575004 0640021019 0788575002 07...
3,3,"103079,104210,103080,4736,103285,104379,102244...",0,0.0,0.0,0.23,1,-0.06,00077dbd5c4a4991e092e63893ccf29294a9d5c46e8501...,0903762001 0448509014 0158340001 0879189005 09...
4,4,"103984,76557,102870,99876,102870,99876,96449,9...",0,1.0,1.0,0.42,1,-0.06,000fb6e772c5d0023892065e659963da90b1866035558e...,0871519008 0889669006 0913272003 0919273004 07...


In [109]:
# Attach the model predictions to the submission template. Customers without a model
# prediction get NaN in 'prediction_model'.
# - Dropping a pre-existing 'prediction_model' column first makes the cell safe to re-run
#   (a repeated merge would otherwise create 'prediction_model_x'/'prediction_model_y').
# - validate='many_to_one' raises if a customer_id occurs more than once on the right-hand
#   side, which would silently duplicate submission rows.
sample_submission = (
    sample_submission
    .drop(columns=['prediction_model'], errors='ignore')
    .merge(
        submission_set_filt[['customer_id', 'prediction_model']],
        on='customer_id',
        how='left',
        validate='many_to_one',
    )
)
sample_submission.head()

Unnamed: 0,customer_id,prediction,prediction_model
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,0706016001 0706016002 0372860001 0610776002 07...,
1,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,0706016001 0706016002 0372860001 0610776002 07...,
2,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0706016001 0706016002 0372860001 0610776002 07...,
3,00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2...,0706016001 0706016002 0372860001 0610776002 07...,
4,00006413d8573cd20ed7128e53b7b13819fe5cfc2d801f...,0706016001 0706016002 0372860001 0610776002 07...,


In [110]:
sample_submission.isnull().sum() / sample_submission.shape[0]

customer_id         0.000000
prediction          0.000000
prediction_model    0.953781
dtype: float64

In [111]:
# Fixed list of 12 article ids used for every customer the model produced no prediction for.
FALLBACK_PREDICTION = '0751471001 0706016001 0372860002 0610776002 0448509014 0730683050 0918292001 0760084003 0866731001 0372860001 0827968001 0706016003'

# Assign the result back instead of calling fillna(..., inplace=True) on a column
# selection: that chained in-place form is deprecated and leaves the frame unchanged
# when pandas Copy-on-Write is enabled.
sample_submission['prediction_model'] = sample_submission['prediction_model'].fillna(FALLBACK_PREDICTION)

In [112]:
# Replace the template's placeholder 'prediction' column with the model's column.
# Guarded so that re-running the cell is a no-op: without the guard a second run would
# drop the already-renamed model predictions.
if 'prediction_model' in sample_submission.columns:
    sample_submission = (
        sample_submission
        .drop(columns=['prediction'])
        .rename(columns={'prediction_model': 'prediction'})
    )

In [113]:
sample_submission

Unnamed: 0,customer_id,prediction
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,0751471001 0706016001 0372860002 0610776002 04...
1,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,0751471001 0706016001 0372860002 0610776002 04...
2,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0751471001 0706016001 0372860002 0610776002 04...
3,00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2...,0751471001 0706016001 0372860002 0610776002 04...
4,00006413d8573cd20ed7128e53b7b13819fe5cfc2d801f...,0751471001 0706016001 0372860002 0610776002 04...
...,...,...
1371975,ffffbbf78b6eaac697a8a5dfbfd2bfa8113ee5b403e474...,0751471001 0706016001 0372860002 0610776002 04...
1371976,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,0751471001 0706016001 0372860002 0610776002 04...
1371977,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,0751471001 0706016001 0372860002 0610776002 04...
1371978,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,0751471001 0706016001 0372860002 0610776002 04...


In [114]:
# Write the submission file (customer_id, prediction) without the DataFrame index.
sample_submission.to_csv('data/20220508-fashionrecv3_F_all_data_300ep.csv', index=False)

0.0137 Public Leaderboard (>05/2020, 200 epochs)

0.0147 Public Leaderboard (all, 100 epochs)

0.0155 Public Leaderboard (all, 200 epochs)

0.0155 Public Leaderboard (all, 300 epochs)