In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as f
import torch
from torch import nn

# Load the ratings table and discard the "Top Ten" column.
ramen = pd.read_csv("../input/ramen-ratings/ramen-ratings.csv").drop(columns=["Top Ten"])

# Rows whose rating is the literal string "Unrated" are removed.
ramen = ramen.loc[ramen["Stars"].ne("Unrated")]

# Positional hold-out split: the first 2300 rows train, the remainder tests.
train, test = ramen.iloc[:2300], ramen.iloc[2300:]

print(
    "len train :" + str(len(train)),
    "len test :" + str(len(test)),
    "len whole dataset :" + str(len(ramen)),
    sep="\n",
)
ramen.head()

In [None]:
class RamenDataset(Dataset):
    """Integer-encode the rows of a ramen-ratings frame for a torch model.

    Each sample is a dict with:
      * ``"x_obs"``    -- LongTensor holding, for every column of the row except
        ``'Review #'`` and ``'Stars'`` (in frame order), the position of the
        row's value in that column's vocabulary; values absent from the
        vocabulary map to the position of ``'UNK'``.
      * ``"y_target"`` -- float tensor of shape (1,) holding ``Stars / 5``.

    NOTE(review): Ramen_Logistic.forward in this notebook reads positions 0-3
    of ``x_obs`` as Brand, Variety, Style, Country, so the remaining columns
    of ``df`` are expected to appear in that order.
    """

    # Columns that are never encoded as model inputs.
    _NON_FEATURE_COLUMNS = ('Review #', 'Stars')
    # Divisor used to scale the rating into the regression target.
    _MAX_STARS = 5.00

    def __init__(self, df, vocab_df=None):
        """
        Args:
            df (pandas.DataFrame): Rows served by this dataset. Must contain
                the 'Review #' and 'Stars' columns.
            vocab_df (pandas.DataFrame, optional): Frame whose column values
                define the vocabularies. When omitted, the module-level
                ``train`` frame is used (the behaviour this class always had),
                so that every dataset in the notebook shares one encoding.
        """
        self.df = df
        # Per-column vocabulary: {column name: [value, ..., 'UNK']}.
        self.index_ramen = self.init_indexes(train if vocab_df is None else vocab_df)
        # Reverse maps (value -> position) so encoding a row is a dict lookup
        # instead of two linear scans of the vocabulary list. setdefault keeps
        # the FIRST position of a repeated value, matching list.index().
        self._lookup = {}
        for column, values in self.index_ramen.items():
            positions = {}
            for position, value in enumerate(values):
                positions.setdefault(value, position)
            self._lookup[column] = positions

    def init_indexes(self, df):
        """Return ``{column: unique values of that column + ['UNK']}`` for ``df``."""
        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        return {column: list(set(values)) + ['UNK'] for column, values in df.items()}

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Encode the row at integer position ``idx`` (see class docstring)."""
        entry = self.df.iloc[idx]
        rate = entry['Stars']
        # drop() returns a new Series, so the row taken from self.df is not
        # modified while the id and label columns are removed.
        features = entry.drop(labels=list(self._NON_FEATURE_COLUMNS))
        x_obs = []
        for column, value in features.items():
            positions = self._lookup[column]
            x_obs.append(positions.get(value, positions['UNK']))
        sample = {
            "x_obs": torch.tensor(x_obs, dtype=torch.long),
            "y_target": torch.tensor([float(rate) / self._MAX_STARS], dtype=torch.float32),
        }
        return sample
# Wrap the training split in a dataset, then display one encoded sample.
ramen_train_dt = RamenDataset(df=train)
ramen_train_dt[3]

In [None]:
class Ramen_Logistic(nn.Module):
    """Sum-of-embeddings regressor over four categorical ramen features.

    Each of Brand, Variety, Style and Country gets its own embedding table.
    The four embeddings are added, passed through one hidden linear layer
    with ReLU, and projected to a single output value per row.
    """

    def __init__(self, index_ramen=None, emb_dim=200, hidden_dim=400):
        """
        Args:
            index_ramen (mapping, optional): ``{column: vocabulary}`` with the
                keys "Brand", "Variety", "Style" and "Country"; only the
                length of each vocabulary is used, to size the embedding
                tables. When omitted, the module-level
                ``ramen_train_dt.index_ramen`` is used (the behaviour this
                class always had).
            emb_dim (int): Width shared by all four embedding tables.
            hidden_dim (int): Width of the hidden layer.
        """
        super().__init__()
        if index_ramen is None:
            index_ramen = ramen_train_dt.index_ramen
        # Layers are created in the original order so that parameter
        # initialisation under a fixed RNG seed is unchanged.
        self.lin = nn.Linear(emb_dim, hidden_dim)
        self.int = nn.Linear(hidden_dim, 1)
        self.emb_brand = nn.Embedding(len(index_ramen["Brand"]), emb_dim)
        self.emb_variety = nn.Embedding(len(index_ramen["Variety"]), emb_dim)
        self.emb_style = nn.Embedding(len(index_ramen["Style"]), emb_dim)
        self.emb_country = nn.Embedding(len(index_ramen["Country"]), emb_dim)

    def forward(self, xb):
        """Map a batch of encoded rows to one prediction per row.

        Args:
            xb: Integer tensor whose columns 0..3 are the Brand, Variety,
                Style and Country indices. It is transposed first so each
                feature can be selected with ``xb[i]``.

        Returns:
            The output of the final linear layer, one value per row.
        """
        xb = xb.t()
        emb = self.emb_brand(xb[0]) + self.emb_variety(xb[1]) + self.emb_style(xb[2]) + self.emb_country(xb[3])
        return self.int(f.relu(self.lin(emb)))

In [None]:
# Train the model with Adam on mean-squared error for 30 epochs.
model = Ramen_Logistic()
loss_func = nn.MSELoss()
opt = torch.optim.Adam(model.parameters(), 0.001)

# The loader does not shuffle, so it is identical for every epoch and can be
# built once instead of once per epoch.
train_loader = DataLoader(ramen_train_dt, batch_size=16)

model.train()
for epoch in range(30):
    loss_sum = 0.0   # sum of per-sample squared errors seen this epoch
    n_seen = 0       # number of samples seen this epoch
    for batch in train_loader:
        x_obs = batch["x_obs"]
        y_target = batch["y_target"]
        opt.zero_grad()
        pred = model(x_obs)
        # nn.MSELoss is called as loss(input, target): prediction first.
        loss = loss_func(pred, y_target)
        loss.backward()
        opt.step()
        # loss is the batch mean; weight by batch size so the epoch figure is
        # a true per-sample mean even when the last batch is smaller.
        loss_sum += loss.item() * len(x_obs)
        n_seen += len(x_obs)
    # Report the epoch's mean loss as a plain float rather than the raw
    # tensor of whichever batch happened to come last.
    print("epoch #{} : {}".format(epoch, loss_sum / max(n_seen, 1)))

In [None]:
# Evaluate the trained model on the held-out rows.
score = nn.MSELoss()
targets = []
predictions = []
model.eval()
ramen_test_dt = RamenDataset(test)
with torch.no_grad():
    # The loop variable must be `batch`: the body previously read a stale
    # `batch` left over from the training cell, so every prediction was made
    # on the same training rows instead of the test rows.
    for batch in DataLoader(ramen_test_dt, batch_size=1):
        x_obs = batch["x_obs"]
        y_target = batch["y_target"]
        pred = model(x_obs)
        targets.append(y_target)
        predictions.append(pred)

# Concatenate once; each tensor has one row per test sample and one column.
all_predictions = torch.cat(predictions)
all_targets = torch.cat(targets)

# Show the first 30 values rescaled from the /5 training target back to stars.
print("predictions :", [round(p * 5, 2) for p in all_predictions[:30, 0].tolist()])
print("target :", [round(t * 5, 2) for t in all_targets[:30, 0].tolist()])
print(score(all_predictions, all_targets))