In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
train = pd.read_csv("/ML/ML Projects/calculate calories/playground-series-s5e5/train.csv")
test = pd.read_csv("/ML/ML Projects/calculate calories/playground-series-s5e5/test.csv")

In [3]:
train.columns

Index(['id', 'Sex', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate',
       'Body_Temp', 'Calories'],
      dtype='object')

In [4]:
test.columns

Index(['id', 'Sex', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate',
       'Body_Temp'],
      dtype='object')

In [5]:
df =  pd.get_dummies(train, columns=["Sex"], dtype=int)
df.columns

Index(['id', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp',
       'Calories', 'Sex_female', 'Sex_male'],
      dtype='object')

In [6]:
df = df.drop(columns = ['id', 'Calories'])

In [7]:
df.head()

Unnamed: 0,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Sex_female,Sex_male
0,36,189.0,82.0,26.0,101.0,41.0,0,1
1,64,163.0,60.0,8.0,85.0,39.7,1,0
2,51,161.0,64.0,7.0,84.0,39.8,1,0
3,20,192.0,90.0,25.0,105.0,40.7,0,1
4,38,166.0,61.0,25.0,102.0,40.6,1,0


Now we can create our independent (predictors) and dependent (target) variables. They both need to be PyTorch tensors. Our dependent variable is Calories:

In [8]:
from torch import tensor

t_dep = tensor(train.Calories)

In [9]:
t_indep = tensor(df.values, dtype=torch.float)
t_indep

tensor([[ 36.0000, 189.0000,  82.0000,  ...,  41.0000,   0.0000,   1.0000],
        [ 64.0000, 163.0000,  60.0000,  ...,  39.7000,   1.0000,   0.0000],
        [ 51.0000, 161.0000,  64.0000,  ...,  39.8000,   1.0000,   0.0000],
        ...,
        [ 60.0000, 162.0000,  67.0000,  ...,  40.9000,   0.0000,   1.0000],
        [ 45.0000, 182.0000,  91.0000,  ...,  40.3000,   0.0000,   1.0000],
        [ 39.0000, 171.0000,  65.0000,  ...,  40.6000,   1.0000,   0.0000]])

In [10]:
t_indep.shape

torch.Size([750000, 8])

# Setting up a linear model

Now that we've got a matrix of independent variables and a dependent variable vector, we can work on calculating our predictions and our loss. In this section, we're going to manually do a single step of calculating predictions and loss for every row of our data.
Our first model will be a simple linear model. We'll need a coefficient for each column in t_indep. We'll pick random numbers in the range (-0.5,0.5), and set our manual seed.

In [11]:
torch.manual_seed(442)

n_coeff = t_indep.shape[1]
coeffs = torch.rand(n_coeff)-0.5
coeffs

tensor([-0.4629,  0.1386,  0.2409, -0.2262, -0.2632, -0.3147,  0.4876,  0.3136])

Our predictions will be calculated by multiplying each row by the coefficients, and adding them up. 

In [12]:
t_indep*coeffs

tensor([[-16.6644,  26.1915,  19.7539,  ..., -12.9033,   0.0000,   0.3136],
        [-29.6256,  22.5884,  14.4540,  ..., -12.4942,   0.4876,   0.0000],
        [-23.6079,  22.3112,  15.4177,  ..., -12.5257,   0.4876,   0.0000],
        ...,
        [-27.7740,  22.4498,  16.1404,  ..., -12.8719,   0.0000,   0.3136],
        [-20.8305,  25.2214,  21.9220,  ..., -12.6830,   0.0000,   0.3136],
        [-18.0531,  23.6970,  15.6586,  ..., -12.7775,   0.4876,   0.0000]])

Let's make all the columns contain numbers from 0 to 1, by dividing each column by its max():

In [13]:
vals,indices = t_indep.max(dim=0)
t_indep = t_indep / vals

In [14]:
val_dep = t_dep.max()
# t_dep = t_dep/val_dep

In [15]:
t_indep*coeffs

tensor([[-0.2109,  0.1180,  0.1497,  ..., -0.3109,  0.0000,  0.3136],
        [-0.3750,  0.1017,  0.1095,  ..., -0.3011,  0.4876,  0.0000],
        [-0.2988,  0.1005,  0.1168,  ..., -0.3018,  0.4876,  0.0000],
        ...,
        [-0.3516,  0.1011,  0.1223,  ..., -0.3102,  0.0000,  0.3136],
        [-0.2637,  0.1136,  0.1661,  ..., -0.3056,  0.0000,  0.3136],
        [-0.2285,  0.1067,  0.1186,  ..., -0.3079,  0.4876,  0.0000]])

We can now create predictions from our linear model, by adding up the rows of the product:

In [16]:
preds = (t_indep*coeffs).sum(axis=1)

In [17]:
preds[:10]

tensor([-0.3444, -0.2123, -0.1213, -0.2326, -0.2262, -0.1212,  0.1255, -0.3555,
        -0.2028, -0.4894])

In [18]:
loss = torch.abs(preds-t_dep).mean()
loss

tensor(88.4948, dtype=torch.float64)

Creating functions for pred and loss:

In [19]:
def calc_preds(coeffs, indeps): return (indeps*coeffs).sum(axis=1)
def calc_loss(coeffs, indeps, deps): return torch.abs(calc_preds(coeffs, indeps)-deps).mean()

# Doing a gradient descent step

In this section, we're going to do a single "epoch" of gradient descent manually. The only thing we're going to automate is calculating gradients, because let's face it that's pretty tedious and entirely pointless to do by hand! To get PyTorch to calculate gradients, we'll need to call requires_grad_() on our coeffs

In [20]:
coeffs.requires_grad_()

tensor([-0.4629,  0.1386,  0.2409, -0.2262, -0.2632, -0.3147,  0.4876,  0.3136],
       requires_grad=True)

Now when we calculate our loss, PyTorch will keep track of all the steps, so we'll be able to get the gradients afterwards:

In [21]:
loss = calc_loss(coeffs, t_indep, t_dep)
loss

tensor(88.4948, dtype=torch.float64, grad_fn=<MeanBackward0>)

In [22]:
loss.backward()

In [23]:
coeffs.grad

tensor([-0.5243, -0.7869, -0.5693, -0.5140, -0.7460, -0.9647, -0.5010, -0.4990])

In [24]:
loss = calc_loss(coeffs, t_indep, t_dep)
loss.backward()
coeffs.grad

tensor([-1.0486, -1.5739, -1.1386, -1.0281, -1.4919, -1.9295, -1.0019, -0.9981])

.grad values are have doubled. That's because it added the gradients a second time. For this reason, after we use the gradients to do a gradient descent step, we need to set them back to zero.

We can now do one gradient descent step, and check that our loss decreases:

In [25]:
loss = calc_loss(coeffs, t_indep, t_dep)
loss.backward()
with torch.no_grad():
    coeffs.sub_(coeffs.grad * 0.1)
    coeffs.grad.zero_()
    print(calc_loss(coeffs, t_indep, t_dep))

tensor(87.4539, dtype=torch.float64)


In PyTorch, any method that ends in _ changes its object in-place.
a.sub_(b) subtracts b from a in-place.
a.zero_() sets all elements of a tensor to zero.

# Training the linear model

In [26]:
from fastai.data.transforms import RandomSplitter
trn_split,val_split=RandomSplitter(seed=42)(df)

In [27]:
trn_indep,val_indep = t_indep[trn_split],t_indep[val_split]
trn_dep,val_dep = t_dep[trn_split],t_dep[val_split]
len(trn_indep),len(val_indep)

(600000, 150000)

In [28]:
def update_coeffs(coeffs, lr):
    coeffs.sub_(coeffs.grad * lr)
    coeffs.grad.zero_()

In [29]:
def one_epoch(coeffs, lr):
    loss = calc_loss(coeffs, trn_indep, trn_dep)
    loss.backward()
    with torch.no_grad(): update_coeffs(coeffs, lr)
    print(f"{loss:.3f}", end="; ")

In [30]:
def init_coeffs(): return (torch.rand(n_coeff)-0.5).requires_grad_()

In [31]:
def train_model(epochs=30, lr=0.01):
    torch.manual_seed(442)
    coeffs = init_coeffs()
    for i in range(epochs): one_epoch(coeffs, lr=lr)
    return coeffs

In [32]:
coeffs = train_model(1200, lr=0.5)

88.525; 86.791; 85.068; 83.399; 81.791; 80.250; 78.792; 77.408; 76.090; 74.844; 73.663; 72.537; 71.462; 70.437; 69.460; 68.528; 67.642; 66.796; 65.987; 65.215; 64.478; 63.772; 63.098; 62.452; 61.835; 61.243; 60.674; 60.127; 59.601; 59.097; 58.613; 58.148; 57.700; 57.270; 56.855; 56.455; 56.071; 55.702; 55.349; 55.012; 54.687; 54.376; 54.077; 53.789; 53.512; 53.246; 52.990; 52.742; 52.504; 52.274; 52.053; 51.839; 51.633; 51.434; 51.243; 51.059; 50.882; 50.712; 50.547; 50.388; 50.234; 50.085; 49.942; 49.803; 49.669; 49.540; 49.415; 49.294; 49.177; 49.064; 48.955; 48.849; 48.747; 48.648; 48.552; 48.459; 48.368; 48.280; 48.195; 48.112; 48.031; 47.953; 47.876; 47.802; 47.730; 47.660; 47.591; 47.524; 47.459; 47.395; 47.333; 47.273; 47.213; 47.156; 47.099; 47.044; 46.990; 46.937; 46.885; 46.834; 46.785; 46.736; 46.689; 46.642; 46.596; 46.551; 46.507; 46.463; 46.421; 46.379; 46.337; 46.297; 46.257; 46.217; 46.178; 46.139; 46.102; 46.064; 46.027; 45.990; 45.954; 45.918; 45.883; 45.848; 45.814; 

The coefficients for each column:

In [33]:
def show_coeffs(): return dict(zip(df.columns, coeffs.requires_grad_(False)))
show_coeffs()

{'Age': tensor(5.1482),
 'Height': tensor(-6.9203),
 'Weight': tensor(-4.7393),
 'Duration': tensor(132.3190),
 'Heart_Rate': tensor(28.0554),
 'Body_Temp': tensor(-0.2163),
 'Sex_female': tensor(-2.4490),
 'Sex_male': tensor(-4.9456)}

# Measuring RMSLE

The Kaggle competition is not, however, scored by absolute error (which is our loss function). It's scored by RMSLE. Let's see how accurate we were on the validation set. First, calculate the predictions:

In [35]:
preds = calc_preds(coeffs, val_indep)

In [36]:
results = preds
results[:20]

tensor([ 56.8126,  36.8158,  27.5813,  64.0966, 117.1311,  81.6798,  80.8388,
         93.5057, 143.2396,  52.3924,  96.7526, 118.6191, 136.0366,  71.3817,
         73.8211,  85.4227,  65.8850,  73.1081,  38.7274,  14.0739])

In [82]:
def rmsle(pred, val_dep):
    rmsle = torch.tensor((torch.log(1+pred) - torch.log(1+val_dep)).mean())
    return rmsle

In [38]:
rmsle(preds, val_dep)

tensor(0.0955, dtype=torch.float64)

# Submitting to Kaggle

In [39]:
test.isna().sum()

id            0
Sex           0
Age           0
Height        0
Weight        0
Duration      0
Heart_Rate    0
Body_Temp     0
dtype: int64

In [40]:
test_df = test.copy()

In [41]:
test_df =  pd.get_dummies(test_df, columns=["Sex"], dtype=int)

In [42]:
test_df = test_df.drop(columns = ['id'])

In [43]:
tst_indep = tensor(test_df.values, dtype=torch.float)
tst_indep = tst_indep/vals

In [44]:
preds_test = calc_preds(coeffs, tst_indep)

In [45]:
test['Calories'] = preds_test

In [46]:
sub_df = test[['id','Calories']]
sub_df.to_csv('sub.csv', index=False)

In [47]:
!head sub.csv

id,Calories
750000,39.297047
750001,97.17057
750002,83.24481
750003,103.56349
750004,82.66986
750005,36.76356
750006,48.839397
750007,15.873104
750008,22.898008


# Using Relu

Since, we need predictions starting from 0, we can use Relu function for our predictions

In [48]:
def calc_preds(coeffs, indeps): 
    tot = (indeps*coeffs).sum(axis=1)
    return torch.relu(tot)  # ReLu
def calc_loss(coeffs, indeps, deps): return torch.abs(calc_preds(coeffs, indeps)-deps).mean()

In [49]:
def update_coeffs(coeffs, lr):
    coeffs.sub_(coeffs.grad * lr)
    coeffs.grad.zero_()

In [50]:
def one_epoch(coeffs, lr):
    loss = calc_loss(coeffs, trn_indep, trn_dep)
    loss.backward()
    with torch.no_grad(): update_coeffs(coeffs, lr)
    print(f"{loss:.3f}", end="; ")

In [51]:
def init_coeffs(): return (torch.rand(n_coeff)-0.5).requires_grad_()

In [52]:
def train_model(epochs=30, lr=0.01):
    torch.manual_seed(442)
    coeffs = init_coeffs()
    for i in range(epochs): one_epoch(coeffs, lr=lr)
    return coeffs

In [53]:
coeffs = train_model(1200, lr=0.5)

88.309; 88.296; 88.077; 86.466; 84.748; 83.091; 81.493; 79.966; 78.523; 77.152; 75.849; 74.616; 73.447; 72.331; 71.265; 70.250; 69.282; 68.359; 67.481; 66.642; 65.841; 65.076; 64.345; 63.646; 62.977; 62.337; 61.725; 61.138; 60.573; 60.030; 59.509; 59.008; 58.528; 58.067; 57.623; 57.195; 56.783; 56.386; 56.005; 55.639; 55.290; 54.955; 54.633; 54.324; 54.027; 53.741; 53.467; 53.202; 52.948; 52.703; 52.466; 52.238; 52.018; 51.805; 51.601; 51.404; 51.214; 51.032; 50.856; 50.686; 50.523; 50.365; 50.212; 50.064; 49.922; 49.784; 49.651; 49.522; 49.398; 49.278; 49.162; 49.050; 48.941; 48.836; 48.735; 48.636; 48.541; 48.448; 48.358; 48.271; 48.186; 48.103; 48.023; 47.945; 47.869; 47.795; 47.723; 47.653; 47.585; 47.519; 47.454; 47.390; 47.329; 47.268; 47.209; 47.152; 47.095; 47.040; 46.986; 46.934; 46.882; 46.832; 46.782; 46.734; 46.687; 46.640; 46.594; 46.549; 46.505; 46.462; 46.420; 46.378; 46.337; 46.296; 46.256; 46.217; 46.178; 46.139; 46.101; 46.064; 46.027; 45.991; 45.954; 45.919; 45.884; 

In [54]:
show_coeffs()

{'Age': tensor(5.1262),
 'Height': tensor(-6.8705),
 'Weight': tensor(-4.7058),
 'Duration': tensor(132.1092),
 'Heart_Rate': tensor(28.0378),
 'Body_Temp': tensor(-0.1705),
 'Sex_female': tensor(-2.4075),
 'Sex_male': tensor(-4.9245)}

The coeff are similar to previous predictions for same no of epochs and learning rate. Further, we will see how it performs for validation and test sets.

In [55]:
preds = calc_preds(coeffs, val_indep)

In [56]:
rmsle(preds, val_dep)

tensor(0.0962, dtype=torch.float64)

In [57]:
preds_test = calc_preds(coeffs, tst_indep)

In [58]:
test['Calories'] = preds_test

In [59]:
sub_df = test[['id','Calories']]
sub_df.to_csv('sub_relu.csv', index=False)

In [60]:
!head sub_relu.csv

id,Calories
750000,39.348866
750001,97.14482
750002,83.26062
750003,103.54146
750004,82.678665
750005,36.837868
750006,48.876278
750007,15.959247
750008,22.983482


# Using Matrix

In [61]:
(val_indep*coeffs).sum(axis=1)

tensor([56.8435, 36.8913, 27.6741,  ..., 36.3021, 56.0637, 47.2507])

Multiplying elements together and then adding across rows is identical to doing a matrix-vector product! Python uses the @ operator to indicate matrix products, and is supported by PyTorch tensors. Therefore, we can replicate the above calculate more simply like so:

In [62]:
val_indep@coeffs

tensor([56.8435, 36.8913, 27.6741,  ..., 36.3021, 56.0637, 47.2507])

This is faster as matrix multiplication in pytorch is highly optimized

In order to do matrix-matrix products, we need to turn coeffs into a column vector (i.e. a matrix with a single column), which we can do by passing a second argument 1 to torch.rand(), indicating that we want our coefficients to have one column:

In [63]:
def init_coeffs(): return (torch.rand(n_coeff, 1)-0.5).requires_grad_()

We'll also need to turn our dependent variable into a column vector, which we can do by indexing the column dimension with the special value None, which tells PyTorch to add a new dimension in this position:

In [64]:
trn_dep = trn_dep[:,None]
val_dep = val_dep[:,None]

In [65]:
lr = 0.5
epochs = 1200
def train_model(epochs = 100, lr = 0.1):
    torch.manual_seed(442)
    coeffs = init_coeffs()
    for i in range(epochs):
        preds = torch.relu(trn_indep@coeffs)
        loss = torch.abs((preds) - trn_dep).mean()
        loss.backward()
        with torch.no_grad():
            coeffs.sub_(coeffs.grad * lr)
            coeffs.grad.zero_()
        print(f"{loss:.3f}", end="; ")
    return coeffs

In [66]:
coeffs = train_model(1200, lr = 0.5)

88.309; 88.296; 88.077; 86.466; 84.748; 83.091; 81.493; 79.966; 78.523; 77.153; 75.849; 74.616; 73.447; 72.332; 71.265; 70.251; 69.282; 68.359; 67.482; 66.643; 65.841; 65.076; 64.345; 63.646; 62.977; 62.337; 61.725; 61.137; 60.573; 60.030; 59.508; 59.008; 58.528; 58.066; 57.622; 57.194; 56.782; 56.385; 56.004; 55.638; 55.289; 54.954; 54.632; 54.323; 54.026; 53.741; 53.466; 53.201; 52.947; 52.702; 52.465; 52.237; 52.017; 51.805; 51.600; 51.403; 51.214; 51.031; 50.856; 50.686; 50.523; 50.365; 50.212; 50.065; 49.922; 49.785; 49.652; 49.523; 49.399; 49.279; 49.163; 49.051; 48.942; 48.837; 48.736; 48.637; 48.542; 48.449; 48.359; 48.272; 48.187; 48.104; 48.024; 47.946; 47.870; 47.797; 47.725; 47.655; 47.587; 47.520; 47.455; 47.392; 47.330; 47.270; 47.211; 47.153; 47.097; 47.042; 46.988; 46.935; 46.884; 46.833; 46.784; 46.736; 46.688; 46.641; 46.596; 46.551; 46.507; 46.464; 46.421; 46.379; 46.338; 46.298; 46.258; 46.218; 46.179; 46.141; 46.103; 46.065; 46.029; 45.992; 45.956; 45.920; 45.885; 

The loss observed during using matrix operation and ReLu function is almost same as before. 

In [80]:
def rmsle(pred, val_dep):
    pred = pred.reshape(-1,1)
    rmsle = torch.tensor((torch.log(1+pred) - torch.log(1+val_dep)).mean())
    return rmsle

In [81]:
rmsle(preds, val_dep)

tensor(0.0962, dtype=torch.float64)

RMSLE is same for validation set. Also, when using ones instead of initializing using random values, we get the same loss.