In [25]:
import pandas as pd
from sklearn.utils import shuffle
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch import nn
from numpy import sqrt

In [26]:
traind_df = pd.read_csv("/content/Train.csv")
test_df = pd.read_csv("/content/Test.csv")
traind_df.head()

Unnamed: 0,CustomerID,Gender,Age,Spending Score,Profession,Work Experience,Family Size,Annual Income
0,1,Male,19,39,Healthcare,1,4,15000
1,2,Male,21,81,Engineer,3,3,35000
2,3,Female,20,6,Engineer,1,1,86000
3,4,Female,23,77,Lawyer,0,2,59000
4,5,Female,31,40,Entertainment,2,6,38000


In [27]:
encoder ={
    "Gender": {"Male": 1 , "Female": 0},

        "Profession":{"Engineer": 1 , "Healthcare": 2 ,
                  "Lawyer": 3 , "Entertainment": 4 ,
                  "Artist": 5 , "Doctor": 6 ,
                  "Homemaker": 7 , "Executive": 8 ,
                  "Marketing": 9}
}

traind_df = traind_df.replace(encoder)
traind_df.head()

Unnamed: 0,CustomerID,Gender,Age,Spending Score,Profession,Work Experience,Family Size,Annual Income
0,1,1,19,39,2.0,1,4,15000
1,2,1,21,81,1.0,3,3,35000
2,3,0,20,6,1.0,1,1,86000
3,4,0,23,77,3.0,0,2,59000
4,5,0,31,40,4.0,2,6,38000


In [28]:
test_df = test_df.replace(encoder)
test_df.head()

Unnamed: 0,CustomerID,Gender,Age,Spending Score,Profession,Work Experience,Family Size,Annual Income
0,1601,0,24,79,4.0,0,7,51661
1,1602,0,71,90,5.0,1,6,152910
2,1603,1,1,64,4.0,0,7,114556
3,1604,1,0,62,2.0,1,6,143455
4,1605,1,30,92,2.0,0,3,173096


In [29]:
data = shuffle(traind_df)
for i in data:
  print(data[i].describe())


count    1599.000000
mean      800.000000
std       461.735855
min         1.000000
25%       400.500000
50%       800.000000
75%      1199.500000
max      1599.000000
Name: CustomerID, dtype: float64
count    1599.000000
mean        0.403377
std         0.490729
min         0.000000
25%         0.000000
50%         0.000000
75%         1.000000
max         1.000000
Name: Gender, dtype: float64
count    1599.000000
mean       48.800500
std        28.220739
min         0.000000
25%        25.000000
50%        47.000000
75%        73.000000
max        99.000000
Name: Age, dtype: float64
count    1599.000000
mean       50.635397
std        28.157660
min         0.000000
25%        27.000000
50%        50.000000
75%        75.000000
max       100.000000
Name: Spending Score, dtype: float64
count    1570.000000
mean        4.347771
std         2.159969
min         1.000000
25%         2.000000
50%         5.000000
75%         5.000000
max         9.000000
Name: Profession, dtype: float64
co

In [30]:
X_train_np = traind_df.to_numpy()[:, :-1]
Y_train_np = traind_df.to_numpy()[:, -1]

X_train_np.shape , Y_train_np.shape

((1599, 7), (1599,))

In [31]:
X_test_np = test_df.to_numpy()[:, :-1]
Y_test_np = test_df.to_numpy()[:, -1]

X_test_np.shape , Y_test_np.shape

((400, 7), (400,))

In [32]:
train_dataset = TensorDataset(torch.tensor(X_train_np, dtype=torch.float),
                              torch.tensor(Y_train_np.reshape((-1, 1)), dtype=torch.float))

train_dataset

<torch.utils.data.dataset.TensorDataset at 0x7ff398b8e080>

In [33]:
test_dataset = TensorDataset(torch.tensor(X_test_np, dtype=torch.float),
                              torch.tensor(Y_test_np.reshape((-1, 1)), dtype=torch.float))

test_dataset

<torch.utils.data.dataset.TensorDataset at 0x7ff398b8e3b0>

In [34]:
batch_size = 128

train_loader = DataLoader(train_dataset , batch_size)
test_loader = DataLoader(test_dataset , batch_size)

for X,Y in train_loader:
  print(X.shape , Y.shape)
  break

for X,Y in train_loader:
  print(X.shape , Y.shape)
  break

torch.Size([128, 7]) torch.Size([128, 1])
torch.Size([128, 7]) torch.Size([128, 1])


In [35]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f'using {device} device')

using cpu device


In [36]:
class CustomerNeuralNetwork(nn.Module):
  def __init__(self , input_dim , hidden_dim , out_dim):
    super(CustomerNeuralNetwork , self).__init__()

    self.first_layer = nn.Linear(input_dim , hidden_dim)
    self.first_activation = nn.ReLU()
    self.out_layer = nn.Linear(hidden_dim , out_dim)

  def forward(self , batch_input):
    batch_input = self.first_layer(batch_input)
    batch_input = self.first_activation(batch_input)
    batch_input = self.out_layer(batch_input)

    return batch_input

In [37]:
def train(dataloader , model, loss_function, optimizer):
  model.train()
  train_loss = 0

  for i , (X,Y) in enumerate(dataloader):
    X,Y = X.to(device) , Y.to(device)

    y_hat = model(X)
    mse = loss_function(y_hat , Y)
    train_loss += mse.item()

    optimizer.zero_grad()
    mse.backward()
    optimizer.step()

  num_batches = len(dataloader)
  train_mse = train_loss / num_batches
  print(f'Train RMSE : {sqrt(train_mse)}')

In [38]:
def test(dataloader , model , loss_function):
  model.eval()
  test_loss = 0

  with torch.no_grad():
    for i , (X,Y) in enumerate(dataloader):
      X,Y = X.to(device) , Y.to(device)

      y_hat = model(X)
      mse = loss_function(y_hat , Y)
      test_loss += mse.item()

  num_batches = len(dataloader)
  test_mse = test_loss / num_batches
  print(f'Test RMSE : {sqrt(test_mse)}')

In [39]:
input_dim = 7
hidden_dim = 5
output_dim = 1

model = CustomerNeuralNetwork(input_dim , hidden_dim , output_dim).to(device)
print(model)

CustomerNeuralNetwork(
  (first_layer): Linear(in_features=7, out_features=5, bias=True)
  (first_activation): ReLU()
  (out_layer): Linear(in_features=5, out_features=1, bias=True)
)


In [40]:
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [41]:
epochs = 10

for epoch in range(epochs):
  print(f'Epoch {epoch+1}:')

  train(train_loader , model , loss_function , optimizer)
  test(test_loader , model , loss_function)

Epoch 1:
Train RMSE : nan
Test RMSE : nan
Epoch 2:
Train RMSE : nan
Test RMSE : nan
Epoch 3:
Train RMSE : nan
Test RMSE : nan
Epoch 4:
Train RMSE : nan
Test RMSE : nan
Epoch 5:
Train RMSE : nan
Test RMSE : nan
Epoch 6:
Train RMSE : nan
Test RMSE : nan
Epoch 7:
Train RMSE : nan
Test RMSE : nan
Epoch 8:
Train RMSE : nan
Test RMSE : nan
Epoch 9:
Train RMSE : nan
Test RMSE : nan
Epoch 10:
Train RMSE : nan
Test RMSE : nan
