In [1]:
import torch.nn as nn
import pandas as pd
import numpy as np
import torchvision.transforms as transforms
import torch.cuda
import torch.optim
from sklearn.preprocessing import MinMaxScaler

In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
trainset = pd.read_csv('trainset.csv', index_col=0)
testset = pd.read_csv('testset.csv', index_col=0)

header = list(trainset)

y_train = trainset['gross']
y_test = testset['gross']
# header.remove('gross')
header.remove('vote')
header.remove('metascore')
header.remove('imdb')
header.remove('movie')
# header.remove('day')
# header.remove('month')
X_train = trainset.loc[:, header]
X_test = testset.loc[:, header]
X_train = X_train.astype('float32') 
X_test = X_test.astype('float32')
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')

num_features = X_train.shape[1]

normalizer = MinMaxScaler()
normalizer.fit(X_train)
X_train = normalizer.transform(X_train)


X = torch.from_numpy(X_train)
y = torch.tensor(y_train.values)

In [23]:
X.shape

torch.Size([4165, 183])

In [44]:
model = nn.Sequential(
    nn.Linear(num_features, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Linear(32, 1)
)

In [50]:
max_epoch = 5000
learning_rate = 1e-4

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [51]:
# Training
model = model.to(device)
X = X.to(device)
y = y.to(device)

for epoch in range(max_epoch):
    optimizer.zero_grad()
    
    y_pred = model(X)
    loss = criterion(y_pred, y)
    if epoch % 100 == 0:
        print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.item()))
 
    loss.backward()
 
    optimizer.step()

Epoch:0, Loss:3032130409988096.0000
Epoch:100, Loss:3032129873117184.0000
Epoch:200, Loss:3032129604681728.0000
Epoch:300, Loss:3032129067810816.0000
Epoch:400, Loss:3032128799375360.0000
Epoch:500, Loss:3032128262504448.0000
Epoch:600, Loss:3032127725633536.0000
Epoch:700, Loss:3032127457198080.0000
Epoch:800, Loss:3032127188762624.0000
Epoch:900, Loss:3032126920327168.0000
Epoch:1000, Loss:3032126651891712.0000
Epoch:1100, Loss:3032126115020800.0000
Epoch:1200, Loss:3032125846585344.0000
Epoch:1300, Loss:3032125578149888.0000
Epoch:1400, Loss:3032125041278976.0000
Epoch:1500, Loss:3032125041278976.0000
Epoch:1600, Loss:3032124504408064.0000
Epoch:1700, Loss:3032124504408064.0000
Epoch:1800, Loss:3032124235972608.0000
Epoch:1900, Loss:3032123699101696.0000
Epoch:2000, Loss:3032123699101696.0000
Epoch:2100, Loss:3032123699101696.0000
Epoch:2200, Loss:3032123162230784.0000
Epoch:2300, Loss:3032123162230784.0000
Epoch:2400, Loss:3032122893795328.0000
Epoch:2500, Loss:3032122356924416.000

In [47]:
print(model(X))

tensor([[20327150.],
        [20403086.],
        [20314036.],
        ...,
        [20359386.],
        [20309196.],
        [20322600.]], device='cuda:0', grad_fn=<AddmmBackward>)
