In [1]:
import pandas as pd
import numpy as np
import torch

In [2]:
import random
import torch

# Seed every random number generator used in this notebook so that a
# Restart & Run All reproduces the same weights and results.
seed = 42

# Python's `random`, NumPy's legacy global RNG and PyTorch's CPU generator.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

# CUDA generators are only seeded when a CUDA device is present.
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Read the input tables from CSV files in the working directory:
#   x    - training features (pca_train.csv)
#   y    - the 'label' column of train_label.csv
#   test - features to predict on (pca_test.csv)
test = pd.read_csv('pca_test.csv')
x = pd.read_csv('pca_train.csv')
y = pd.read_csv('train_label.csv').loc[:, 'label']

In [4]:
# Sanity check: shapes of the training features, labels and test features,
# printed one per line.
print(x.shape, y.shape, test.shape, sep='\n')

(27671, 256)
(27671,)
(13629, 256)


In [5]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply
# the same fitted scaler to both the training and the test features.
# NOTE: `x` and `test` are rebound to the scaled output, so re-running this
# cell on its own would scale already-scaled data.
scaler = StandardScaler().fit(x)
x = scaler.transform(x)
test = scaler.transform(test)

In [6]:
# Pick the best available compute backend instead of hard-coding Apple's
# 'mps': CUDA if present, otherwise MPS, otherwise fall back to the CPU so
# the notebook also runs on machines without a GPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

In [7]:
# Wrap the scaled feature arrays as float tensors and the labels as a long
# (integer) tensor, the target type used with CrossEntropyLoss below.
# The tensors are left on the CPU; nothing is moved to `device`.
x_tensor, test_tensor = (torch.FloatTensor(features) for features in (x, test))
y_tensor = torch.LongTensor(y)

In [8]:
# Confirm the tensors kept the shapes of the tables they were built from.
print(x_tensor.shape, y_tensor.shape, test_tensor.shape, sep='\n')

torch.Size([27671, 256])
torch.Size([27671])
torch.Size([13629, 256])


In [9]:
# Number of distinct label values; used as the classifier's output width.
y.unique().size

198

In [10]:
# Fully connected classifier: input -> 512 -> 512 -> one logit per label,
# with a sigmoid after each of the first two linear layers.
n_features = x.shape[1]
n_classes = len(y.unique())
hidden_width = 512

# Layers are created in the order they are applied; keep this order so the
# seeded weight initialisation stays the same.
linear1 = torch.nn.Linear(n_features, hidden_width, bias=True)
linear2 = torch.nn.Linear(hidden_width, hidden_width, bias=True)
linear3 = torch.nn.Linear(hidden_width, n_classes, bias=True)

# A single Sigmoid module is reused for both hidden activations.
sigmoid = torch.nn.Sigmoid()

layers = [linear1, sigmoid, linear2, sigmoid, linear3]
model = torch.nn.Sequential(*layers)

In [11]:
# Full-batch gradient descent: every epoch runs the whole training set
# through the model once and takes a single SGD step.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1)

epochs = 2000
log_every = 100
log_template = 'Epoch {:4d}/{} Cost: {:.6f}'

for epoch in range(epochs):
    # Forward pass over all training rows.
    hypothesis = model(x_tensor)
    cost = loss(hypothesis, y_tensor)

    # Clear the previous step's gradients, backpropagate, update weights.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report the cost only on every `log_every`-th epoch (0, 100, 200, ...).
    if epoch % log_every:
        continue
    print(log_template.format(epoch, epochs, cost.item()))

Epoch    0/2000 Cost: 5.310937
Epoch  100/2000 Cost: 4.683626
Epoch  200/2000 Cost: 3.673515
Epoch  300/2000 Cost: 2.576214
Epoch  400/2000 Cost: 2.204597
Epoch  500/2000 Cost: 1.984971
Epoch  600/2000 Cost: 1.842402
Epoch  700/2000 Cost: 1.736564
Epoch  800/2000 Cost: 1.652856
Epoch  900/2000 Cost: 1.584927
Epoch 1000/2000 Cost: 1.530155
Epoch 1100/2000 Cost: 1.485476
Epoch 1200/2000 Cost: 1.447931
Epoch 1300/2000 Cost: 1.419337
Epoch 1400/2000 Cost: 1.391819
Epoch 1500/2000 Cost: 1.367651
Epoch 1600/2000 Cost: 1.345977
Epoch 1700/2000 Cost: 1.326213
Epoch 1800/2000 Cost: 1.307922
Epoch 1900/2000 Cost: 1.290783


In [12]:
# Inference only: disable gradient tracking so the forward pass over the
# whole test set does not build an autograd graph that is never used.
# argmax over dim=1 picks, for each test row, the index of the largest logit.
with torch.no_grad():
    predict = torch.argmax(model(test_tensor), dim=1)

In [13]:
# Display the predicted class index for each test row (argmax of the model
# output computed in the previous cell).
predict

tensor([113,  91,  68,  ...,  23,  68, 117])

In [14]:
# Build the submission file from the sample template.
submit = pd.read_csv('sample_submission.csv')

# Use explicit column assignment: `submit.label = ...` only updates an
# existing column and otherwise just sets a Python attribute, which would
# leave the written CSV without the predictions.
# The tensor is converted to a NumPy integer array first so the column holds
# plain numbers regardless of how pandas handles torch tensors.
submit['label'] = predict.detach().cpu().numpy()

submit.to_csv('submit.csv', index=False)