In [1]:
import pandas as pd
import numpy as np
import torch
from torch import optim
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pandas_profiling

In [2]:
import random

seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
x = pd.read_csv('train_data.csv').drop(['ID', 'price'], axis=1)
y = pd.read_csv('train_data.csv')['price']
test = pd.read_csv('test_data.csv').drop('ID', axis=1)

In [4]:
print(x.shape)
print(y.shape)
y = y.drop(x[(x.year == 2060)].index)
x = x.drop(x[(x.year == 2060)].index)
print(x.shape)
print(y.shape)

(17085, 9)
(17085,)
(17084, 9)
(17084,)


In [5]:
x.year = x.year - 1997
test.year = test.year - 1997

In [6]:
encoder = LabelEncoder()
encoder.fit(pd.concat([x['model'], test['model']]).drop_duplicates())
x['model'] = encoder.transform(x['model'])
test['model'] = encoder.transform(test['model'])

In [7]:
scaler = StandardScaler()
x = scaler.fit_transform(x)
test = scaler.transform(test)

In [8]:
x_tensor = torch.FloatTensor(x)
y_tensor = torch.FloatTensor(np.array(y).reshape(-1, 1))
test_tensor = torch.FloatTensor(test)

In [9]:
print(x_tensor.shape)
print(y_tensor.shape)
print(test_tensor.shape)

torch.Size([17084, 9])
torch.Size([17084, 1])
torch.Size([16831, 9])


In [10]:
W = torch.zeros((x.shape[1], 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

In [11]:
W.shape

torch.Size([9, 1])

In [12]:
optimizer = optim.SGD([W, b], lr=0.001)

In [13]:
epochs = 100000
for epoch in range(epochs + 1):
    
    hypothesis = x_tensor.matmul(W) + b

    # cost 계산
    cost = torch.mean(torch.abs((hypothesis - y_tensor)))

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print('Epoch {:4d}/{} shape: {} {}'.format(epoch, epochs, torch.mean(hypothesis), torch.mean(cost)))

Epoch    0/100000 shape: 0.0 16820.369140625
Epoch  100/100000 shape: 0.10000013560056686 16820.2734375
Epoch  200/100000 shape: 0.20000027120113373 16820.16796875
Epoch  300/100000 shape: 0.29999974370002747 16820.0703125
Epoch  400/100000 shape: 0.39999860525131226 16819.96484375
Epoch  500/100000 shape: 0.49999719858169556 16819.8671875
Epoch  600/100000 shape: 0.5999959707260132 16819.771484375
Epoch  700/100000 shape: 0.699994683265686 16819.666015625
Epoch  800/100000 shape: 0.7999935150146484 16819.568359375
Epoch  900/100000 shape: 0.8999921083450317 16819.462890625
Epoch 1000/100000 shape: 0.9999906420707703 16819.369140625
Epoch 1100/100000 shape: 1.099995493888855 16819.2734375
Epoch 1200/100000 shape: 1.2000001668930054 16819.16796875
Epoch 1300/100000 shape: 1.3000048398971558 16819.072265625
Epoch 1400/100000 shape: 1.400009274482727 16818.966796875
Epoch 1500/100000 shape: 1.500013828277588 16818.869140625
Epoch 1600/100000 shape: 1.6000192165374756 16818.771484375
Epoch

In [14]:
submit = pd.read_csv('sample_submit.csv')
submit['price'] = np.array((test_tensor @ W  + b).detach())
submit.to_csv('submit.csv', index=False)
