In [1]:
# 필요한 패키지 임포트
import random
import torch
import pandas as pd
import numpy as np

# Fix every random number generator in use so that runs are reproducible
seed = 1
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

# CUDA generators are only seeded when a GPU is actually present
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# Ask cuDNN for deterministic kernels and turn off its auto-tuner
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
# Load the training set, the test set and the sample submission file
train, test, submit = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/2022-ai-midterm-p2/train.csv',
        '../input/2022-ai-midterm-p2/test.csv',
        '../input/2022-ai-midterm-p2/submit_sample.csv',
    )
)

In [3]:
# Separate the target column (MEDV) and drop the ID column from the features
y = train['MEDV']
x = train.drop(columns=['ID', 'MEDV'])
test = test.drop(columns='ID')

In [4]:
# Standardize the features
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training features only; the same fitted
# statistics are then applied to both the training and the test features.
sc = StandardScaler().fit(x)
x = sc.transform(x)
test = sc.transform(test)

In [5]:
# Move the data into float32 torch tensors.
# Everything goes through NumPy first: `y` is a pandas Series, and handing a
# Series straight to the tensor constructor makes torch walk it element by
# element through label-based indexing, which is slow and depends on the
# Series index being exactly 0..n-1.
x_tensor = torch.tensor(np.asarray(x), dtype=torch.float32)
# The target becomes a column vector so it lines up with the model's (N, 1) output
y_tensor = torch.tensor(np.asarray(y), dtype=torch.float32).reshape(-1, 1)
test_tensor = torch.tensor(np.asarray(test), dtype=torch.float32)

In [6]:
# Model: a three-layer fully connected regressor (n_features -> 32 -> 16 -> 1)
n_features = x_tensor.shape[1]
layer1 = torch.nn.Linear(n_features, 32)
layer2 = torch.nn.Linear(32, 16)
layer3 = torch.nn.Linear(16, 1)

# Activation: one Sigmoid module, reused after each hidden layer
sigmoid = torch.nn.Sigmoid()

# Chain the layers; the output layer has no activation
model = torch.nn.Sequential(layer1, sigmoid, layer2, sigmoid, layer3)

# Loss function (mean squared error) and Adam optimizer
loss = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [7]:
# Train the model: each epoch is a single optimizer step on the full training set
epochs = 5000
for epoch in range(epochs):
    prediction = model(x_tensor)
    cost = loss(prediction, y_tensor)

    # Clear the previous step's gradients right before back-propagating
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report the training loss every 1000 epochs
    if not epoch % 1000:
        print(epoch, cost.item())

0 570.6197509765625
1000 130.9151611328125
2000 54.572486877441406
3000 33.389015197753906
4000 17.681982040405273


In [8]:
# Produce predictions for the test set and write the submission file.
# Inference needs no gradients: eval mode plus no_grad avoids building an
# autograd graph for the forward pass.
model.eval()
with torch.no_grad():
    predict = model(test_tensor).cpu()

# The model output has shape (N, 1); flatten it to a 1-D array for the column.
# Bracket assignment is used on purpose: attribute assignment (submit.MEDV = ...)
# only updates a column that already exists and would otherwise just set a
# plain Python attribute on the DataFrame.
submit['MEDV'] = predict.numpy().reshape(-1)
submit.to_csv('submission.csv', index=False)