In [1]:
# 필요한 패키지 임포트
import random
import torch
import pandas as pd
import numpy as np
import pandas_profiling

# 랜덤시드 고정
seed = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [2]:
# 데이터 로드
train = pd.read_csv('../input/2022-ai-midterm-p3/train.csv')
test = pd.read_csv('../input/2022-ai-midterm-p3/test.csv')
submit = pd.read_csv('../input/2022-ai-midterm-p3/submit_sample.csv')

In [3]:
# 필요없는 column drop 및 정답 라벨 분리
y = train.drop('index', axis=1).target
test = test.drop('index', axis=1)
x = train.drop(['index', 'target'], axis=1)

In [4]:
# 심장병 여부를 0 or 1로 전처리
y = pd.Series([0 if n == 0 else 1 for n in y])

In [5]:
# ? 값 처리
x.ca = pd.Series([0 if n == '?' else int(n) for n in x.ca])
x.thal = pd.Series([3 if n == '?' else int(n) for n in x.thal])
test.ca = pd.Series([0 if n == '?' else int(n) for n in test.ca])
test.thal = pd.Series([3 if n == '?' else int(n) for n in test.thal])

In [6]:
# 데이터 정규화

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
x = sc.fit_transform(x)
test = sc.transform(test)

In [7]:
# torch tensor로 옮기기
x_tensor = torch.FloatTensor(x)
y_tensor = torch.LongTensor(y)
test_tensor = torch.FloatTensor(test)

In [8]:
# 모델 설계
layer1 = torch.nn.Linear(x_tensor.shape[1], 64)
layer2 = torch.nn.Linear(64, 2)

# 활성화 함수
sigmoid = torch.nn.Sigmoid()

# softmax 함수
softmax = torch.nn.Softmax(dim=1)

# 레이어 연결
model = torch.nn.Sequential(
    layer1, sigmoid,
    layer2, softmax
)

#loss 함수 설정
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [9]:
# 모델 학습
epochs = 5000
for epoch in range(epochs):
    optimizer.zero_grad()
    cost = loss(model(x_tensor), y_tensor)
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(epoch, cost.item())

0 0.7015807628631592
100 0.511850118637085
200 0.46545353531837463
300 0.4521282911300659
400 0.4452599585056305
500 0.4402424097061157
600 0.4360741674900055
700 0.43235474824905396
800 0.42869314551353455
900 0.4246274530887604
1000 0.4190783202648163
1100 0.41409212350845337
1200 0.4104325473308563
1300 0.40776076912879944
1400 0.4057271480560303
1500 0.40309709310531616
1600 0.3994313180446625
1700 0.39672723412513733
1800 0.3901739716529846
1900 0.3879695236682892
2000 0.3869483470916748
2100 0.38628533482551575
2200 0.38580963015556335
2300 0.38545021414756775
2400 0.3851690888404846
2500 0.38494327664375305
2600 0.38475704193115234
2700 0.38459667563438416
2800 0.38440829515457153
2900 0.3810882270336151
3000 0.37898531556129456
3100 0.3732011914253235
3200 0.372569739818573
3300 0.37227097153663635
3400 0.37207913398742676
3500 0.37194061279296875
3600 0.37183383107185364
3700 0.37174832820892334
3800 0.3716779053211212
3900 0.37161877751350403
4000 0.37156856060028076
4100 0.3

In [10]:
# 정답 도출
predict = model(test_tensor).cpu().argmax(dim=1)
submit.target = predict.detach()
submit.to_csv('submission.csv', index=False)