In [22]:
import torch
import torch.nn as nn
import torch.optim as optim

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

In [23]:
# 데이터 로딩
train_data = pd.read_csv('/Users/lhe339/Documents/GitHub/nextorial/data/데이터 분석가_과제_Type A/match_data.csv')
test_data = pd.read_csv('/Users/lhe339/Documents/GitHub/nextorial/data/데이터 분석가_과제_Type A/test_data.csv')

In [13]:
# 데이터 전처리
def preprocess_data(data):
    data = data.drop(columns=['createdatekst', 'matchid', 'accountid', 'guildid'])  # 불필요한 열 삭제
    data['tier'] = LabelEncoder().fit_transform(data['tier'])  # 순서형 레이블 인코딩
    data = data.fillna(0)  # 결측치 처리
    return data

train_data = preprocess_data(train_data)
test_data = preprocess_data(test_data)

# 훈련 및 테스트 데이터 분리
X = train_data.drop(columns='matchresult')
y = train_data['matchresult']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# 특성 스케일링
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

In [8]:
# DNN 모델 정의
class DNN(nn.Module):
    def __init__(self, input_dim):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

In [14]:
# 모델 초기화
input_dim = X_train.shape[1]
model = DNN(input_dim)

In [15]:
# 손실 함수 및 최적화기 설정
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [18]:
# 모델 훈련
epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    inputs = torch.tensor(X_train, dtype=torch.float32)
    targets = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
    
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    
    loss.backward()
    optimizer.step()
    
    # 정확도 계산
    binary_outputs = (outputs > 0.5).float()
    accuracy = accuracy_score(targets.numpy(), binary_outputs.numpy())
    
    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy * 100:.2f}%')

# 모델 검증
model.eval()
with torch.no_grad():
    inputs = torch.tensor(X_val, dtype=torch.float32)
    targets = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    
    # 정확도 계산
    binary_outputs = (outputs > 0.5).float()
    accuracy = accuracy_score(targets.numpy(), binary_outputs.numpy())
    print(f'Validation Loss: {loss.item():.4f}, Validation Accuracy: {accuracy * 100:.2f}%')

Epoch [1/10], Loss: 0.6814, Accuracy: 55.62%
Epoch [2/10], Loss: 0.6807, Accuracy: 55.78%
Epoch [3/10], Loss: 0.6800, Accuracy: 55.90%
Epoch [4/10], Loss: 0.6795, Accuracy: 56.00%
Epoch [5/10], Loss: 0.6792, Accuracy: 56.04%
Epoch [6/10], Loss: 0.6789, Accuracy: 56.11%
Epoch [7/10], Loss: 0.6787, Accuracy: 56.19%
Epoch [8/10], Loss: 0.6786, Accuracy: 56.23%
Epoch [9/10], Loss: 0.6785, Accuracy: 56.25%
Epoch [10/10], Loss: 0.6784, Accuracy: 56.25%
Validation Loss: 0.6797, Validation Accuracy: 56.10%


In [21]:
test_data

Unnamed: 0,teamid,mmr,winstreak,losestreak,recentwinprob,accumatches,tier,itemid
0,1,1952,0,1,0.4,157,1,1277
1,1,1976,3,0,0.4,307,1,1415
2,1,1952,0,1,0.5,34,1,1178
3,1,1809,0,1,0.5,65,2,1170
4,2,1988,0,1,0.5,8,1,1401
...,...,...,...,...,...,...,...,...
140483,1,2025,0,3,0.5,40,4,1365
140484,2,1811,1,0,0.4,2535,4,1398
140485,2,2384,0,2,0.4,241,1,1401
140486,2,2032,0,1,0.5,265,4,1415


In [24]:
# 데이터 전처리 - 예를 들어, 필요 없는 열 제거
# 'matchresult' 열이 없기 때문에 이전과 동일한 전처리 단계를 수행할 수 있습니다.
X_test = test_data.drop(columns=['createdatekst', 'matchid', 'accountid', 'guildid', 'tier'])

# 모델을 평가 모드로 설정
model.eval()

# 테스트 데이터를 텐서로 변환
inputs = torch.tensor(X_test.values, dtype=torch.float32)

# 그래디언트 계산을 중지
with torch.no_grad():
    # 모델을 실행하여 예측 수행
    outputs = model(inputs)

# 이진 분류의 경우 출력을 이진 레이블로 변환
binary_outputs = (outputs > 0.5).float()

# 예측 결과를 데이터 프레임으로 변환
predictions = pd.DataFrame(binary_outputs.numpy(), columns=['predicted_matchresult'])

predictions.head()

# # 예측 결과를 CSV 파일로 저장
# predictions.to_csv('test_predictions.csv', index=False)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (140488x7 and 11x128)