In [2]:
# 모듈 로드

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import KNNImputer

In [3]:
# 데이터 로드

df = pd.read_csv('preprocessing_merged2.csv')
df

Unnamed: 0,year,code,score,hospital_conversion,clinic_conversion,hospital_fee,clinic_fee,C0,C1,C2,...,C70,C71,C72,C73,C74,C75,C76,C77,C78,C79
0,2009,AA154,188.11,63.4,63.4,11926.174,11926.174,2009,전국,49773145,...,183,14242,49,158,11782,17,239,1292,1914,21015
1,2009,AA154,188.11,63.4,63.4,11926.174,11926.174,2009,서울특별시,10208302,...,69,4395,6,29,3313,0,25,1,0,5289
2,2009,AA154,188.11,63.4,63.4,11926.174,11926.174,2009,부산광역시,3543030,...,9,1025,13,7,958,0,16,10,5,1530
3,2009,AA154,188.11,63.4,63.4,11926.174,11926.174,2009,대구광역시,2489781,...,17,709,1,8,734,0,8,9,9,1171
4,2009,AA154,188.11,63.4,63.4,11926.174,11926.174,2009,인천광역시,2710579,...,3,676,1,12,502,0,10,23,24,980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1137179,2023,TA006*,3496.35,79.7,92.1,278659.095,322013.835,2023,전북특별자치도,1754757,...,3,604,0,35,502,4,11,148,240,986
1137180,2023,TA006*,3496.35,79.7,92.1,278659.095,322013.835,2023,전라남도,1804217,...,6,513,0,28,356,3,19,216,327,849
1137181,2023,TA006*,3496.35,79.7,92.1,278659.095,322013.835,2023,경상북도,2554324,...,11,696,1,8,618,2,22,216,298,1095
1137182,2023,TA006*,3496.35,79.7,92.1,278659.095,322013.835,2023,경상남도,3251158,...,22,944,1,24,781,1,20,173,220,1372


In [None]:
# 입력 데이터와 타겟 데이터로 분리
X2 = df_KNN.drop(columns=['요양기관현황']).values
y2 = df_KNN['요양기관현황'].values

In [117]:
pd.DataFrame(y2).isna().sum()

0    0
dtype: int64

In [118]:
# 스케일링 
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

# 전처리 ver1
X1_scaled = scaler_x.fit_transform(X1) 
y1_scaled = scaler_y.fit_transform(y1.reshape(-1, 1))

# 전처리 ver2
X2_scaled = scaler_x.fit_transform(X2) 
y2_scaled = scaler_y.fit_transform(y2.reshape(-1, 1))


In [119]:
# 데이터셋 -> 시퀀스 변환 함수
def create_sequences(X, y, seq_length=3):
    sequences = []
    targets = []
    for i in range(len(X) - seq_length):
        sequences.append(X[i:i+seq_length])
        targets.append(y[i+seq_length])
    return np.array(sequences), np.array(targets)

In [120]:
seq_length = 3
X1_seq, y1_seq = create_sequences(X1_scaled, y1_scaled, seq_length) # 전처리 ver1
X2_seq, y2_seq = create_sequences(X2_scaled, y2_scaled, seq_length) # 전처리 ver2

>>> Dataset 및 DataLoader 정의

In [121]:
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [122]:
# 전처리 ver1
dataset1 = TimeSeriesDataset(X1_seq, y1_seq)
dataloader1 = DataLoader(dataset1, batch_size=16, shuffle=True)


# 전처리 ver2
dataset2 = TimeSeriesDataset(X2_seq, y2_seq)
dataloader2 = DataLoader(dataset2, batch_size=16, shuffle=True)

In [123]:
X1_seq

array([[[0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        ,        nan,        nan, 0.        ,
         0.        , 0.91082803, 0.        ],
        [0.06315789, 0.04126984, 0.06790123, 0.06478873, 0.09615385,
         0.03867403, 0.05095541, 0.19860588, 0.        , 0.134498  ,
         0.04367541, 0.50530786, 0.03349673],
        [0.11052632, 0.1015873 , 0.12654321, 0.09859155, 0.16410256,
         0.0718232 , 0.08598726, 0.22334076, 0.06716236, 0.24507121,
         0.14729166, 0.54352442, 0.06862745]],

       [[0.06315789, 0.04126984, 0.06790123, 0.06478873, 0.09615385,
         0.03867403, 0.05095541, 0.19860588, 0.        , 0.134498  ,
         0.04367541, 0.50530786, 0.03349673],
        [0.11052632, 0.1015873 , 0.12654321, 0.09859155, 0.16410256,
         0.0718232 , 0.08598726, 0.22334076, 0.06716236, 0.24507121,
         0.14729166, 0.54352442, 0.06862745],
        [0.14210526, 0.14285714, 0.20061728, 0.15492958, 0.2474359 ,
         

>>> LSTM 모델

In [None]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        

    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        out = self.fc(hn[-1])
        return out

In [125]:
# 전처리 ver1

input_size1 = X1_seq.shape[2]
hidden_size1 = 64
output_size1 = 1
num_layers1 = 2

model1 = LSTMModel(input_size1, hidden_size1, output_size1, num_layers1)
criterion1 = nn.MSELoss()
optimizer1 = torch.optim.Adam(model1.parameters(), lr=0.0001)


In [126]:
print(model1)

LSTMModel(
  (lstm): LSTM(13, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)


In [127]:
# 전처리 ver2

input_size2 = X2_seq.shape[2]
hidden_size2 = 64
output_size2 = 1
num_layers2 = 2

model2 = LSTMModel(input_size2, hidden_size2, output_size2, num_layers2)
criterion2 = nn.MSELoss()
optimizer2 = torch.optim.Adam(model2.parameters(), lr=0.0001)

>>> 모델 학습


In [128]:
model1(torch.tensor([[[0.1105, 0.1016, 0.1265, 0.0986, 0.1641, 0.0718, 0.0860, 0.2233,
          0.0672, 0.2451, 0.1473, 0.5435, 0.0686],
         [0.1421, 0.1429, 0.2006, 0.1549, 0.2474, 0.1105, 0.1369, 0.2406,
          0.1366, 0.3582, 0.2050, 0.7749, 0.0997],
         [0.2000, 0.2032, 0.2562, 0.2056, 0.3013, 0.1575, 0.1783, 0.0000,
          0.1806, 0.4181, 0.2518, 0.3843, 0.1413]]], dtype=torch.float32))

tensor([[-0.0269]], grad_fn=<AddmmBackward0>)

In [None]:
# num_epochs = 100

# # 전처리 ver1
# for epoch in range(num_epochs):
#     for X_batch, y_batch in dataloader1:
#         outputs = model1(X_batch)
#         loss = criterion1(outputs, y_batch)
#         optimizer1.zero_grad()
#         loss.backward()
#         torch.nn.utils.clip_grad_norm_(model1.parameters(), max_norm=1.0)
#         optimizer1.step()

#     if (epoch+1) % 10 == 0:
#         print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: nan
Epoch [20/100], Loss: nan
Epoch [30/100], Loss: nan
Epoch [40/100], Loss: nan
Epoch [50/100], Loss: nan
Epoch [60/100], Loss: nan
Epoch [70/100], Loss: nan
Epoch [80/100], Loss: nan
Epoch [90/100], Loss: nan
Epoch [100/100], Loss: nan


In [133]:
num_epochs = 200

tolerance = 0.1  # 예측과 실제 값 사이의 허용 오차

# 에포크 반복
for epoch in range(num_epochs):
    total_loss = 0.0  # 총 손실 초기화
    total_correct = 0  # 총 정확도 초기화
    total_samples = 0  # 총 샘플 수 초기화
    
    # 배치 반복
    for X_batch1, y_batch1 in dataloader2:
        outputs1 = model2(X_batch1)  # 모델에 입력 데이터 전달
        loss = criterion2(outputs1, y_batch1)  # 손실 계산
        
        optimizer2.zero_grad()  # 기울기 초기화
        loss.backward()  # 역전파
        torch.nn.utils.clip_grad_norm_(model2.parameters(), max_norm=1.0)  # 기울기 클리핑
        optimizer2.step()  # 가중치 업데이트

        # 예측과 실제 값을 평탄화하여 비교
        predictions = outputs1.view(-1)  # 예측 값 평탄화
        targets = y_batch1.view(-1)  # 실제 값 평탄화

        # 허용 오차 내에 있는 예측 값의 개수 계산
        correct = (torch.abs(predictions - targets) < tolerance).sum().item()
        total_correct += correct  # 총 정확도에 추가
        total_samples += targets.size(0)  # 총 샘플 수 업데이트
        total_loss += loss.item()  # 총 손실 업데이트

    # 평균 손실과 정확도 출력
    avg_loss = total_loss / len(dataloader2)  # 평균 손실 계산
    accuracy = total_correct / total_samples * 100  # 정확도 계산
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}, accuracy: {accuracy:.2f}%')


Epoch [10/200], Loss: 0.0057, accuracy: 78.57%
Epoch [20/200], Loss: 0.0055, accuracy: 78.57%
Epoch [30/200], Loss: 0.0052, accuracy: 78.57%
Epoch [40/200], Loss: 0.0049, accuracy: 92.86%
Epoch [50/200], Loss: 0.0047, accuracy: 92.86%
Epoch [60/200], Loss: 0.0045, accuracy: 92.86%
Epoch [70/200], Loss: 0.0043, accuracy: 92.86%
Epoch [80/200], Loss: 0.0041, accuracy: 92.86%
Epoch [90/200], Loss: 0.0040, accuracy: 92.86%
Epoch [100/200], Loss: 0.0038, accuracy: 92.86%
Epoch [110/200], Loss: 0.0037, accuracy: 92.86%
Epoch [120/200], Loss: 0.0035, accuracy: 92.86%
Epoch [130/200], Loss: 0.0034, accuracy: 92.86%
Epoch [140/200], Loss: 0.0032, accuracy: 92.86%
Epoch [150/200], Loss: 0.0031, accuracy: 92.86%
Epoch [160/200], Loss: 0.0030, accuracy: 92.86%
Epoch [170/200], Loss: 0.0029, accuracy: 92.86%
Epoch [180/200], Loss: 0.0028, accuracy: 92.86%
Epoch [190/200], Loss: 0.0026, accuracy: 92.86%
Epoch [200/200], Loss: 0.0025, accuracy: 92.86%


In [134]:
# 예측할 연도 데이터 준비
with torch.no_grad():
    future_data = torch.tensor(X2_seq[-1:], dtype=torch.float32)
    prediction = model2(future_data)
    prediction = scaler_y.inverse_transform(prediction.numpy())

print(f"2025년 요양기관현황 예측값: {prediction[0][0]:.2f}")

2025년 요양기관현황 예측값: 101592.78
