## 캐글 자전거 수요 예측 : https://www.kaggle.com/competitions/bike-sharing-demand/overview
### 목표 : 전처리 방법을 변경하고 모델을 TensorFlow 딥러닝 모델로 교체하여 제출한 뒤 스코어 0.8 이하 달성하기 (참고: 아래 baseline 코드는 PyTorch로 작성되어 있음)

### baseline

In [None]:
# 필요한 라이브러리 임포트
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Columns fed to the model, in the order they appear in the scaled matrix.
features = [
    # columns expected to already be present in the loaded CSV data
    'season', 'holiday', 'workingday', 'weather',
    'temp', 'atemp', 'humidity', 'windspeed',
    # calendar fields derived from `datetime` inside preprocess_and_scale
    'year', 'month', 'day', 'hour', 'dayofweek',
]

# Feature engineering + standardisation helper
def preprocess_and_scale(train_data, test_data, features):
    """Add calendar columns to both frames and standardise the chosen features.

    Both DataFrames are modified in place: ``datetime`` is parsed with
    ``pd.to_datetime`` and the columns ``year``, ``month``, ``day``, ``hour``
    and ``dayofweek`` are added from it.

    Args:
        train_data: training DataFrame containing a ``datetime`` column.
        test_data: test DataFrame containing a ``datetime`` column.
        features: column names to extract and scale.

    Returns:
        ``(X_train, X_test, scaler)`` where the scaler was fitted on the
        training features only and then applied to both frames.
    """
    calendar_parts = ('year', 'month', 'day', 'hour', 'dayofweek')
    for frame in (train_data, test_data):
        frame['datetime'] = pd.to_datetime(frame['datetime'])
        stamps = frame['datetime'].dt
        for part in calendar_parts:
            frame[part] = getattr(stamps, part)

    # Fit on the training split only so test statistics never leak into it.
    scaler = StandardScaler()
    scaler.fit(train_data[features])
    X_train = scaler.transform(train_data[features])
    X_test = scaler.transform(test_data[features])

    return X_train, X_test, scaler

# RMSLE loss
class RMSLELoss(nn.Module):
    """Root mean squared logarithmic error between predictions and targets.

    Computes ``sqrt(mean((log1p(y_pred) - log1p(y_true)) ** 2))`` over all
    elements. Both tensors are combined element-wise, so they should have the
    same shape (otherwise broadcasting applies).
    """

    def __init__(self):
        super().__init__()

    def forward(self, y_pred, y_true):
        """Return the scalar RMSLE.

        Args:
            y_pred: predicted values; negative entries are treated as 0.
            y_true: target values.

        Returns:
            A 0-dim tensor holding the loss.
        """
        # log1p is NaN for inputs <= -1; clamping keeps a stray negative
        # prediction from turning the whole batch loss into NaN.
        y_pred = torch.clamp(y_pred, min=0)
        log_diff = torch.log1p(y_pred) - torch.log1p(y_true)
        return torch.sqrt(torch.mean(log_diff ** 2))

# Load the competition data; the target is the `count` column of train.csv
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
y = train['count'].values

# Add calendar features and standardise (scaler is fitted on train only)
X_train, X_test, scaler = preprocess_and_scale(train, test, features)

# Hold out the last 20% of rows for validation WITHOUT shuffling.
# The sliding windows built further down treat neighbouring rows as
# consecutive time steps; a shuffled split would make every window a mix of
# unrelated rows.
# NOTE(review): assumes train.csv rows are in chronological order — confirm.
X_train, X_val, y_train, y_val = train_test_split(X_train, y, test_size=0.2, shuffle=False)

# Array -> tensor conversion helper
def convert_to_tensor(X, y=None):
    """Convert features (and optionally targets) to float32 tensors.

    Args:
        X: array-like of features.
        y: optional array-like of targets.

    Returns:
        ``(X_tensor, y_tensor)``. ``y_tensor`` is reshaped to a single column
        (``view(-1, 1)``), or is ``None`` when ``y`` is not given.
    """
    features_t = torch.tensor(X, dtype=torch.float32)
    if y is None:
        return features_t, None
    targets_t = torch.tensor(y, dtype=torch.float32).view(-1, 1)
    return features_t, targets_t

# Sliding-window construction for the Transformer input
sequence_length = 5
def create_sequences(data, sequence_length):
    """Stack every run of ``sequence_length`` consecutive rows of ``data``.

    Window ``i`` holds rows ``i .. i + sequence_length - 1``, so the result
    has ``len(data) - sequence_length + 1`` windows.

    Args:
        data: array-like whose first axis is the row axis.
        sequence_length: number of consecutive rows per window (expected to
            be a positive integer).

    Returns:
        ``np.ndarray`` of shape ``(n_windows, sequence_length, *data.shape[1:])``.
        When ``data`` has fewer rows than ``sequence_length`` the result has
        zero windows but keeps the trailing dimensions, so downstream shape
        lookups still work.
    """
    data = np.asarray(data)
    n_windows = max(len(data) - sequence_length + 1, 0)
    # Row i of the index matrix is [i, i+1, ..., i+sequence_length-1];
    # integer-array indexing then gathers all windows in one vectorised step.
    window_idx = np.arange(n_windows)[:, None] + np.arange(sequence_length)
    return data[window_idx]

# Build sliding windows for every split
X_train_seq, X_val_seq, X_test_seq = (
    create_sequences(split, sequence_length)
    for split in (X_train, X_val, X_test)
)
# Each window is labelled with the target of its last row, so the first
# (sequence_length - 1) targets have no window and are dropped.
y_train_seq, y_val_seq = (
    labels[sequence_length - 1:] for labels in (y_train, y_val)
)

# Wrap the train/validation windows in tensors, datasets and loaders
X_train_seq_tensor, y_train_seq_tensor = convert_to_tensor(X_train_seq, y_train_seq)
X_val_seq_tensor, y_val_seq_tensor = convert_to_tensor(X_val_seq, y_val_seq)

train_dataset = TensorDataset(X_train_seq_tensor, y_train_seq_tensor)
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)

val_dataset = TensorDataset(X_val_seq_tensor, y_val_seq_tensor)
val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False)

# Transformer regression model
class BikeDemandTransformer(nn.Module):
    """Transformer-based regressor that maps a window of rows to one value.

    A linear layer projects each row to ``d_model`` features, the projected
    window is passed through ``nn.Transformer`` as both source and target,
    and the output at the last time step is mapped to one non-negative value.

    Args:
        input_size: number of features per row.
        d_model: width of the projected representation.
        nhead: number of attention heads.
        num_layers: number of encoder layers. The decoder depth is not set
            here and therefore stays at the ``nn.Transformer`` default.
        dim_feedforward: hidden width of the feed-forward sub-layers.
    """

    def __init__(self, input_size, d_model=32, nhead=2, num_layers=2, dim_feedforward=64):
        super().__init__()
        # Per-row projection: input features -> model width
        self.embedding = nn.Linear(input_size, d_model)
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            dim_feedforward=dim_feedforward,
        )
        # Regression head
        self.fc = nn.Linear(d_model, 1)

    def forward(self, x):
        """Predict one value per window.

        Args:
            x: 3-D tensor, laid out as (batch, sequence, features) according
               to the permutation applied below.

        Returns:
            Tensor of shape (batch, 1) with values >= 0 (ReLU output).
        """
        projected = self.embedding(x)
        # Reorder to (sequence, batch, features) for the transformer call
        seq_first = projected.permute(1, 0, 2)
        # The same tensor is used as source and target
        attended = self.transformer(seq_first, seq_first)
        # Keep only the representation of the final time step
        last_step = attended[-1]
        return torch.relu(self.fc(last_step))

# Instantiate the model, the loss and the optimizer
input_size = X_train_seq.shape[2]  # features per time step
transformer_model = BikeDemandTransformer(input_size)
criterion = RMSLELoss()
optimizer = optim.Adam(params=transformer_model.parameters(), lr=5e-4)

# Training and validation helpers
def train_model(model, train_loader, criterion, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: module to train; switched to training mode.
        train_loader: sized iterable yielding ``(inputs, targets)`` batches.
        criterion: callable ``criterion(outputs, targets)`` returning a loss.
        optimizer: optimizer holding the model's parameters.

    Returns:
        The mean of the per-batch loss values (sum divided by the number of
        batches).
    """
    model.train()
    batch_losses = []
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(train_loader)

def evaluate_model(model, val_loader, criterion):
    """Compute the mean per-batch loss on ``val_loader`` without gradients.

    Predictions are rounded to the nearest integer and clamped at 0 before
    the loss is computed.

    Args:
        model: module to evaluate; switched to eval mode.
        val_loader: sized iterable yielding ``(inputs, targets)`` batches.
        criterion: callable ``criterion(outputs, targets)`` returning a loss.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for inputs, targets in val_loader:
            predictions = model(inputs).round().clamp(min=0)
            batch_losses.append(criterion(predictions, targets).item())
    return sum(batch_losses) / len(val_loader)

# Training loop with early stopping
def train_with_early_stopping(model, train_loader, val_loader, criterion, optimizer, n_epochs=1000, model_path="best_transformer_model.pth", patience=50):
    """Train ``model`` and restore the weights with the best validation loss.

    After every epoch the validation loss is compared with the best one seen
    so far; on improvement the model's ``state_dict`` is saved to
    ``model_path``. Training stops early once ``patience`` consecutive epochs
    bring no improvement.

    Args:
        model: module to train.
        train_loader: loader passed to ``train_model``.
        val_loader: loader passed to ``evaluate_model``.
        criterion: loss callable.
        optimizer: optimizer for the model's parameters.
        n_epochs: maximum number of epochs.
        model_path: file the best ``state_dict`` is written to.
        patience: number of non-improving epochs tolerated before stopping.

    Returns:
        The same ``model`` object. Its weights are reloaded from
        ``model_path`` only when this call actually wrote a checkpoint;
        otherwise (zero epochs, or a validation loss that never compared
        below infinity, e.g. NaN) the current weights are kept.
    """
    best_val_loss = float('inf')
    epochs_no_improve = 0
    checkpoint_saved = False
    for epoch in range(n_epochs):
        train_loss = train_model(model, train_loader, criterion, optimizer)
        val_loss = evaluate_model(model, val_loader, criterion)
        print(f'Epoch {epoch+1}/{n_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

        # Early-stopping bookkeeping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), model_path)
            checkpoint_saved = True
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("조기 종료: 검증 손실이 개선되지 않음")
                break

    if checkpoint_saved:
        model.load_state_dict(torch.load(model_path))
    else:
        # Loading here would either fail (no file) or silently restore a
        # stale checkpoint left behind by an earlier run.
        print(f'No checkpoint was saved during this run; keeping current weights (not loading {model_path}).')
    return model

# Train the Transformer model (best checkpoint is restored on return)
transformer_model = train_with_early_stopping(transformer_model, train_loader, val_loader, criterion, optimizer, model_path="best_transformer_model.pth")

# Report RMSE / MAE on the validation windows
transformer_model.eval()
with torch.no_grad():
    val_pred = transformer_model(X_val_seq_tensor).numpy().flatten()
    rmse = np.sqrt(mean_squared_error(y_val_seq, val_pred))
    mae = mean_absolute_error(y_val_seq, val_pred)
    print('검증 데이터 RMSE:', rmse)
    print('검증 데이터 MAE:', mae)

# Predict on the test set.
# A window of length `sequence_length` only exists from row
# (sequence_length - 1) onwards, so plain windowing would leave the first
# rows of test.csv without a prediction. Left-pad the scaled test matrix with
# copies of its first row so that every test row ends exactly one window.
n_pad = sequence_length - 1
X_test_padded = np.concatenate([np.repeat(X_test[:1], n_pad, axis=0), X_test], axis=0)
X_test_seq_tensor, _ = convert_to_tensor(create_sequences(X_test_padded, sequence_length))
with torch.no_grad():
    test_pred = transformer_model(X_test_seq_tensor).numpy().flatten()
# Counts are non-negative integers
test_pred = np.round(test_pred).astype(int).clip(min=0)

# Build the submission file: one row per test row
submission = pd.DataFrame({
    'datetime': test['datetime'].reset_index(drop=True),
    'count': test_pred
})
submission.to_csv('submission.csv', index=False)

print('제출 파일이 생성되었습니다.')
