In [5]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.decomposition import PCA

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from google.colab import drive

In [9]:
drive.mount('/content/drive')

Mounted at /content/drive


#### Data 불러오기

In [3]:
df = pd.read_csv("/content/drive/MyDrive/train/data_mart/data_merge.csv")
df_add = pd.read_csv("/content/drive/MyDrive/train/data_mart/data_merge_add.csv")

df_add.head()

Unnamed: 0,timestamp,x,y,z,userId,lat,lon,accuracy,hr
0,2020-08-31 00:00:00,-0.000904,4.448405,8.366823,user07,37.276019,127.115323,44.890444,
1,2020-08-31 00:00:00,-0.000904,4.448405,8.366823,user07,37.276078,127.11555,37.783222,
2,2020-08-31 00:00:00,0.025839,2.306513,8.952953,user07,37.276019,127.115323,44.890444,
3,2020-08-31 00:00:00,0.025839,2.306513,8.952953,user07,37.276078,127.11555,37.783222,
4,2020-08-31 00:02:00,0.090913,0.010326,9.815679,user07,37.276159,127.115936,42.482,


In [4]:
df_add = df_add.fillna(0)

In [5]:
df_add.isna().sum()

timestamp    0
x            0
y            0
z            0
userId       0
lat          0
lon          0
accuracy     0
hr           0
dtype: int64

In [6]:
df = pd.concat(objs=[df, df_add], axis=0)
df.head()

Unnamed: 0,timestamp,x,y,z,userId,lat,lon,accuracy,hr,activity
0,2020-08-30 03:59:00,-0.075511,0.217579,9.747272,user01,37.544928,127.05441,34.94725,0.0,3.0
1,2020-08-30 03:59:00,-0.075511,0.217579,9.747272,user01,37.544849,127.054274,61.097727,0.0,3.0
2,2020-08-30 03:59:00,-0.075511,0.217579,9.747272,user01,37.544954,127.054375,39.202667,0.0,3.0
3,2020-08-30 03:59:00,-0.062778,0.186066,9.749598,user01,37.544928,127.05441,34.94725,0.0,3.0
4,2020-08-30 03:59:00,-0.062778,0.186066,9.749598,user01,37.544849,127.054274,61.097727,0.0,3.0


In [7]:
df.isna().sum()

timestamp          0
x                  0
y                  0
z                  0
userId             0
lat                0
lon                0
accuracy           0
hr                 0
activity     2028416
dtype: int64

In [8]:
# 표준화할 열들 선택
columns_to_standardize = ['x', 'y', 'z', 'lat', 'lon', 'hr', 'activity']

# StandardScaler 객체 생성
scaler = StandardScaler()

df[columns_to_standardize] = scaler.fit_transform(df[columns_to_standardize])

df.head()

Unnamed: 0,timestamp,x,y,z,userId,lat,lon,accuracy,hr,activity
0,2020-08-30 03:59:00,-0.149766,-0.153538,0.729051,user01,0.185912,0.168258,34.94725,-2.441332,0.137235
1,2020-08-30 03:59:00,-0.149766,-0.153538,0.729051,user01,0.185899,0.168252,61.097727,-2.441332,0.137235
2,2020-08-30 03:59:00,-0.149766,-0.153538,0.729051,user01,0.185916,0.168257,39.202667,-2.441332,0.137235
3,2020-08-30 03:59:00,-0.145683,-0.161485,0.729498,user01,0.185912,0.168258,34.94725,-2.441332,0.137235
4,2020-08-30 03:59:00,-0.145683,-0.161485,0.729498,user01,0.185899,0.168252,61.097727,-2.441332,0.137235


In [9]:
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['date'] = df['timestamp'].dt.date

df['date'] = pd.to_datetime(df['date'])

#### Sleep, Survey Feature들 합치기

In [10]:
sleep_data = "/content/drive/MyDrive/train/dataset/user_sleep_2020.csv"
survey_data = "/content/drive/MyDrive/train/dataset/user_survey_2020.csv"

sleep_df = pd.read_csv(sleep_data)
survey_df = pd.read_csv(survey_data)

In [11]:
# AM,PM 시계열 상으로 같은 날짜 합치기
survey_df = survey_df.groupby(['userId', 'date']).agg({
    'startInput': 'first',  # 시작 입력값은 AM의 값을 사용
    'endInput': 'last',  # 종료 입력값은 PM의 값을 사용
    'sleep': 'max',  # 수면 시간은 최대값을 사용 (일반적으로 AM에 기록)
    'sleepProblem': 'max',  # 수면 문제 역시 최대값을 사용
    'dream': 'max',
    'amCondition': 'max',
    'amEmotion': 'max',
    'pmEmotion': 'max',
    'pmStress': 'max',
    'pmFatigue': 'max',
    'caffeine': 'last',  # 카페인 섭취는 PM에 기록된 값을 사용
    'cAmount(ml)': 'sum',  # 카페인 섭취량은 합계를 사용
    'alcohol': 'last',
    'aAmount(ml)': 'sum'
}).reset_index()

# 결측치 처리하기
survey_df.fillna({
    'sleep': 0,
    'sleepProblem': 0,
    'dream': 0,
    'amCondition': 0,
    'amEmotion': 0,
    'pmEmotion': 0,
    'pmStress': 0,
    'pmFatigue': 0,
    'cAmount(ml)': 0,
    'aAmount(ml)': 0,
    'caffeine' : 0,
    'alcohol': 0  # 'alcohol' 컬럼에 대해서는 'not specific'으로 채움
}, inplace=True)

caffeine_map = {'not specific': 0, 'tea': 1, 'coke': 1, 'caffeinated drink': 1, 'coffee': 1, 0 : 0}
alcohol_map = {
    'not specified': 0, 'soju&beer': 1, 'soju': 1,
    'beer': 1, 'beer&rice wine' : 1, 'wine': 1, 'rice wine': 1,
    'cognac': 1, 'wine&beer': 1, 'kaoliang': 1, 0 : 0
}

# 매핑 적용
survey_df['caffeine'] = survey_df['caffeine'].map(caffeine_map)
survey_df['alcohol'] = survey_df['alcohol'].map(alcohol_map)


sleep_df.drop(columns='timezone', axis=1, inplace=True)

sleep_df.head()

Unnamed: 0,userId,date,startDt,endDt,lastUpdate,wakeupduration,lightsleepduration,deepsleepduration,wakeupcount,durationtosleep,...,hr_average,hr_min,hr_max,rr_average,rr_min,rr_max,breathing_disturbances_intensity,snoring,snoringepisodecount,sleep_score
0,user24,2020-08-30,1598708760,1598740860,1598748252,3840,13800,9000,3,1740,...,72,63,82,15,10,20,10,840,1,86
1,user24,2020-08-31,1598800140,1598825940,1598833333,4740,9540,6360,1,3180,...,69,60,82,15,11,20,8,2700,3,52
2,user24,2020-09-01,1598885940,1598908800,1598916190,1920,8760,4740,1,1440,...,70,58,94,15,12,23,8,0,0,61
3,user24,2020-09-02,1598979420,1598999580,1599006969,2760,7740,7440,1,1380,...,68,60,80,15,12,19,-1,240,1,40
4,user24,2020-09-03,1599060780,1599085980,1599093370,3900,11640,7620,1,1680,...,71,61,83,15,11,25,7,300,1,55


In [12]:
data_all = pd.merge(survey_df, sleep_df, on = ["date", "userId"], how='inner')

In [13]:
data_rf_features = data_all[['pmEmotion', 'pmStress', 'userId', 'lightsleepduration', 'wakeupduration', 'deepsleepduration', 'durationtosleep', 'remsleepduration', 'hr_max', 'hr_average', 'sleep_score' ,'date']]

In [14]:
data_rf_features['date']= pd.to_datetime(data_rf_features['date'])


# 스케일러 초기화
scaler = StandardScaler()

# 스케일링할 열 선택 (datetime 열과 target 열을 제외한 수치형 열)
features_to_scale = data_rf_features.drop(columns=['date', 'userId']).columns

# 데이터프레임을 수치형 데이터와 날짜 데이터로 분리
numeric_data = data_rf_features[features_to_scale]
date_data = data_rf_features['date']
user_id_data = data_rf_features['userId']

# 스케일링 적용
scaled_features = scaler.fit_transform(numeric_data)

# 스케일링된 데이터를 데이터프레임으로 변환
data_features = pd.DataFrame(scaled_features, columns=features_to_scale)

# 날짜 데이터와 결합
data_features['date'] = date_data
data_features['userId'] = user_id_data

# 열 순서 복원
data_features = data_features[['date', 'userId'] + list(features_to_scale)]

data_features.head()

Unnamed: 0,date,userId,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average,sleep_score
0,2020-08-31,user01,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187,1.217948
1,2020-09-01,user01,0.443799,-0.016037,-0.061009,-0.821648,0.529081,-0.468321,0.83184,0.663944,1.546426,1.178819
2,2020-09-02,user01,0.443799,-0.016037,-0.695324,-0.205989,0.592676,-0.141831,0.872182,0.931565,1.546426,0.044088
3,2020-09-03,user01,-1.76943,-0.855799,-0.118674,-0.649263,1.695002,-0.250661,0.448593,0.396322,1.399367,1.178819
4,2020-09-04,user01,0.443799,-0.855799,-0.268603,-0.772395,1.016648,-0.395767,-0.035509,1.065376,1.399367,0.317988


##### Feature들과 data 합치기

In [15]:
df_features = pd.merge(df, data_features, on=['date', 'userId'], how='inner')

df_features.head()

Unnamed: 0,timestamp,x,y,z,userId,lat,lon,accuracy,hr,activity,...,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average,sleep_score
0,2020-08-31 00:10:00,-0.102148,-0.226125,0.720885,user01,0.175609,0.163487,12.38,-2.441332,-3.720634,...,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187,1.217948
1,2020-08-31 00:11:00,-0.101582,-0.226406,0.721456,user01,0.175611,0.163488,13.13925,-2.441332,0.137235,...,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187,1.217948
2,2020-08-31 00:12:00,-0.102154,-0.226862,0.722177,user01,0.175611,0.163488,12.992,-2.441332,0.137235,...,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187,1.217948
3,2020-08-31 00:12:00,-0.102154,-0.226862,0.722177,user01,0.17561,0.163488,12.7495,-2.441332,0.137235,...,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187,1.217948
4,2020-08-31 00:12:00,-0.101931,-0.226364,0.720646,user01,0.175611,0.163488,12.992,-2.441332,0.137235,...,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187,1.217948


#### train_label과 train_user 데이터 합성

In [16]:
# train_label.csv 파일 불러오기
train_label_path = "/content/drive/MyDrive/train/dataset/train_label.csv"

train_labels = pd.read_csv(train_label_path)
train_labels.drop(columns="Unnamed: 0", axis=1, inplace=True)

In [17]:
train_labels['date'] = pd.to_datetime(train_labels['date'])
train_labels.rename(columns={"subject_id" : 'userId'}, inplace=True)

In [18]:
df_label = pd.merge(df_features, train_labels, on=['userId', 'date'], how='inner')

df_label.head()

Unnamed: 0,timestamp,x,y,z,userId,lat,lon,accuracy,hr,activity,...,hr_max,hr_average,sleep_score,Q1,Q2,Q3,S1,S2,S3,S4
0,2020-08-31 00:10:00,-0.102148,-0.226125,0.720885,user01,0.175609,0.163487,12.38,-2.441332,-3.720634,...,0.797754,0.958187,1.217948,0,0,0,0,1,1,1
1,2020-08-31 00:11:00,-0.101582,-0.226406,0.721456,user01,0.175611,0.163488,13.13925,-2.441332,0.137235,...,0.797754,0.958187,1.217948,0,0,0,0,1,1,1
2,2020-08-31 00:12:00,-0.102154,-0.226862,0.722177,user01,0.175611,0.163488,12.992,-2.441332,0.137235,...,0.797754,0.958187,1.217948,0,0,0,0,1,1,1
3,2020-08-31 00:12:00,-0.102154,-0.226862,0.722177,user01,0.17561,0.163488,12.7495,-2.441332,0.137235,...,0.797754,0.958187,1.217948,0,0,0,0,1,1,1
4,2020-08-31 00:12:00,-0.101931,-0.226364,0.720646,user01,0.175611,0.163488,12.992,-2.441332,0.137235,...,0.797754,0.958187,1.217948,0,0,0,0,1,1,1


In [19]:
df_label.isna().sum()

timestamp                   0
x                           0
y                           0
z                           0
userId                      0
lat                         0
lon                         0
accuracy                    0
hr                          0
activity              1748562
date                        0
pmEmotion                   0
pmStress                    0
lightsleepduration          0
wakeupduration              0
deepsleepduration           0
durationtosleep             0
remsleepduration            0
hr_max                      0
hr_average                  0
sleep_score                 0
Q1                          0
Q2                          0
Q3                          0
S1                          0
S2                          0
S3                          0
S4                          0
dtype: int64

In [20]:
df_label.fillna(0, inplace=True)

In [21]:
df_label['userId'] = df_label['userId'].str.replace('user', '').astype(int)

##### Feature Data 나눠 놓기

In [22]:
label_cols = ["userId", "date", "Q1", "Q2", "Q3", "S1", "S2", "S3", "S4"]

label_data = df_label[label_cols].drop_duplicates(subset=['userId','date']).set_index(['userId','date'])
dates = df_label['date'].values
users_id = df_label['userId'].values

#### Sliding Window 나누기

In [23]:
# 슬라이딩 윈도우 함수 정의
def sliding_windows(X, dates, user_ids, label_data, window_size, step_size):
    X_windows = []
    y_windows = []
    for start in range(0, len(X) - window_size + 1, step_size):
        end = start + window_size
        X_windows.append(X[start:end])
        date_for_window = dates[start]
        user_id_for_window = user_ids[start]
        y_windows.append(label_data.loc[(date_for_window, user_id_for_window)].values)  # 날짜 및 사용자 라벨 사용
    return np.array(X_windows), np.array(y_windows)


In [24]:
# 슬라이딩 윈도우 설정 (60분)
window_size = 10  # 60분
step_size = 1   # step 1
X_data, y_label = sliding_windows(df_label[["x", "y","z", "lat", "lon","hr","activity"]], users_id, dates, label_data ,window_size, step_size)

In [25]:
X_data = np.array(X_data)
y_labels = np.array(y_label)

#### Tensor size 맞추기

In [26]:
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx]
        y = self.labels[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

In [27]:
# 텐서로 변환
X_data_tensor = torch.tensor(X_data, dtype=torch.float32)
y_tensor = torch.tensor(y_labels, dtype=torch.float32)

In [28]:
print(f"Shape of X_acc_tensor: {X_data_tensor.shape}")
print(f"Shape of y_tensor: {y_tensor.shape}")

Shape of X_acc_tensor: torch.Size([2576388, 10, 7])
Shape of y_tensor: torch.Size([2576388, 7])


In [29]:
# 데이터셋 및 데이터로더 준비
train_dataset = CustomDataset(X_data_tensor, y_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

#### LSTM Model

In [6]:
import torch.nn.functional as F

class ResidualGRUBlock(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(ResidualGRUBlock, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.residual = nn.Linear(input_size, hidden_size * 2)
        self.layer_norm = nn.LayerNorm(hidden_size * 2)

    def forward(self, x):
        residual = self.residual(x)
        out, _ = self.gru(x)
        out = self.layer_norm(out + residual)
        return out

class MultiLabelGRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(MultiLabelGRU, self).__init__()
        self.conv1 = nn.Conv1d(input_size, hidden_size, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.residual_block = ResidualGRUBlock(hidden_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        x = x.transpose(1, 2)  # (batch_size, input_size, seq_length)로 변환
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = x.transpose(1, 2)  # 다시 (batch_size, seq_length, hidden_size)로 변환
        x = self.residual_block(x)
        out = x[:, -1, :]
        out = self.fc(out)
        return out


In [31]:
# 하이퍼파라미터 설정
input_size = X_data_tensor.shape[2]
hidden_size = 64
num_layers = 2
num_classes = y_tensor.shape[1]
dropout_prob = 0.2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MultiLabelGRU(input_size, hidden_size, num_layers, num_classes).to(device)

print(device)

cuda


In [7]:
# 하이퍼파라미터 설정
input_size = 10
hidden_size = 64
num_layers = 2
num_classes = 7
dropout_prob = 0.2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MultiLabelGRU(input_size, hidden_size, num_layers, num_classes).to(device)

print(device)

cuda


In [32]:
# 에폭별 손실 값 및 F1-Score를 기록할 리스트
train_losses = []
train_f1_scores = []

# 모델 평가 함수 수정
def evaluate_model_on_train(model, dataloader):
    model.eval()
    all_targets = []
    all_predictions = []

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            all_targets.append(targets.cpu().numpy())
            all_predictions.append(torch.sigmoid(outputs).cpu().numpy())

    all_targets = np.vstack(all_targets)
    all_predictions = np.vstack(all_predictions)

    # 확률을 이진화하여 F1-score 계산
    f1 = f1_score(all_targets, (all_predictions > 0.5).astype(int), average='macro')

    return f1

In [None]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 학습 루프
num_epochs = 30

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)

    train_losses.append(epoch_loss)

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss : [{epoch_loss:.4f}]')

    # 모델 저장
    torch.save(model.state_dict(), f'/content/drive/MyDrive/train/gru_residual_model/model_epoch_{epoch+1}.pt')

Epoch [1/30], Loss : [0.5674]
Epoch [2/30], Loss : [0.5114]
Epoch [3/30], Loss : [0.4933]
Epoch [4/30], Loss : [0.4827]
Epoch [5/30], Loss : [0.4741]
Epoch [6/30], Loss : [0.4675]
Epoch [7/30], Loss : [0.4621]
Epoch [8/30], Loss : [0.4570]
Epoch [9/30], Loss : [0.4529]
Epoch [10/30], Loss : [0.4494]
Epoch [11/30], Loss : [0.4449]
Epoch [12/30], Loss : [0.4409]
Epoch [13/30], Loss : [0.4383]
Epoch [14/30], Loss : [0.4357]
Epoch [15/30], Loss : [0.4324]
Epoch [16/30], Loss : [0.4296]
Epoch [17/30], Loss : [0.4272]
Epoch [18/30], Loss : [0.4255]
Epoch [19/30], Loss : [0.4233]
Epoch [20/30], Loss : [0.4214]
Epoch [21/30], Loss : [0.4198]
Epoch [22/30], Loss : [0.4186]
Epoch [23/30], Loss : [0.4169]
Epoch [24/30], Loss : [0.4156]
Epoch [25/30], Loss : [0.4149]
Epoch [26/30], Loss : [0.4136]
Epoch [27/30], Loss : [0.4131]
Epoch [28/30], Loss : [0.4119]
Epoch [29/30], Loss : [0.4111]


In [10]:
final_f1_scores = []
num_epochs = 30
model_file_path = "/content/drive/MyDrive/train/gru_residual_model"

for epoch in range(num_epochs):
    model.load_state_dict(torch.load(os.path.join(model_file_path, f'model_epoch_{epoch+1}.pt')))
    f1 = evaluate_model_on_train(model, train_loader)
    final_f1_scores.append(f1)
    print(f'F1-Score for epoch {epoch+1}: {f1:.4f}')

RuntimeError: Error(s) in loading state_dict for MultiLabelGRU:
	size mismatch for conv1.weight: copying a param with shape torch.Size([64, 7, 3]) from checkpoint, the shape in current model is torch.Size([64, 10, 3]).

In [None]:
epochs = range(1, num_epochs + 1)
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(epochs, train_losses, label='Train Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Train Loss')

plt.subplot(1, 2, 2)
plt.plot(epochs, train_f1_scores, label='Train F1-Score')
plt.xlabel('Epochs')
plt.ylabel('F1-Score')
plt.legend()
plt.title('Train F1-Score')

plt.show()

### 예측된 데이터
-> 마지막 예측값을 바탕으로 분류모델을 예측하는 코드
model.eval()
예측 값을 저기에다가 넣으면 됨.
date값 또한 sliding_window를 할때 미리 만들어 놓음

In [None]:
def aggregate_predictions_by_date(predictions, dates, user_ids, window_size, step_size):
    date_user_windows = {}

    # 슬라이딩 윈도우 적용
    for i, (date, user_id) in enumerate(zip(dates, user_ids)):
        key = (date, user_id)
        if key not in date_user_windows:
            date_user_windows[key] = []

        start_idx = i * step_size
        end_idx = start_idx + window_size

        if end_idx <= len(predictions):
            date_user_windows[key].append(predictions[start_idx:end_idx])

    # 예측 값 집계 (마지막 값 사용)
    aggregated_predictions = {}
    for key, windows in date_user_windows.items():
        # 각 윈도우의 마지막 값을 사용하여 집계
        last_values = np.array([window[-1] for window in windows])
        # 최종 결과 (날짜별 단일 레코드로 변환)
        aggregated_predictions[key] = last_values[-1] if last_values.size > 0 else np.zeros(predictions.shape[1])

    return aggregated_predictions


def predictions_to_dataframe(aggregated_predictions):
    records = []
    for (date, user_id), value in aggregated_predictions.items():
        record = {
            'date': date,
            'user_id': user_id,
            'Q1': value[0],
            'Q2': value[1],
            'Q3': value[2],
            'S1': value[3],
            'S2': value[4],
            'S3': value[5],
            'S4': value[6],
        }
        records.append(record)
    df = pd.DataFrame(records)
    return df