In [63]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

# Classifier Libraries
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Other Libraries
from sklearn.metrics import precision_score, recall_score, roc_curve, roc_auc_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_predict

### Data preprocessing

In [64]:
df_label = pd.read_csv(r'C:\Users\user\Desktop\work\AI_study\korea_univ_std\ETRI_human\test_code\test_data\train_label.csv')
df_user_info = pd.read_csv(r'C:\Users\user\Desktop\work\AI_study\korea_univ_std\ETRI_human\test_code\test_data\user_info_2020.csv')
df_user_sleep = pd.read_csv(r'C:\Users\user\Desktop\work\AI_study\korea_univ_std\ETRI_human\test_code\test_data\user_sleep_2020.csv')


In [65]:
df_label.head()

Unnamed: 0.1,Unnamed: 0,subject_id,date,Q1,Q2,Q3,S1,S2,S3,S4
0,0,user01,2020-08-30,1,0,0,1,1,0,0
1,1,user01,2020-08-31,0,0,0,0,1,1,1
2,2,user01,2020-09-01,0,0,0,0,1,1,1
3,3,user01,2020-09-02,1,0,0,1,1,1,1
4,4,user01,2020-09-03,1,0,0,0,1,1,1


In [66]:
'''
1. subject_id - 사용자 식별자

2. age - 사용자 나이

3. gender - 사용자 성별

4. wakeupduration - 기상 시간

5. lightsleepduration - 얕은 수면 시간

6. deepsleepduration - 깊은 수면 시간

7. remsleepduration - REM 수면 시간

8. hr_average - 평균 심박수

9. amCondition - 오전 컨디션

10. pmStress - 오후 스트레스

11. pmFatigue - 오후 피로

12. date - 날짜
'''

col_list = ['subject_id','gender','wakeupduration','lightsleepduration','deepsleepduration','remsleepduration',
            'hr_average','amCondition','pmStress','pmFatigue','date']

In [69]:
'''
- label 데이터와 sleep 데이터에서 사용자별 기록된 날짜가 다름
- 공통으로 기록된 날짜만 산출
'''

combined_results = pd.DataFrame()

# 사용자 ID 설정 (user01 ~ user30)
user_ids = [f'user{i:02d}' for i in range(1, 31)]

for user_id in user_ids:
    # df_label에서 사용자의 'date' 데이터 선택
    user_dates = df_label[df_label['subject_id'] == user_id]['date']
    
    # df_user_sleep에서 해당 사용자의 데이터 선택
    user_data = df_user_sleep[df_user_sleep['userId'] == user_id]
    
    # 각 사용자별 label 데이터 와 sleep 데이터의 공통 날짜 산출
    common_dates = np.intersect1d(user_data['date'], user_dates)
    
    # 공통 날짜를 가진 데이터만 필터링
    filtered_data = user_data[user_data['date'].isin(common_dates)]
    
    # 행 방향으로 모든 결과를 합침
    combined_results = pd.concat([combined_results, filtered_data], axis=0)

# 결과 출력
print(combined_results)

     userId    timezone        date     startDt       endDt  lastUpdate  \
351  user01  Asia/Seoul  2020-08-31  1598802240  1598830980  1598838373   
352  user01  Asia/Seoul  2020-09-01  1598897280  1598922060  1598923633   
353  user01  Asia/Seoul  2020-09-02  1598988060  1599012000  1599012074   
354  user01  Asia/Seoul  2020-09-03  1599071220  1599098460  1599098830   
355  user01  Asia/Seoul  2020-09-04  1599162960  1599185580  1599185829   
..      ...         ...         ...         ...         ...         ...   
507  user30  Asia/Seoul  2020-09-22  1600702320  1600728060  1600728134   
508  user30  Asia/Seoul  2020-09-23  1600789560  1600814400  1600814550   
509  user30  Asia/Seoul  2020-09-24  1600875720  1600896300  1600896726   
510  user30  Asia/Seoul  2020-09-24  1600875720  1600900800  1600900872   
511  user30  Asia/Seoul  2020-09-25  1600965120  1600987200  1600987323   

     wakeupduration  lightsleepduration  deepsleepduration  wakeupcount  ...  \
351            3240

In [70]:
combined_results.rename(columns={'userId': 'subject_id'}, inplace=True)

In [71]:
filtered_label = pd.merge(combined_results[['subject_id', 'date']], df_label, on=['subject_id', 'date'], how='inner')

# 결과 출력
y = filtered_label[['S3','S4']]
x = combined_results

### Train, Test data setting

In [72]:
x.drop(columns=['subject_id','timezone'], inplace=True)

In [73]:
x['date'] = pd.to_datetime(x['date'])
x['date'] = x['date'].values.astype(np.int64)

In [74]:
from sklearn.preprocessing import MinMaxScaler
# from sklearn.preprocessing import StandardScaler

columns_to_scale = ['date', 'startDt', 'endDt','lastUpdate']

mms = MinMaxScaler()
# sc = StandardScaler()

# 스케일링 적용
x[columns_to_scale] = mms.fit_transform(x[columns_to_scale])


In [75]:
x

Unnamed: 0,date,startDt,endDt,lastUpdate,wakeupduration,lightsleepduration,deepsleepduration,wakeupcount,durationtosleep,remsleepduration,...,hr_average,hr_min,hr_max,rr_average,rr_min,rr_max,breathing_disturbances_intensity,snoring,snoringepisodecount,sleep_score
351,0.026316,0.028391,0.027451,0.027452,3240,11700,6120,1,1980,7500,...,69,57,83,16,12,25,18,8880,16,83
352,0.052632,0.057256,0.055195,0.053423,1140,9660,6840,0,1140,7140,...,73,63,82,16,12,22,20,8280,16,82
353,0.078947,0.084828,0.082592,0.080363,2640,6360,7020,3,1680,7260,...,73,58,84,17,9,22,17,8400,7,53
354,0.105263,0.110085,0.108928,0.106789,1560,9360,10140,1,1500,6000,...,72,64,80,16,11,24,20,11400,11,82
355,0.131579,0.137947,0.135466,0.133290,1260,8580,8220,0,1260,4560,...,72,60,85,16,12,21,14,5280,5,60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
507,0.605263,0.605474,0.605318,0.603090,3780,11340,4200,2,2520,6420,...,63,51,82,14,9,23,20,7020,14,56
508,0.631579,0.631970,0.631618,0.629413,1260,10200,3240,0,1260,10140,...,63,51,76,14,10,21,29,8940,16,74
509,0.657895,0.658138,0.656566,0.654444,1320,7500,5100,4,600,6300,...,64,52,84,15,10,21,24,8700,13,45
510,0.657895,0.658138,0.657937,0.655707,1860,10140,5100,5,600,6300,...,63,52,84,14,10,21,22,10200,15,56


In [76]:
y.head()

Unnamed: 0,S3,S4
0,1,1
1,1,1
2,1,1
3,1,1
4,0,1


In [77]:
x.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 503 entries, 351 to 511
Data columns (total 21 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   date                              503 non-null    float64
 1   startDt                           503 non-null    float64
 2   endDt                             503 non-null    float64
 3   lastUpdate                        503 non-null    float64
 4   wakeupduration                    503 non-null    int64  
 5   lightsleepduration                503 non-null    int64  
 6   deepsleepduration                 503 non-null    int64  
 7   wakeupcount                       503 non-null    int64  
 8   durationtosleep                   503 non-null    int64  
 9   remsleepduration                  503 non-null    int64  
 10  durationtowakeup                  503 non-null    int64  
 11  hr_average                        503 non-null    int64  
 12  hr_min

In [78]:
# data split
X_train, X_test, y_train, y_test = train_test_split(x, y)

In [79]:
lscores = []

def result(model, model_reg):
    predicciones = model_reg.predict(X_test)
    y_train_pred = cross_val_predict(model_reg, X_train, y_train, cv=3)
    lscores.append({
        "name" : model,
        "f1" : round(f1_score(y_train, y_train_pred, average="micro"), 4),
        "accuracy" : round(accuracy_score(y_train, y_train_pred), 4),
        "precission" : round(precision_score(y_train, y_train_pred, average="micro"), 4),
        "recall" : round(recall_score(y_train, y_train_pred, average="micro"), 4)
    })

def roc_graph(models):
    plt.figure(figsize=(10,8))
    for i, (name, model) in enumerate(models.items()):
        y_scores = cross_val_predict(model, X_train, y_train, cv=3, method="predict_proba")
        lscores[i]["roc_auc_score"] = round(roc_auc_score(y_train, y_scores, multi_class="ovr"), 4)
        y_scores = y_scores[:,1]
        fpr, tpr, _ = roc_curve(y_train, y_scores, pos_label=1, drop_intermediate=True)
        plt.plot(fpr, tpr, label=name)
    
    plt.title("ROC curve")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.legend()

In [80]:
from sklearn.multioutput import MultiOutputClassifier # multi target classifier 적용을 위한 라이브러리

models = {
    "SGDClassifier": MultiOutputClassifier(SGDClassifier(loss="log_loss")),
    "Logistic Regression": MultiOutputClassifier(LogisticRegression(solver="lbfgs", max_iter=10000)),
    "Decision Tree": MultiOutputClassifier(DecisionTreeClassifier()),
    "Random Forest": MultiOutputClassifier(RandomForestClassifier()),
    "SVM": MultiOutputClassifier(SVC(probability=True)),
    
    "Gradient Boosting": MultiOutputClassifier(GradientBoostingClassifier()),
    "XGBoost": MultiOutputClassifier(XGBClassifier(use_label_encoder=False, eval_metric='logloss')),
    "AdaBoost": MultiOutputClassifier(AdaBoostClassifier(algorithm="SAMME")),
    "Naive Bayes": MultiOutputClassifier(GaussianNB()),
    "MLP Neural Network": MultiOutputClassifier(MLPClassifier(max_iter=10000)),
}



In [81]:
for name, model in models.items():
    print(model)
    model.fit(X_train, y_train)
    result(name, model)

MultiOutputClassifier(estimator=SGDClassifier(loss='log_loss'))
MultiOutputClassifier(estimator=LogisticRegression(max_iter=10000))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

MultiOutputClassifier(estimator=DecisionTreeClassifier())
MultiOutputClassifier(estimator=RandomForestClassifier())
MultiOutputClassifier(estimator=SVC(probability=True))
MultiOutputClassifier(estimator=GradientBoostingClassifier())
MultiOutputClassifier(estimator=XGBClassifier(base_score=None, booster=None,
                                              callbacks=None,
                                              colsample_bylevel=None,
                                              colsample_bynode=None,
                                              colsample_bytree=None,
                                              early_stopping_rounds=None,
                                              enable_categorical=False,
                                              eval_metric='logloss',
                                              feature_types=None, gamma=None,
                                              gpu_id=None, grow_policy=None,
                                              impo



MultiOutputClassifier(estimator=AdaBoostClassifier(algorithm='SAMME'))
MultiOutputClassifier(estimator=GaussianNB())
MultiOutputClassifier(estimator=MLPClassifier(max_iter=10000))


In [82]:
results = pd.DataFrame(lscores)
results

Unnamed: 0,name,f1,accuracy,precission,recall
0,SGDClassifier,0.7331,0.3422,0.6739,0.8036
1,Logistic Regression,0.7569,0.3979,0.6811,0.8517
2,Decision Tree,0.6941,0.3395,0.6948,0.6934
3,Random Forest,0.7778,0.4377,0.7034,0.8697
4,SVM,0.7887,0.3926,0.6617,0.976
5,Gradient Boosting,0.7528,0.4058,0.7019,0.8116
6,XGBoost,0.7535,0.3899,0.7031,0.8116
7,AdaBoost,0.7332,0.3634,0.6859,0.7876
8,Naive Bayes,0.7178,0.3501,0.6804,0.7595
9,MLP Neural Network,0.6167,0.2838,0.6621,0.5772


In [85]:
for name, model in models.items():
    predicciones = model.predict(X_test)
    print(f'{name} prediction : {predicciones}')

    # errores=0
    # for i, prediccion in enumerate(predicciones):
    #     if prediccion != list(y_test)[i]:
    #         errores+=1
    # print(f"{name} Errors: {errores} in {len(X_test)} with: {round(errores/len(X_test)*100, 2)}% of error")

SGDClassifier prediction : [[1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 0]
 [1 1]
 [1 0]]
Logistic Regression prediction : [[1 0]
 [1 0]
 [1 1]
 [1 1]
 [0 1]
 [0 1]
 [1 1]
 [1 1]
 

### optuna tuning

In [38]:
# !pip install optuna

Collecting optunaNote: you may need to restart the kernel to use updated packages.

  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
   ---------------------------------------- 380.1/380.1 kB 3.9 MB/s eta 0:00:00
Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
   ---------------------------------------- 233.4/233.4 kB 4.7 MB/s eta 0:00:00
Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
   ---------------------------------------- 78.6/78.6 kB 4.3 MB/s eta 0:00:00
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.5 alembic-1

In [88]:
# 수정 중

import optuna
from sklearn.metrics import mean_squared_error

def objective(trial):
   param = {
       'tree_method':'hist',
       #'criterion' : trial.suggest_categorical("criterion", ["gini", "entropy"]),
       'eta': trial.suggest_float('eta', 1.25, 2),
       'max_depth': trial.suggest_int('max_depth', 6, 9),
       'learning_rate': trial.suggest_float('learning_rate', 0.020, 0.5),
       'n_estimators': trial.suggest_int('n_estimators', 300, 450),
       'min_child_weight': trial.suggest_int('min_child_weight', 7, 9),
       'gamma': trial.suggest_float('gamma', 0.5, 0.8),
       'subsample': trial.suggest_float('subsample', 0.7, 1.0),
       'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 0.55),
       'reg_alpha': trial.suggest_float('reg_alpha', 0.2, 0.6),
       'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 0.6),
   }

   model = XGBClassifier(**param)
#  model = XGBClassifier(**param)
   model.fit(X_train, y_train)
   y_pred = model.predict(X_test)
   f1 = f1_score(y_test, y_pred, average='macro')
   accuracy = accuracy_score(y_test, y_pred, average='macro')
   precisio = precision_score(y_test, y_pred, average='macro')
   recall = recall_score(y_test, y_pred, average='macro')

   return mean_squared_error(y_test, y_pred),



study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100)

study.best_params

[I 2024-06-05 21:58:22,638] A new study created in memory with name: no-name-4c5ab846-12c4-406c-8861-7e9098031f58
[I 2024-06-05 21:58:23,050] Trial 0 finished with value: 0.376984126984127 and parameters: {'eta': 1.8939812492647512, 'max_depth': 6, 'learning_rate': 0.18718203768305255, 'n_estimators': 309, 'min_child_weight': 7, 'gamma': 0.5838951915931766, 'subsample': 0.7610968571189265, 'colsample_bytree': 0.46294608825277306, 'reg_alpha': 0.34094632614574305, 'reg_lambda': 0.13096140220504904}. Best is trial 0 with value: 0.376984126984127.
[I 2024-06-05 21:58:23,396] Trial 1 finished with value: 0.34523809523809523 and parameters: {'eta': 1.6280124560953122, 'max_depth': 8, 'learning_rate': 0.26070364738961105, 'n_estimators': 327, 'min_child_weight': 8, 'gamma': 0.5914424981471, 'subsample': 0.9206366128808114, 'colsample_bytree': 0.48162618609146896, 'reg_alpha': 0.5066659809776826, 'reg_lambda': 0.3866025139186137}. Best is trial 1 with value: 0.34523809523809523.
[I 2024-06-05

{'eta': 1.4223915845004769,
 'max_depth': 6,
 'learning_rate': 0.021684776474925022,
 'n_estimators': 319,
 'min_child_weight': 8,
 'gamma': 0.7312387368139835,
 'subsample': 0.977355736373932,
 'colsample_bytree': 0.4165905019246019,
 'reg_alpha': 0.49336036607028866,
 'reg_lambda': 0.522296059822283}

In [90]:
trial = study.best_trial
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

Accuracy: 0.3214285714285714
Best hyperparameters: {'eta': 1.4223915845004769, 'max_depth': 6, 'learning_rate': 0.021684776474925022, 'n_estimators': 319, 'min_child_weight': 8, 'gamma': 0.7312387368139835, 'subsample': 0.977355736373932, 'colsample_bytree': 0.4165905019246019, 'reg_alpha': 0.49336036607028866, 'reg_lambda': 0.522296059822283}


In [139]:
optuna.visualization.plot_optimization_history(study)

### DL - 코드 수정 중

In [92]:
import torch

In [168]:
X_train_full, X_test, y_train_full, y_test = train_test_split(x, y)
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full)

from sklearn.preprocessing import StandardScaler
# StandarScaler for removing data that is too different see "Weight" in this dataset
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_val = sc.fit_transform(X_val)
X_test = sc.fit_transform(X_test)
print(X_train.shape)

(282, 21)


### DL model example

In [173]:
import torch.nn as nn
import torch.optim as optim

num_classes = 2 # S3, S4

# 모델 정의
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.stack = nn.Sequential(
            nn.Linear(X_train.shape[1], 32),
            nn.Linear(32, 64),
            nn.Linear(64, 128),
            nn.Linear(128, 64),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes),
            # nn.Softmax(dim=num_classes)
        )

    def forward(self, x):
        return self.stack(x)

model = NeuralNetwork()

# 손실 함수 및 옵티마이저
criterion = nn.CrossEntropyLoss()
# criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)


# 훈련 루프
def train_model(model, train_loader, val_loader, criterion, optimizer, n_epochs=700, patience=50):
    best_loss = float('inf')
    early_stopping_counter = 0

    for epoch in range(n_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        
        # 검증 손실 계산
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
        val_loss /= len(val_loader.dataset)

        print(f'Epoch {epoch + 1} - Training Loss: {epoch_loss:.4f}, Validation Loss: {val_loss:.4f}')

        # # early stopping
        # if val_loss < best_loss:
        #     best_loss = val_loss
        #     torch.save(model.state_dict(), 'model/my_torch_model.pth')
        #     early_stopping_counter = 0
        # else:
        #     early_stopping_counter += 1
        #     if early_stopping_counter >= patience:
        #         print("Early stopping triggered.")
        #         break



In [174]:
# X_train_np = X_train_numeric.values.astype(np.float32)
y_train_np = y_train.values.astype(np.float32)
y_val_np = y_val.values.astype(np.float32)
y_test_np = y_test.values.astype(np.float32)

AttributeError: 'builtin_function_or_method' object has no attribute 'astype'

In [175]:
from torch.utils.data import DataLoader, TensorDataset

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train_np, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val_np, dtype=torch.float32)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test_np, dtype=torch.float32)


# Create data loaders
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

val_dataset = TensorDataset(X_val, y_val)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


  X_train = torch.tensor(X_train, dtype=torch.float32)
  X_val = torch.tensor(X_val, dtype=torch.float32)
  X_test = torch.tensor(X_test, dtype=torch.float32)


In [172]:
# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer)

# Save the model
torch.save(model.state_dict(), './model/torch_model_ep700.pth')

IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

In [142]:
# Load and evaluate the model using the test data
model.load_state_dict(torch.load('./model/torch_model_ep700.pth'))
model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * inputs.size(0)
test_loss /= len(test_loader.dataset)
print(f'Test Loss: {test_loss:.4f}')

Test Loss: 1.0316


In [141]:
outputs = []
# with torch.no_grad():
#     outputs = []
#     for inputs, labels in test_loader:
#         outputs.append(model(inputs))

In [114]:
_, preds = torch.max(outputs[0], 1)

In [143]:
_, preds0 = torch.max(outputs[0], 1)
_, preds1 = torch.max(outputs[1], 1)

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [161]:
outputs

tensor([[ 0.2979,  0.3285],
        [-0.0634, -0.2568],
        [-1.0328, -0.8654],
        [-0.4197,  0.5527],
        [-0.5957,  0.3472],
        [-0.4240,  0.5928],
        [-1.0050,  0.2877],
        [-0.9856, -0.0297],
        [-0.7264,  0.8510],
        [-0.9878, -0.5184],
        [-1.1781, -0.1177],
        [-0.5249, -0.1338],
        [ 0.1739,  0.8200],
        [-0.1445,  0.3792],
        [-0.4709, -0.7983],
        [-1.3446, -1.4599],
        [-1.0984,  1.1216],
        [ 0.6810, -0.5938],
        [-1.4285,  0.2711],
        [-0.9706, -0.3931],
        [-1.4500, -0.8228],
        [ 0.0775, -0.2695],
        [-0.2296, -0.4983],
        [-0.2612, -0.3220],
        [ 0.2392, -0.3585],
        [ 0.0489,  0.0479],
        [ 0.4062, -0.3668],
        [-2.0415, -1.3659],
        [-1.8899, -0.0319],
        [-1.5624,  0.5418],
        [-0.8080,  0.3701],
        [-0.9419, -0.6289],
        [-2.0424, -2.1964],
        [ 0.3966, -0.1332],
        [-0.1440,  0.1239],
        [ 0.8838, -1

In [150]:
labels

tensor([[1., 1.],
        [0., 1.],
        [1., 0.],
        [1., 1.],
        [0., 0.],
        [0., 1.],
        [1., 1.],
        [1., 0.],
        [1., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 0.],
        [0., 1.],
        [1., 1.],
        [1., 0.],
        [1., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 1.],
        [1., 0.],
        [0., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 0.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [1., 0.],
        [0., 1.],
        [1., 1.],
        [1., 1.],
        [1., 0.],
        [1., 1.],
        [0., 0.],
        [1., 1.],
        [1., 0.],
        [1., 0.],
        [1., 1.],
        [0., 0.],
        [0., 0.],
        [1., 1.],
        [1., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 0.],
        [1., 1.],
        [1

In [120]:
import torch
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


# Converting probabilities to class labels
_, predicted_labels = torch.max(labels, 1)
_, preds = torch.max(outputs, 1)

# Convert to numpy arrays for compatibility with sklearn
true_labels_np = preds.numpy()
predicted_labels_np = predicted_labels.numpy()

# Calculate metrics
precision = precision_score(true_labels_np, predicted_labels_np)
recall = recall_score(true_labels_np, predicted_labels_np)
f1 = f1_score(true_labels_np, predicted_labels_np)
accuracy = accuracy_score(true_labels_np, predicted_labels_np)


print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Accuracy: {accuracy}")



TypeError: max() received an invalid combination of arguments - got (list, int), but expected one of:
 * (Tensor input, *, Tensor out)
 * (Tensor input, Tensor other, *, Tensor out)
 * (Tensor input, int dim, bool keepdim, *, tuple of Tensors out)
 * (Tensor input, name dim, bool keepdim, *, tuple of Tensors out)
