In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.decomposition import PCA

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
file_path = "/Users/song/Desktop/AI Factory/HumanUnderstanding2024/train dataset"

df_user_sleep = pd.read_csv(file_path + "/user_sleep_2020.csv")
df_user_survey = pd.read_csv(file_path + "/user_survey_2020.csv")

In [3]:
# AM,PM 시계열 상으로 같은 날짜 합치기
df_user_survey = df_user_survey.groupby(['userId', 'date']).agg({
    'startInput': 'first',  # 시작 입력값은 AM의 값을 사용
    'endInput': 'last',  # 종료 입력값은 PM의 값을 사용
    'sleep': 'max',  # 수면 시간은 최대값을 사용 (일반적으로 AM에 기록)
    'sleepProblem': 'max',  # 수면 문제 역시 최대값을 사용
    'dream': 'max',
    'amCondition': 'max',
    'amEmotion': 'max',
    'pmEmotion': 'max',
    'pmStress': 'max',
    'pmFatigue': 'max',
    'caffeine': 'last',  # 카페인 섭취는 PM에 기록된 값을 사용
    'cAmount(ml)': 'sum',  # 카페인 섭취량은 합계를 사용
    'alcohol': 'last',
    'aAmount(ml)': 'sum'
}).reset_index()

# 결측치 처리하기
df_user_survey.fillna({
    'sleep': 0,
    'sleepProblem': 0,
    'dream': 0,
    'amCondition': 0,
    'amEmotion': 0,
    'pmEmotion': 0,
    'pmStress': 0,
    'pmFatigue': 0,
    'cAmount(ml)': 0,
    'aAmount(ml)': 0,
    'caffeine' : 0,
    'alcohol': 0  # 'alcohol' 컬럼에 대해서는 'not specific'으로 채움
}, inplace=True)

caffeine_map = {'not specific': 0, 'tea': 1, 'coke': 1, 'caffeinated drink': 1, 'coffee': 1, 0 : 0}
alcohol_map = {
    'not specified': 0, 'soju&beer': 1, 'soju': 1, 
    'beer': 1, 'beer&rice wine' : 1, 'wine': 1, 'rice wine': 1, 
    'cognac': 1, 'wine&beer': 1, 'kaoliang': 1, 0 : 0
}

# 매핑 적용
df_user_survey['caffeine'] = df_user_survey['caffeine'].map(caffeine_map)
df_user_survey['alcohol'] = df_user_survey['alcohol'].map(alcohol_map)


df_user_sleep.drop(columns='timezone', axis=1, inplace=True)

df_user_sleep.head()

Unnamed: 0,userId,date,startDt,endDt,lastUpdate,wakeupduration,lightsleepduration,deepsleepduration,wakeupcount,durationtosleep,...,hr_average,hr_min,hr_max,rr_average,rr_min,rr_max,breathing_disturbances_intensity,snoring,snoringepisodecount,sleep_score
0,user24,2020-08-30,1598708760,1598740860,1598748252,3840,13800,9000,3,1740,...,72,63,82,15,10,20,10,840,1,86
1,user24,2020-08-31,1598800140,1598825940,1598833333,4740,9540,6360,1,3180,...,69,60,82,15,11,20,8,2700,3,52
2,user24,2020-09-01,1598885940,1598908800,1598916190,1920,8760,4740,1,1440,...,70,58,94,15,12,23,8,0,0,61
3,user24,2020-09-02,1598979420,1598999580,1599006969,2760,7740,7440,1,1380,...,68,60,80,15,12,19,-1,240,1,40
4,user24,2020-09-03,1599060780,1599085980,1599093370,3900,11640,7620,1,1680,...,71,61,83,15,11,25,7,300,1,55


In [4]:
data_all = pd.merge(df_user_sleep, df_user_survey, on = ["date", "userId"], how='inner')

In [5]:
# train_label.csv 파일 불러오기
train_label_path = "/Users/song/Desktop/AI Factory/HumanUnderstanding2024/train_label.csv"

train_labels = pd.read_csv(train_label_path)
train_labels.drop(columns="Unnamed: 0", axis=1, inplace=True)

train_label_user01 = train_labels[train_labels['subject_id'] == "user01"]

train_label_user01.head()

Unnamed: 0,subject_id,date,Q1,Q2,Q3,S1,S2,S3,S4
0,user01,2020-08-30,1,0,0,1,1,0,0
1,user01,2020-08-31,0,0,0,0,1,1,1
2,user01,2020-09-01,0,0,0,0,1,1,1
3,user01,2020-09-02,1,0,0,1,1,1,1
4,user01,2020-09-03,1,0,0,0,1,1,1


In [6]:
data_rf_features = data_all[['pmEmotion', 'pmStress', 'userId', 'lightsleepduration', 'wakeupduration', 'deepsleepduration', 'durationtosleep', 'remsleepduration', 'hr_max', 'hr_average', 'date']]

### LSTM-Randomforest

In [7]:
import os
import pandas as pd

def load_data(base_path, detail_path='mAcc'):
    """
    주어진 기본 경로와 세부 경로를 사용하여 모든 CSV 파일을 읽고 하나의 데이터프레임으로 결합합니다.
    지정된 날짜 범위 내의 데이터만 포함합니다.

    Parameters:
    base_path (str): 기본 경로
    detail_path (str): 세부 경로 (기본값: 'mAcc')
    start_date (datetime): 포함할 데이터의 시작 날짜 (UTC 기준)
    end_date (datetime): 포함할 데이터의 종료 날짜 (UTC 기준)

    Returns:
    pd.DataFrame: 결합된 데이터프레임
    """
    # 모든 디렉토리 리스트 가져오기 ('.DS_Store' 제외)
    directories = [d for d in os.listdir(base_path) if d != '.DS_Store']

    # 모든 .csv 파일 경로 리스트 생성
    csv_files = []
    for directory in directories:
        target_path = os.path.join(base_path, directory, detail_path)
        if os.path.exists(target_path):  # 세부 경로 폴더가 존재하는지 확인
            for file in os.listdir(target_path):
                if file.endswith('.csv'):
                    csv_files.append(os.path.join(target_path, file))

    # csv_files 리스트를 오름차순으로 정렬
    csv_files = sorted(csv_files)

    # 각 파일의 데이터 읽어와서 데이터프레임 리스트에 저장
    data_frames = []
    for file in csv_files:
        df = pd.read_csv(file)
        df['source_file'] = os.path.basename(file).replace('.csv', '')  # 파일 이름에서 .csv 제거
        data_frames.append(df)

    # 모든 데이터프레임을 하나로 합치기
    combined_df = pd.concat(data_frames, ignore_index=True)

        # source_file 컬럼을 datetime으로 변환하고 timestamp와 합침
    combined_df['source_file'] = pd.to_datetime(combined_df['source_file'], unit='s')
    combined_df['timestamp'] = combined_df['source_file'] + pd.to_timedelta(combined_df['timestamp'], unit='s')

    return combined_df

In [8]:
# 예시 사용
user01_path = "/Users/song/Desktop/AI Factory/HumanUnderStanding2024/train dataset/user01-06/user01"
# resample_interval = '100L'

user01_mAcc_data = load_data(user01_path)

user01_mAcc_data.drop(columns='source_file', axis=1, inplace=True)

# user01_mAcc_data = user01_mAcc_data.set_index('timestamp').resample(resample_interval).mean().interpolate().reset_index()
user01_mAcc_data.head()

Unnamed: 0,timestamp,x,y,z
0,2020-08-30 03:59:32.251,-0.090982,0.198725,9.749487
1,2020-08-30 03:59:32.366,-0.090982,0.198725,9.749487
2,2020-08-30 03:59:32.627,-0.071828,0.244216,9.742304
3,2020-08-30 03:59:32.635,-0.076617,0.215485,9.73991
4,2020-08-30 03:59:32.682,-0.093377,0.198725,9.75667


In [9]:
user01_e4Hr_data = load_data(user01_path, 'e4Hr')

user01_e4Hr_data.drop(columns='source_file', axis=1, inplace=True)

# user01_e4Hr_data = user01_e4Hr_data.set_index('timestamp').resample(resample_interval).mean().interpolate().reset_index()
user01_e4Hr_data.head()

Unnamed: 0,timestamp,hr
0,2020-08-31 00:21:53,74.0
1,2020-08-31 00:21:54,69.0
2,2020-08-31 00:21:55,74.33
3,2020-08-31 00:21:56,89.0
4,2020-08-31 00:21:57,93.8


In [10]:
user01_mAcc_data['date'] = user01_mAcc_data['timestamp'].dt.date
user01_mAcc_data['date'] = pd.to_datetime(user01_mAcc_data['date'])

user01_e4Hr_data['date'] = user01_e4Hr_data['timestamp'].dt.date
user01_e4Hr_data['date'] = pd.to_datetime(user01_e4Hr_data['date'])

train_label_user01['date'] = pd.to_datetime(train_label_user01['date'])

In [11]:
user01_mAcc_data = pd.merge(user01_mAcc_data, train_label_user01, on='date', how='inner')

user01_mAcc_data.head()

Unnamed: 0,timestamp,x,y,z,date,subject_id,Q1,Q2,Q3,S1,S2,S3,S4
0,2020-08-30 03:59:32.251,-0.090982,0.198725,9.749487,2020-08-30,user01,1,0,0,1,1,0,0
1,2020-08-30 03:59:32.366,-0.090982,0.198725,9.749487,2020-08-30,user01,1,0,0,1,1,0,0
2,2020-08-30 03:59:32.627,-0.071828,0.244216,9.742304,2020-08-30,user01,1,0,0,1,1,0,0
3,2020-08-30 03:59:32.635,-0.076617,0.215485,9.73991,2020-08-30,user01,1,0,0,1,1,0,0
4,2020-08-30 03:59:32.682,-0.093377,0.198725,9.75667,2020-08-30,user01,1,0,0,1,1,0,0


In [12]:
user01_e4Hr_data = pd.merge(user01_e4Hr_data, train_label_user01, on='date', how='inner')

user01_e4Hr_data.head()

Unnamed: 0,timestamp,hr,date,subject_id,Q1,Q2,Q3,S1,S2,S3,S4
0,2020-08-31 00:21:53,74.0,2020-08-31,user01,0,0,0,0,1,1,1
1,2020-08-31 00:21:54,69.0,2020-08-31,user01,0,0,0,0,1,1,1
2,2020-08-31 00:21:55,74.33,2020-08-31,user01,0,0,0,0,1,1,1
3,2020-08-31 00:21:56,89.0,2020-08-31,user01,0,0,0,0,1,1,1
4,2020-08-31 00:21:57,93.8,2020-08-31,user01,0,0,0,0,1,1,1


In [13]:
data_rf_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 576 entries, 0 to 575
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   pmEmotion           576 non-null    float64
 1   pmStress            576 non-null    float64
 2   userId              576 non-null    object 
 3   lightsleepduration  576 non-null    int64  
 4   wakeupduration      576 non-null    int64  
 5   deepsleepduration   576 non-null    int64  
 6   durationtosleep     576 non-null    int64  
 7   remsleepduration    576 non-null    int64  
 8   hr_max              576 non-null    int64  
 9   hr_average          576 non-null    int64  
 10  date                576 non-null    object 
dtypes: float64(2), int64(7), object(2)
memory usage: 49.6+ KB


In [14]:
data_rf_features['date']= pd.to_datetime(data_rf_features['date'])


data_rf_features['userId'] = data_rf_features['userId'].str.replace('user', '').astype(int)

# 스케일러 초기화
scaler = StandardScaler()

# 스케일링할 열 선택 (datetime 열과 target 열을 제외한 수치형 열)
features_to_scale = data_rf_features.drop(columns=['date', 'userId']).columns

# 데이터프레임을 수치형 데이터와 날짜 데이터로 분리
numeric_data = data_rf_features[features_to_scale]
date_data = data_rf_features['date']
user_id_data = data_rf_features['userId']

# 스케일링 적용
scaled_features = scaler.fit_transform(numeric_data)

# 스케일링된 데이터를 데이터프레임으로 변환
data_features = pd.DataFrame(scaled_features, columns=features_to_scale)

# 날짜 데이터와 결합
data_features['date'] = date_data
data_features['userId'] = user_id_data

# 열 순서 복원
data_features = data_features[['date', 'userId'] + list(features_to_scale)]

data_features.head()


Unnamed: 0,date,userId,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average
0,2020-08-30,24,0.443799,1.663486,0.734769,0.286538,1.292229,-0.105555,0.206542,0.663944,1.399367
1,2020-08-31,24,0.443799,-0.855799,-0.084075,0.655933,0.359492,0.765083,0.105687,0.663944,0.958187
2,2020-09-01,24,1.550413,0.823725,-0.234004,-0.501505,-0.212869,-0.286938,0.932694,2.269674,1.105247
3,2020-09-02,24,0.443799,0.823725,-0.430065,-0.156736,0.741066,-0.323214,-0.822175,0.396322,0.811127
4,2020-09-03,24,-0.662816,-0.855799,0.319581,0.311164,0.804662,-0.141831,-0.882687,0.797754,1.252307


In [16]:
data_features_user01 = data_features[data_features['userId'] == 1].reset_index()

data_features_user01.drop(columns='index', axis=1, inplace=True)

data_features_user01.head()

Unnamed: 0,date,userId,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average
0,2020-08-31,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
1,2020-09-01,1,0.443799,-0.016037,-0.061009,-0.821648,0.529081,-0.468321,0.83184,0.663944,1.546426
2,2020-09-02,1,0.443799,-0.016037,-0.695324,-0.205989,0.592676,-0.141831,0.872182,0.931565,1.546426
3,2020-09-03,1,-1.76943,-0.855799,-0.118674,-0.649263,1.695002,-0.250661,0.448593,0.396322,1.399367
4,2020-09-04,1,0.443799,-0.855799,-0.268603,-0.772395,1.016648,-0.395767,-0.035509,1.065376,1.399367


In [17]:
user01_mAcc_with_targetOne = pd.merge(user01_mAcc_data, data_features_user01, on='date', how='inner')

user01_mAcc_with_targetOne.head()

Unnamed: 0,timestamp,x,y,z,date,subject_id,Q1,Q2,Q3,S1,...,userId,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average
0,2020-08-31 00:10:40.004,0.069434,-0.069434,9.703996,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
1,2020-08-31 00:10:40.016,0.079011,-0.062251,9.701602,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
2,2020-08-31 00:10:40.051,0.079011,-0.081405,9.696813,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
3,2020-08-31 00:10:40.082,0.086194,-0.071828,9.682447,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
4,2020-08-31 00:10:40.121,0.074223,-0.086194,9.718362,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187


In [18]:
user01_e4Hr_with_targetOne = pd.merge(user01_e4Hr_data, data_features_user01, on='date', how='inner')

user01_mAcc_with_targetOne.head()

Unnamed: 0,timestamp,x,y,z,date,subject_id,Q1,Q2,Q3,S1,...,userId,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average
0,2020-08-31 00:10:40.004,0.069434,-0.069434,9.703996,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
1,2020-08-31 00:10:40.016,0.079011,-0.062251,9.701602,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
2,2020-08-31 00:10:40.051,0.079011,-0.081405,9.696813,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
3,2020-08-31 00:10:40.082,0.086194,-0.071828,9.682447,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187
4,2020-08-31 00:10:40.121,0.074223,-0.086194,9.718362,2020-08-31,user01,0,0,0,0,...,1,0.443799,-0.016037,0.331114,0.040274,0.274698,0.039552,0.952865,0.797754,0.958187


In [12]:
# # user01_mAcc_with_targetOne = pd.merge(user01_mAcc_data, data_rf_features, on='date', how='inner')
# chunk_size = 100000  # 조각 크기 설정

# # 데이터 나누기 함수
# def chunk_data(df, chunk_size):
#     return [df[i:i + chunk_size] for i in range(0, df.shape[0], chunk_size)]

# # 데이터 나누기
# mAcc_chunks = chunk_data(user01_mAcc_data, chunk_size)
# merged_chunks = []

# # 각 조각을 병합
# for chunk in mAcc_chunks:
#     merged_chunk = pd.merge(chunk, data_features, on='date', how='inner')
#     merged_chunks.append(merged_chunk)

# # 병합된 데이터 결합
# user01_mAcc_with_targetOne = pd.concat(merged_chunks, ignore_index=True)


In [13]:
# user01_e4Hr_with_targetOne = pd.merge(user01_e4Hr_data, data_rf_features, on='date', how='inner')

# # 데이터 나누기
# e4Hr_chunks = chunk_data(user01_e4Hr_data, chunk_size)
# merged_chunks = []

# # 각 조각을 병합
# for chunk in e4Hr_chunks:
#     merged_chunk = pd.merge(chunk, data_features, on='date', how='inner')
#     merged_chunks.append(merged_chunk)

# # 병합된 데이터 결합
# user01_e4Hr_with_targetOne = pd.concat(merged_chunks, ignore_index=True)

In [19]:
col_label = ['subject_id','Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3', 'S4']
last_label = user01_mAcc_with_targetOne[col_label]

user01_mAcc_with_targetOne.drop(columns=col_label, axis=1, inplace=True)
user01_e4Hr_with_targetOne.drop(columns=col_label, axis=1, inplace=True)

last_label.head()

Unnamed: 0,subject_id,Q1,Q2,Q3,S1,S2,S3,S4
0,user01,0,0,0,0,1,1,1
1,user01,0,0,0,0,1,1,1
2,user01,0,0,0,0,1,1,1
3,user01,0,0,0,0,1,1,1
4,user01,0,0,0,0,1,1,1


In [20]:
def create_sliding_windows(data, window_size, step_size, feature_cols, label_cols):
    X, y = [], []
    unique_dates = data['date'].unique()
    for date in unique_dates:
        daily_data = data[data['date'] == date]
        for start in range(0, len(daily_data) - window_size + 1, step_size):
            end = start + window_size
            window = daily_data.iloc[start:end]
            X.append(window[feature_cols].values)
            y.append(window[label_cols].iloc[0].values)  # 라벨은 윈도우의 첫 번째 값 사용
    return np.array(X), np.array(y)

In [21]:
# 슬라이딩 윈도우 설정 (2.5초)
window_size_acc_hr = 150  # 2.5초 * 60Hz
step_size_acc_hr = 150    # 2.5초 * 60Hz
label_columns = ['pmEmotion', 'pmStress', 'userId', 'lightsleepduration', 'wakeupduration', 'deepsleepduration', 'durationtosleep', 'remsleepduration', 'hr_max', 'hr_average', 'date']

# 슬라이딩 윈도우 생성
X_acc, y = create_sliding_windows(user01_mAcc_with_targetOne, window_size_acc_hr, step_size_acc_hr, ['x', 'y', 'z'], label_columns)
X_hr, _ = create_sliding_windows(user01_e4Hr_with_targetOne, window_size_acc_hr, step_size_acc_hr, ['hr'], label_columns)

In [22]:
# 데이터 길이를 padding 하여 맞춘다. 맞추지 않으면 tensor에서 size_error 발생

def pad_to_max_length(arr, max_length):
    """
    배열을 최대 길이에 맞추어 0으로 패딩합니다.
    """
    if len(arr) < max_length:
        pad_width = [(0, max_length - len(arr))] + [(0, 0)] * (arr.ndim - 1)
        return np.pad(arr, pad_width, mode='constant', constant_values=0)
    else:
        return arr

In [23]:
# 각 데이터셋의 길이 확인
max_length = max(len(X_acc), len(X_hr))

# 데이터셋을 최대 길이에 맞추어 패딩
X_acc_padded = pad_to_max_length(X_acc, max_length)
X_hr_padded = pad_to_max_length(X_hr, max_length)

# 텐서로 변환
X_acc_tensor = torch.tensor(X_acc_padded, dtype=torch.float32)
X_hr_tensor = torch.tensor(X_hr_padded, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

In [24]:
print(f"Shape of X_acc_tensor: {X_acc_tensor.shape}")
print(f"Shape of X_hr_tensor: {X_hr_tensor.shape}")
print(f"Shape of y_tensor: {y_tensor.shape}")

Shape of X_acc_tensor: torch.Size([103173, 150, 3])
Shape of X_hr_tensor: torch.Size([103173, 150, 1])
Shape of y_tensor: torch.Size([103173, 10])


In [25]:
# 데이터셋 및 데이터로더 준비
train_dataset = TensorDataset(X_acc_tensor, X_hr_tensor, y_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [26]:
# 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, input_size_acc, input_size_hr, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()


        self.lstm_acc = nn.LSTM(input_size_acc, hidden_size, num_layers, batch_first=True)
        self.lstm_hr = nn.LSTM(input_size_hr, hidden_size, num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size * 2, num_classes)
    
    def forward(self, x_acc, x_hr):
        h0_acc = torch.zeros(num_layers, x_acc.size(0), hidden_size).to(device)
        c0_acc = torch.zeros(num_layers, x_acc.size(0), hidden_size).to(device)
        out_acc, _ = self.lstm_acc(x_acc, (h0_acc, c0_acc))
        
        h0_hr = torch.zeros(num_layers, x_hr.size(0), hidden_size).to(device)
        c0_hr = torch.zeros(num_layers, x_hr.size(0), hidden_size).to(device)
        out_hr, _ = self.lstm_hr(x_hr, (h0_hr, c0_hr))

        
        out_acc = out_acc[:, -1, :]
        out_hr = out_hr[:, -1, :]
        
        combined = torch.cat((out_acc, out_hr), dim=1)
        out = self.fc(combined)
        return out

In [27]:
# 하이퍼파라미터 설정
input_size_acc = X_acc.shape[2]
input_size_hr = X_hr.shape[2]
hidden_size = 64
num_layers = 2
num_classes = y.shape[1]

# 모델 인스턴스 생성
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
model = LSTMModel(input_size_acc, input_size_hr, hidden_size, num_layers, num_classes).to(device)

print(device)

mps


In [28]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 모델 학습
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for X_acc_batch, X_hr_batch, y_batch in train_loader:
        X_acc_batch, X_hr_batch, y_batch = X_acc_batch.to(device), X_hr_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_acc_batch, X_hr_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}')

Epoch [1/5], Train Loss: 0.4410
Epoch [2/5], Train Loss: 0.4128
Epoch [3/5], Train Loss: 0.3934
Epoch [4/5], Train Loss: 0.3818
Epoch [5/5], Train Loss: 0.3877


In [26]:
# 모델 상태 딕셔너리 저장
torch.save(model.state_dict(), 'model_state_dict_user01.pth')

In [29]:
# train_label.csv 파일 불러오기
train_label_path = "/Users/song/Desktop/AI Factory/HumanUnderstanding2024/train_label.csv"

train_labels = pd.read_csv(train_label_path)
train_labels.drop(columns="Unnamed: 0", axis=1, inplace=True)

train_label_user01 = train_labels[train_labels['subject_id'] == "user01"]

train_label_user01.head()

Unnamed: 0,subject_id,date,Q1,Q2,Q3,S1,S2,S3,S4
0,user01,2020-08-30,1,0,0,1,1,0,0
1,user01,2020-08-31,0,0,0,0,1,1,1
2,user01,2020-09-01,0,0,0,0,1,1,1
3,user01,2020-09-02,1,0,0,1,1,1,1
4,user01,2020-09-03,1,0,0,0,1,1,1


In [29]:
# 모델 평가 및 예측 값 저장
model.eval()
predictions = []
with torch.no_grad():
    for X_acc_batch, X_hr_batch, y_batch in train_loader:
        X_acc_batch, X_hr_batch = X_acc_batch.to(device), X_hr_batch.to(device)
        output = model(X_acc_batch, X_hr_batch)
        predictions.append(output.cpu().numpy())

predictions = np.concatenate(predictions, axis=0)

In [30]:
predictions.shape

(103173, 10)

In [35]:
user01_mAcc_with_label = load_data(user01_path)

user01_mAcc_with_label.drop(columns='source_file', axis=1, inplace=True)

# user01_mAcc_data = user01_mAcc_data.set_index('timestamp').resample(resample_interval).mean().interpolate().reset_index()
user01_mAcc_with_label['date'] = user01_mAcc_with_label['timestamp'].dt.date
user01_mAcc_with_label['date'] = pd.to_datetime(user01_mAcc_with_label['date'])

In [32]:
train_label_user01['date'] = pd.to_datetime(train_label_user01['date'])

train_label_user01['subject_id'] = train_label_user01['subject_id'].str.replace('user', '').astype(int)

train_label_user01.head()

Unnamed: 0,subject_id,date,Q1,Q2,Q3,S1,S2,S3,S4
0,1,2020-08-30,1,0,0,1,1,0,0
1,1,2020-08-31,0,0,0,0,1,1,1
2,1,2020-09-01,0,0,0,0,1,1,1
3,1,2020-09-02,1,0,0,1,1,1,1
4,1,2020-09-03,1,0,0,0,1,1,1


In [45]:
user01_with_label = pd.merge(user01_mAcc_with_targetOne, train_label_user01, on='date', how='inner')

user01_with_label.head()

Unnamed: 0,timestamp,x,y,z,date,userId,pmEmotion,pmStress,lightsleepduration,wakeupduration,...,hr_max,hr_average,subject_id,Q1,Q2,Q3,S1,S2,S3,S4
0,2020-08-31 00:10:40.004,0.069434,-0.069434,9.703996,2020-08-31,1,0.443799,-0.016037,0.331114,0.040274,...,0.797754,0.958187,1,0,0,0,0,1,1,1
1,2020-08-31 00:10:40.016,0.079011,-0.062251,9.701602,2020-08-31,1,0.443799,-0.016037,0.331114,0.040274,...,0.797754,0.958187,1,0,0,0,0,1,1,1
2,2020-08-31 00:10:40.051,0.079011,-0.081405,9.696813,2020-08-31,1,0.443799,-0.016037,0.331114,0.040274,...,0.797754,0.958187,1,0,0,0,0,1,1,1
3,2020-08-31 00:10:40.082,0.086194,-0.071828,9.682447,2020-08-31,1,0.443799,-0.016037,0.331114,0.040274,...,0.797754,0.958187,1,0,0,0,0,1,1,1
4,2020-08-31 00:10:40.121,0.074223,-0.086194,9.718362,2020-08-31,1,0.443799,-0.016037,0.331114,0.040274,...,0.797754,0.958187,1,0,0,0,0,1,1,1


In [46]:
# 라벨 컬럼 설정
label_cols = ['Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3', 'S4']

# 슬라이딩 윈도우 생성
_ , y_labels = create_sliding_windows(user01_with_label, window_size_acc_hr, step_size_acc_hr, ['x','y','z'] ,label_cols)

In [47]:
# 아.. shpae가 맞지 않는다.. 처음부터 feature는 7 : 7로 바꿔야될꺼같다...
# 그리고 애초에 label이 목표기 때문에 이걸 붙여놓고 테스트 해야겠다..

print(predictions.shape)
print(y_labels.shape)

(103173, 10)
(103173, 7)


In [48]:
clf = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, random_state=42))

clf.fit(predictions, y_labels)


#### LSTM-Randomforest Test

- User02 데이터 기반

In [77]:
train_label_user02 = train_labels[train_labels['subject_id'] == "user02"]

Unnamed: 0,Q1,Q2,Q3,S1,S2,S3,S4
0,0,0,0,0,1,1,1
1,0,0,0,0,1,1,1
2,1,0,0,0,0,0,1
3,0,0,0,0,1,1,1
4,1,1,0,0,1,0,1


In [73]:
X_test_user02 = data_all[data_all['userId'] == "user02"]

X_test_user02 = X_test_user02[['pmEmotion', 'pmStress', 'userId', 'lightsleepduration', 'wakeupduration', 'deepsleepduration', 'durationtosleep', 'remsleepduration', 'hr_max', 'hr_average', 'date']]

X_test_user02.reset_index(inplace=True)

X_test_user02.drop(columns='index', axis=1, inplace=True)

X_test_user02.head()


Unnamed: 0,pmEmotion,pmStress,userId,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average,date
0,3.0,2.0,user02,10200,1320,7860,960,6480,82,64,2020-08-31
1,3.0,2.0,user02,7500,1440,7320,1440,7260,84,71,2020-09-01
2,3.0,1.0,user02,8640,1380,8820,840,7020,79,63,2020-09-02
3,3.0,1.0,user02,10320,7920,7440,7920,5340,87,66,2020-09-03
4,4.0,2.0,user02,4740,1080,8400,1080,5760,80,64,2020-09-04


In [89]:
train_label_user02['date'] = pd.to_datetime(train_label_user02['date'])

X_test_user02_with_label = pd.merge(X_test_user02, train_label_user02, on='date', how='inner')

X_test_user02_with_label.head()

Unnamed: 0,pmEmotion,pmStress,userId,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average,date,subject_id,Q1,Q2,Q3,S1,S2,S3,S4
0,3.0,2.0,2,10200,1320,7860,960,6480,82,64,2020-08-31,user02,0,0,0,0,1,1,1
1,3.0,2.0,2,7500,1440,7320,1440,7260,84,71,2020-09-01,user02,0,0,0,0,1,1,1
2,3.0,1.0,2,8640,1380,8820,840,7020,79,63,2020-09-02,user02,1,0,0,0,0,0,1
3,3.0,1.0,2,10320,7920,7440,7920,5340,87,66,2020-09-03,user02,0,0,0,0,1,1,1
4,4.0,2.0,2,4740,1080,8400,1080,5760,80,64,2020-09-04,user02,1,1,0,0,1,0,1


In [90]:
y_test = X_test_user02_with_label[['Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3', 'S4']]

y_test.reset_index(inplace=True)

y_test.drop(columns='index', axis=1, inplace=True)

y_test.head()

Unnamed: 0,Q1,Q2,Q3,S1,S2,S3,S4
0,0,0,0,0,1,1,1
1,0,0,0,0,1,1,1
2,1,0,0,0,0,0,1
3,0,0,0,0,1,1,1
4,1,1,0,0,1,0,1


In [95]:
X_test_user02  = X_test_user02_with_label.drop(columns=['Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3', 'S4', 'subject_id'], axis=1)

In [96]:
X_test_user02.head()

Unnamed: 0,pmEmotion,pmStress,userId,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average,date
0,3.0,2.0,2,10200,1320,7860,960,6480,82,64,2020-08-31
1,3.0,2.0,2,7500,1440,7320,1440,7260,84,71,2020-09-01
2,3.0,1.0,2,8640,1380,8820,840,7020,79,63,2020-09-02
3,3.0,1.0,2,10320,7920,7440,7920,5340,87,66,2020-09-03
4,4.0,2.0,2,4740,1080,8400,1080,5760,80,64,2020-09-04


In [97]:
X_test_user02['date']= pd.to_datetime(X_test_user02['date'])


# X_test_user02['userId'] = X_test_user02['userId'].str.replace('user', '').astype(int)

# 스케일러 초기화
scaler = StandardScaler()

# 스케일링할 열 선택 (datetime 열과 target 열을 제외한 수치형 열)
features_to_scale = X_test_user02.drop(columns=['date', 'userId']).columns

# 데이터프레임을 수치형 데이터와 날짜 데이터로 분리
numeric_data = X_test_user02[features_to_scale]
date_data = X_test_user02['date']
user_id_data = X_test_user02['userId']

# 스케일링 적용
scaled_features = scaler.fit_transform(numeric_data)

# 스케일링된 데이터를 데이터프레임으로 변환
X_test = pd.DataFrame(scaled_features, columns=features_to_scale)

X_test.head()



# X_test.head()


Unnamed: 0,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average
0,-0.196116,-0.092848,0.053897,-1.017415,-0.225643,-1.009662,0.172693,-0.285271,-1.027001
1,-0.196116,-0.092848,-0.501056,-0.987327,-0.495748,-0.849539,0.486762,-0.015014,0.311642
2,-0.196116,-0.928477,-0.266743,-1.002371,0.254543,-1.049693,0.390126,-0.690656,-1.218235
3,-0.196116,-0.928477,0.078561,0.637417,-0.435724,1.312116,-0.286331,0.390371,-0.644532
4,1.127668,-0.092848,-1.06834,-1.077591,0.044462,-0.969631,-0.117217,-0.555528,-1.027001


In [98]:
# 날짜 데이터와 결합
# X_test['date'] = date_data
X_test['userId'] = user_id_data

# # # 열 순서 복원
X_test = X_test[['userId'] + list(features_to_scale)]

# X_test.head()
X_test.head()

Unnamed: 0,userId,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average
0,2,-0.196116,-0.092848,0.053897,-1.017415,-0.225643,-1.009662,0.172693,-0.285271,-1.027001
1,2,-0.196116,-0.092848,-0.501056,-0.987327,-0.495748,-0.849539,0.486762,-0.015014,0.311642
2,2,-0.196116,-0.928477,-0.266743,-1.002371,0.254543,-1.049693,0.390126,-0.690656,-1.218235
3,2,-0.196116,-0.928477,0.078561,0.637417,-0.435724,1.312116,-0.286331,0.390371,-0.644532
4,2,1.127668,-0.092848,-1.06834,-1.077591,0.044462,-0.969631,-0.117217,-0.555528,-1.027001


In [100]:
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average=None)

print(f'Random Forest Classifier Accuracy: {accuracy:.4f}')
print(f'Random Forest Classifier F1-Score: {f1}')

Random Forest Classifier Accuracy: 0.0000
Random Forest Classifier F1-Score: [0.53333333 0.33333333 0.30769231 0.66666667 0.48       0.125
 0.34482759]


In [102]:
weights = np.array([1.5, 1.5, 1.0, 1.5, 1.5, 1.5, 1.5])

# 가중 평균 F1-Score 계산
weighted_f1_score = np.sum(f1 * weights)

print(f"F1-Score (Weight) : {weighted_f1_score:.4f}")

F1-Score (Weight) : 4.0324


### Valid Feature
- user01을 제외한 나머지 survey, sleep의 10개 feature로 테스트

In [105]:
# user01을 제외한 예측해야될 라벨들
test_labels_users = train_labels[train_labels['subject_id'] != 'user01']

test_labels_users.reset_index(inplace=True)

test_labels_users.drop(columns='index', axis=1, inplace=True)

test_labels_users.head()

Unnamed: 0,subject_id,date,Q1,Q2,Q3,S1,S2,S3,S4
0,user02,2020-08-31,0,0,0,0,1,1,1
1,user02,2020-09-01,0,0,0,0,1,1,1
2,user02,2020-09-02,1,0,0,0,0,0,1
3,user02,2020-09-03,0,0,0,0,1,1,1
4,user02,2020-09-04,1,1,0,0,1,0,1


In [111]:
# Col name 바꾸기
test_labels_users.rename(columns={'subject_id' : 'userId'}, inplace=True)

test_labels_users.head()

Unnamed: 0,userId,date,Q1,Q2,Q3,S1,S2,S3,S4
0,user02,2020-08-31,0,0,0,0,1,1,1
1,user02,2020-09-01,0,0,0,0,1,1,1
2,user02,2020-09-02,1,0,0,0,0,0,1
3,user02,2020-09-03,0,0,0,0,1,1,1
4,user02,2020-09-04,1,1,0,0,1,0,1


In [106]:
# user01을 제외한 예측해야될 feature들

data_features_users = data_all[data_all['userId'] != 'user01']

data_features_users.reset_index(inplace=True)
data_features_users.drop(columns='index', axis=1, inplace=True)

data_features_users.head()

Unnamed: 0,userId,date,startDt,endDt,lastUpdate,wakeupduration,lightsleepduration,deepsleepduration,wakeupcount,durationtosleep,...,dream,amCondition,amEmotion,pmEmotion,pmStress,pmFatigue,caffeine,cAmount(ml),alcohol,aAmount(ml)
0,user24,2020-08-30,1598708760,1598740860,1598748252,3840,13800,9000,3,1740,...,2.0,2.0,3.0,4.0,5.0,2.0,1,200.0,0,0.0
1,user24,2020-08-31,1598800140,1598825940,1598833333,4740,9540,6360,1,3180,...,2.0,3.0,3.0,4.0,2.0,4.0,1,200.0,0,0.0
2,user24,2020-09-01,1598885940,1598908800,1598916190,1920,8760,4740,1,1440,...,4.0,4.0,3.0,5.0,4.0,3.0,1,150.0,0,0.0
3,user24,2020-09-02,1598979420,1598999580,1599006969,2760,7740,7440,1,1380,...,4.0,2.0,4.0,4.0,4.0,2.0,0,0.0,0,0.0
4,user24,2020-09-03,1599060780,1599085980,1599093370,3900,11640,7620,1,1680,...,2.0,2.0,3.0,3.0,2.0,2.0,1,200.0,0,0.0


In [109]:
data_features_user = data_features_users[['pmEmotion', 'pmStress', 'userId', 'lightsleepduration', 'wakeupduration', 'deepsleepduration', 'durationtosleep', 'remsleepduration', 'hr_max', 'hr_average', 'date']]

data_features_user.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 549 entries, 0 to 548
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   pmEmotion           549 non-null    float64
 1   pmStress            549 non-null    float64
 2   userId              549 non-null    object 
 3   lightsleepduration  549 non-null    int64  
 4   wakeupduration      549 non-null    int64  
 5   deepsleepduration   549 non-null    int64  
 6   durationtosleep     549 non-null    int64  
 7   remsleepduration    549 non-null    int64  
 8   hr_max              549 non-null    int64  
 9   hr_average          549 non-null    int64  
 10  date                549 non-null    object 
dtypes: float64(2), int64(7), object(2)
memory usage: 47.3+ KB


In [117]:
# survey sleep & label 데이터 합치기
test_labels_users['date'] = pd.to_datetime(test_labels_users['date'])
data_features_user['date'] = pd.to_datetime(data_features_user['date'])

data_features_and_label = pd.merge(data_features_user, test_labels_users, on=['date', 'userId'], how='inner')

data_features_and_label.head()

Unnamed: 0,pmEmotion,pmStress,userId,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average,date,Q1,Q2,Q3,S1,S2,S3,S4
0,4.0,5.0,user24,13800,3840,9000,1740,5280,82,72,2020-08-30,0,1,1,0,0,0,1
1,4.0,2.0,user24,9540,4740,6360,3180,4980,82,69,2020-08-31,1,1,0,0,1,1,1
2,5.0,4.0,user24,8760,1920,4740,1440,7440,94,70,2020-09-01,1,1,1,0,1,1,0
3,4.0,4.0,user24,7740,2760,7440,1380,2220,80,68,2020-09-02,1,1,1,0,0,1,0
4,3.0,2.0,user24,11640,3900,7620,1680,2040,83,71,2020-09-03,0,0,0,0,1,1,1


In [118]:
y_label_all = data_features_and_label[['Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3', 'S4']]

data_features_and_label.drop(columns=['Q1', 'Q2', 'Q3', 'S1', 'S2', 'S3', 'S4'], axis=1, inplace=True)

y_label_all.head()

Unnamed: 0,Q1,Q2,Q3,S1,S2,S3,S4
0,0,1,1,0,0,0,1
1,1,1,0,0,1,1,1
2,1,1,1,0,1,1,0
3,1,1,1,0,0,1,0
4,0,0,0,0,1,1,1


In [119]:
data_features_and_label['date']= pd.to_datetime(data_features_and_label['date'])
data_features_and_label['userId'] = data_features_and_label['userId'].str.replace('user', '').astype(int)

# X_test_user02['userId'] = X_test_user02['userId'].str.replace('user', '').astype(int)

# 스케일러 초기화
scaler = StandardScaler()

# 스케일링할 열 선택 (datetime 열과 target 열을 제외한 수치형 열)
features_to_scale = data_features_and_label.drop(columns=['date', 'userId']).columns

# 데이터프레임을 수치형 데이터와 날짜 데이터로 분리
numeric_data = data_features_and_label[features_to_scale]
date_data = data_features_and_label['date']
user_id_data = data_features_and_label['userId']

# 스케일링 적용
scaled_features = scaler.fit_transform(numeric_data)

# 스케일링된 데이터를 데이터프레임으로 변환
X_test_all = pd.DataFrame(scaled_features, columns=features_to_scale)

X_test_all.head()



# X_test.head()


Unnamed: 0,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average
0,0.45282,1.713829,0.703718,0.240881,1.356618,-0.142293,0.208967,0.680515,1.468119
1,0.45282,-0.827792,-0.121309,0.596353,0.398941,0.674048,0.106887,0.680515,1.022296
2,1.550566,0.866622,-0.27237,-0.517458,-0.188724,-0.312364,0.943948,2.293589,1.170904
3,0.45282,0.866622,-0.469912,-0.185685,0.790718,-0.346378,-0.832254,0.41167,0.873689
4,-0.644926,-0.827792,0.285395,0.26458,0.856014,-0.176307,-0.893503,0.814938,1.319511


In [120]:
# 날짜 데이터와 결합
# X_test['date'] = date_data
X_test_all['userId'] = user_id_data

# # # 열 순서 복원
X_test_all = X_test_all[['userId'] + list(features_to_scale)]

# X_test.head()
X_test_all.head()

Unnamed: 0,userId,pmEmotion,pmStress,lightsleepduration,wakeupduration,deepsleepduration,durationtosleep,remsleepduration,hr_max,hr_average
0,24,0.45282,1.713829,0.703718,0.240881,1.356618,-0.142293,0.208967,0.680515,1.468119
1,24,0.45282,-0.827792,-0.121309,0.596353,0.398941,0.674048,0.106887,0.680515,1.022296
2,24,1.550566,0.866622,-0.27237,-0.517458,-0.188724,-0.312364,0.943948,2.293589,1.170904
3,24,0.45282,0.866622,-0.469912,-0.185685,0.790718,-0.346378,-0.832254,0.41167,0.873689
4,24,-0.644926,-0.827792,0.285395,0.26458,0.856014,-0.176307,-0.893503,0.814938,1.319511


In [121]:
# Randomforest 예측
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average=None)

print(f'Random Forest Classifier Accuracy: {accuracy:.4f}')
print(f'Random Forest Classifier F1-Score: {f1}')

Random Forest Classifier Accuracy: 0.0000
Random Forest Classifier F1-Score: [0.53333333 0.33333333 0.30769231 0.66666667 0.48       0.125
 0.34482759]


In [122]:
weights = np.array([1.5, 1.5, 1.0, 1.5, 1.5, 1.5, 1.5])

# 가중 평균 F1-Score 계산
weighted_f1_score = np.sum(f1 * weights)

print(f"F1-Score (Weight) : {weighted_f1_score:.4f}")

F1-Score (Weight) : 4.0324


In [124]:
print(y_test)

    Q1  Q2  Q3  S1  S2  S3  S4
0    0   0   0   0   1   1   1
1    0   0   0   0   1   1   1
2    1   0   0   0   0   0   1
3    0   0   0   0   1   1   1
4    1   1   0   0   1   0   1
5    1   1   1   1   0   0   1
6    1   1   1   0   1   1   1
7    0   0   0   0   0   0   0
8    0   0   0   0   0   0   1
9    1   0   0   0   1   0   1
10   0   0   0   0   0   0   1
11   1   0   0   1   0   0   1
12   0   1   1   0   0   0   0
13   0   0   0   1   1   0   1
14   0   0   0   1   1   0   1
15   0   0   0   0   0   0   1
16   0   0   0   0   1   0   1
17   0   0   0   0   0   0   1
18   0   1   0   1   1   1   1
19   1   1   1   0   0   0   1
20   1   0   1   1   0   0   1
21   1   0   0   1   1   1   1
22   0   0   0   0   0   0   1
23   0   1   0   1   1   1   0
24   0   0   0   0   0   0   1
25   1   1   0   0   0   0   0
26   1   1   1   1   0   0   1


In [123]:
print(y_pred)

[[0 0 0 0 0 0 0]
 [1 0 0 0 0 1 0]
 [1 0 0 0 0 1 0]
 [1 0 1 1 1 0 1]
 [0 0 0 0 0 0 1]
 [0 0 0 1 1 0 0]
 [1 0 0 0 1 0 1]
 [1 0 0 0 0 0 0]
 [0 0 0 1 1 0 0]
 [1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 0 0 0 0 1 0]
 [1 0 0 0 1 0 0]
 [1 0 0 1 1 0 0]
 [1 0 0 1 1 1 0]
 [0 0 1 0 1 0 0]
 [1 0 0 0 0 0 0]
 [0 0 0 0 0 1 0]
 [1 0 1 1 0 0 0]
 [1 1 0 0 1 1 1]
 [1 0 1 0 0 0 1]
 [1 0 0 1 1 0 0]
 [1 0 0 0 0 1 0]
 [1 1 1 1 1 0 1]
 [1 1 0 0 1 1 0]
 [1 0 1 1 1 0 0]
 [0 0 1 0 0 1 0]]
