Read data

https://towardsdatascience.com/time-series-of-price-anomaly-detection-with-lstm-11a12ba4f6d9
https://zhuanlan.zhihu.com/p/142320349

In [1]:
import os
import pandas as pd

filepath = "../Data_Q1/train"

df = pd.DataFrame()

for i in range(20): 
    file_name = f"data_{i}.csv"
    file_path = os.path.join(filepath, file_name) 
    data = pd.read_csv(file_path)
    df = pd.concat([df, data], ignore_index=True)

# print(df.head())
df

Unnamed: 0,x,y,z,a,b,c,d,Is_Falling
0,18.495860,13.766527,14.362624,0,0,0,1,0
1,18.501072,13.827225,14.270268,0,0,1,0,0
2,18.405950,13.868976,14.094804,1,0,0,0,0
3,18.444572,13.910701,14.116078,0,1,0,0,0
4,18.418470,13.933917,14.320566,0,0,0,1,0
...,...,...,...,...,...,...,...,...
134224,23.367303,15.457298,11.470211,0,0,1,0,0
134225,23.345435,15.475442,11.314662,0,1,0,0,0
134226,23.323040,15.439008,11.634412,0,0,0,1,0
134227,23.287241,15.447851,11.501362,0,0,1,0,0


In [2]:
testdata = pd.read_csv('../Data_Q1/test/test_set.csv')

Detect and fill in missing values

In [3]:

for col in ['x', 'y', 'z']:
    if df[col].isnull().any():
        df[col].fillna(df[col].mean(), inplace=True)

for col in ['a', 'b', 'c', 'd']:
    if df[col].isnull().any():
        df[col].fillna(method='ffill', inplace=True)


## Feature engineering

In [4]:
# Sliding window method to calculate statistical features
window_size = 25  

# mean&std
rolling_features = df[['x', 'y', 'z']].rolling(window=window_size)
df['x_mean'] = rolling_features.mean()['x']
df['y_mean'] = rolling_features.mean()['y']
df['z_mean'] = rolling_features.mean()['z']
df['x_std'] = rolling_features.std()['x']
df['y_std'] = rolling_features.std()['y']
df['z_std'] = rolling_features.std()['z']

# Fill the starting NaN value of the sliding window
df.fillna(method='bfill', inplace=True)

# scaler
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
columns_to_scale = ['x', 'y', 'z', 'x_mean', 'y_mean', 'z_mean', 'x_std', 'y_std', 'z_std']
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])


df



Unnamed: 0,x,y,z,a,b,c,d,Is_Falling,x_mean,y_mean,z_mean,x_std,y_std,z_std
0,1.371850,0.584813,1.475836,0,0,0,1,0,1.349051,0.710981,1.503748,-0.667437,-0.270870,-0.015808
1,1.373198,0.607249,1.430084,0,0,1,0,0,1.349051,0.710981,1.503748,-0.667437,-0.270870,-0.015808
2,1.348606,0.622682,1.343162,1,0,0,0,0,1.349051,0.710981,1.503748,-0.667437,-0.270870,-0.015808
3,1.358591,0.638104,1.353701,0,1,0,0,0,1.349051,0.710981,1.503748,-0.667437,-0.270870,-0.015808
4,1.351843,0.646686,1.455002,0,0,0,1,0,1.349051,0.710981,1.503748,-0.667437,-0.270870,-0.015808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134224,2.631264,1.209772,0.042976,0,0,1,0,0,2.643493,1.283592,-0.043996,-0.640276,0.116545,-0.335021
134225,2.625611,1.216479,-0.034080,0,1,0,0,0,2.644575,1.277411,-0.042743,-0.664754,0.089113,-0.336858
134226,2.619821,1.203012,0.124319,0,0,0,1,0,2.645587,1.270309,-0.034408,-0.698732,0.048402,-0.234391
134227,2.610566,1.206280,0.058408,0,0,1,0,0,2.645762,1.263101,-0.028606,-0.705175,-0.017255,-0.214386


In [5]:
import numpy as np
import pandas as pd



# Calculated rate of change
df['x_change'] = df['x'].diff().fillna(0)
df['y_change'] = df['y'].diff().fillna(0)
df['z_change'] = df['z'].diff().fillna(0)

# Duration characteristics
df['fall_duration'] = df['Is_Falling'].groupby((df['Is_Falling'] != df['Is_Falling'].shift()).cumsum()).cumcount()
df['normal_duration'] = (1 - df['Is_Falling']).groupby((df['Is_Falling'] == df['Is_Falling'].shift()).cumsum()).cumcount()

# Peak detection
peak_threshold = 1.5  
df['is_peak'] = ((df['x_change'].abs() > peak_threshold) | 
                 (df['y_change'].abs() > peak_threshold) | 
                 (df['z_change'].abs() > peak_threshold)).astype(int)

df.head()  
# print(df.head())


Unnamed: 0,x,y,z,a,b,c,d,Is_Falling,x_mean,y_mean,z_mean,x_std,y_std,z_std,x_change,y_change,z_change,fall_duration,normal_duration,is_peak
0,1.37185,0.584813,1.475836,0,0,0,1,0,1.349051,0.710981,1.503748,-0.667437,-0.27087,-0.015808,0.0,0.0,0.0,0,0,0
1,1.373198,0.607249,1.430084,0,0,1,0,0,1.349051,0.710981,1.503748,-0.667437,-0.27087,-0.015808,0.001348,0.022436,-0.045752,1,0,0
2,1.348606,0.622682,1.343162,1,0,0,0,0,1.349051,0.710981,1.503748,-0.667437,-0.27087,-0.015808,-0.024592,0.015432,-0.086922,2,0,0
3,1.358591,0.638104,1.353701,0,1,0,0,0,1.349051,0.710981,1.503748,-0.667437,-0.27087,-0.015808,0.009985,0.015423,0.010539,3,0,0
4,1.351843,0.646686,1.455002,0,0,0,1,0,1.349051,0.710981,1.503748,-0.667437,-0.27087,-0.015808,-0.006748,0.008581,0.1013,4,0,0


In [6]:
from imblearn.over_sampling import SMOTE

X = df.drop('Is_Falling', axis=1)
y = df['Is_Falling']
X_resampled, y_resampled = X, y
# # SMOTE oversampling 
# smote = SMOTE()
# X_resampled, y_resampled = smote.fit_resample(X, y)
print(X_resampled.head())

          x         y         z  a  b  c  d    x_mean    y_mean    z_mean  \
0  1.371850  0.584813  1.475836  0  0  0  1  1.349051  0.710981  1.503748   
1  1.373198  0.607249  1.430084  0  0  1  0  1.349051  0.710981  1.503748   
2  1.348606  0.622682  1.343162  1  0  0  0  1.349051  0.710981  1.503748   
3  1.358591  0.638104  1.353701  0  1  0  0  1.349051  0.710981  1.503748   
4  1.351843  0.646686  1.455002  0  0  0  1  1.349051  0.710981  1.503748   

      x_std    y_std     z_std  x_change  y_change  z_change  fall_duration  \
0 -0.667437 -0.27087 -0.015808  0.000000  0.000000  0.000000              0   
1 -0.667437 -0.27087 -0.015808  0.001348  0.022436 -0.045752              1   
2 -0.667437 -0.27087 -0.015808 -0.024592  0.015432 -0.086922              2   
3 -0.667437 -0.27087 -0.015808  0.009985  0.015423  0.010539              3   
4 -0.667437 -0.27087 -0.015808 -0.006748  0.008581  0.101300              4   

   normal_duration  is_peak  
0                0        0  
1 

Do the same for test

In [7]:

import pandas as pd
from sklearn.preprocessing import StandardScaler

for col in ['x', 'y', 'z']:
    if testdata[col].isnull().any():
        testdata[col].fillna(testdata[col].mean(), inplace=True)

for col in ['a', 'b', 'c', 'd']:
    if testdata[col].isnull().any():
        testdata[col].fillna(method='ffill', inplace=True)


rolling_features = testdata[['x', 'y', 'z']].rolling(window=window_size)
testdata['x_mean'] = rolling_features.mean()['x']
testdata['y_mean'] = rolling_features.mean()['y']
testdata['z_mean'] = rolling_features.mean()['z']
testdata['x_std'] = rolling_features.std()['x']
testdata['y_std'] = rolling_features.std()['y']
testdata['z_std'] = rolling_features.std()['z']

# Fill the starting NaN value of the sliding window
testdata.fillna(method='bfill', inplace=True)

# scaler
scaler = StandardScaler()
columns_to_scale = ['x', 'y', 'z', 'x_mean', 'y_mean', 'z_mean', 'x_std', 'y_std', 'z_std']
testdata[columns_to_scale] = scaler.fit_transform(testdata[columns_to_scale])

# v change
testdata['x_change'] = testdata['x'].diff().fillna(0)
testdata['y_change'] = testdata['y'].diff().fillna(0)
testdata['z_change'] = testdata['z'].diff().fillna(0)

# Duration
testdata['fall_duration'] = testdata['Is_Falling'].groupby((testdata['Is_Falling'] != testdata['Is_Falling'].shift()).cumsum()).cumcount()
testdata['normal_duration'] = (1 - testdata['Is_Falling']).groupby((testdata['Is_Falling'] == testdata['Is_Falling'].shift()).cumsum()).cumcount()

# Peak detection
peak_threshold = 1.5
testdata['is_peak'] = ((testdata['x_change'].abs() > peak_threshold) |
                          (testdata['y_change'].abs() > peak_threshold) |
                          (testdata['z_change'].abs() > peak_threshold)).astype(int)



Replace useless feature(according to the rf_importance below,which I found is harmful to the result)

In [8]:
testdata.drop(['c'], axis=1, inplace=True)
testdata.drop(['d'], axis=1, inplace=True)
X_resampled.drop(['c'], axis=1, inplace=True)
X_resampled.drop(['d'], axis=1, inplace=True)
testdata.drop(['is_peak'], axis=1, inplace=True)
X_resampled.drop(['is_peak'], axis=1, inplace=True)
testdata.drop(['ID'], axis=1, inplace=True)
testdata_copy = testdata.copy()

add feature of momentum acceleration ...(I found these are the most important feature which increase my presicion from 50 to 80)

In [9]:




import numpy as np

def add_features(data, window_size):
    # momentum
    for col in ['x_change', 'y_change', 'z_change']:
        data[f'{col}_momentum_mean'] = data[col].rolling(window=window_size).mean()
        data[f'{col}_momentum_std'] = data[col].rolling(window=window_size).std()

    # acceleration
    for col in ['x', 'y', 'z']:
        data[f'{col}_acceleration'] = data[col].diff().diff().fillna(0)

    # Angular feature
    data['angleyx'] = np.arctan2(data['y'], data['x'])
    data['anglezx'] = np.arctan2(data['z'], data['x'])
    data['anglezy'] = np.arctan2(data['z'], data['y'])
    # Interaction feature
    data['x_y_interaction'] = data['x'] * data['y']
    data['x_z_interaction'] = data['x'] * data['z']
    data['y_z_interaction'] = data['y'] * data['z']
    data['x_z_change_interaction'] = data['x'] * data['z_change']
    data['x_y_change_interaction'] = data['x'] * data['y_change']
    data['y_z_change_interaction'] = data['y'] * data['z_change']
    data['y_x_change_interaction'] = data['y'] * data['x_change']

    # sma
    data['SMA'] = (data['x'].abs() + data['y'].abs() + data['z'].abs()).mean()

    # Make sure to populate any resulting NaN values
    data.fillna(method='bfill', inplace=True)

    return data

X_resampled = add_features(X_resampled, window_size)
testdata = add_features(testdata, window_size)




## Train and test(ml)

In [21]:
from imblearn.over_sampling import ADASYN
adasyn = ADASYN()


X_resampled_adasyn, y_resampled_adasyn = adasyn.fit_resample(X_resampled, y_resampled)

ADASYN + RF

In [22]:

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import recall_score, precision_score, precision_recall_curve



# rf
rf_classifier = RandomForestClassifier(class_weight={0: 6, 1: 1}, random_state=42)

# train
rf_classifier.fit(X_resampled_adasyn, y_resampled_adasyn)

# predict
probabilities = rf_classifier.predict_proba(testdata.drop('Is_Falling', axis=1))[:, 1]

# Calculate accuracy and recall rates for different thresholds
precisions, recalls, thresholds = precision_recall_curve(testdata['Is_Falling'], probabilities)

# Select a threshold that maximizes the product of accuracy and recall
optimal_idx = np.argmax(precisions * recalls)
optimal_threshold = thresholds[optimal_idx]


# Thresholds are applied for classification
predicted_labels = (probabilities >= optimal_threshold).astype(int)

# Calculate accuracy and recall rates for category 1
accuracy = precision_score(testdata['Is_Falling'], predicted_labels)
recall_class1 = recall_score(testdata['Is_Falling'], predicted_labels, pos_label=1)
recall_class = recall_score(testdata['Is_Falling'], predicted_labels)
print(f'Precision: {accuracy:.4f}')
print(f'Recall (Class 1): {recall_class1:.4f}')
print(f'Recall : {recall_class:.4f}')





Precision: 0.8564
Recall (Class 1): 0.9038
Recall : 0.9038


In [23]:
print(optimal_threshold)

0.65


In [25]:
output_df = pd.DataFrame()
output_df['ID'] = testdata.index + 1  
output_df['Is_Falling'] = predicted_labels  

output_df.to_csv('Q1_output.csv', index=False)


lightgbm

In [13]:
import lightgbm as lgb
from sklearn.metrics import recall_score, precision_score, precision_recall_curve

# LightGBM
lgb_classifier = lgb.LGBMClassifier(class_weight={0: 15, 1: 1}, random_state=42)

# train
lgb_classifier.fit(X_resampled_adasyn, y_resampled_adasyn)

probabilities = lgb_classifier.predict_proba(testdata.drop('Is_Falling', axis=1))[:, 1]

precisions, recalls, thresholds = precision_recall_curve(testdata['Is_Falling'], probabilities)

optimal_idx = np.argmax(precisions * recalls)
optimal_threshold = thresholds[optimal_idx]

predicted_labels = (probabilities >= optimal_threshold).astype(int)

accuracy = precision_score(testdata['Is_Falling'], predicted_labels)
recall_class1 = recall_score(testdata['Is_Falling'], predicted_labels, pos_label=1)

print(f'Precision: {accuracy:.4f}')
print(f'Recall (Class 1): {recall_class1:.4f}')

[LightGBM] [Info] Number of positive: 127659, number of negative: 127656
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6381
[LightGBM] [Info] Number of data points in the train set: 255315, number of used features: 28
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.062501 -> initscore=-2.708027
[LightGBM] [Info] Start training from score -2.708027
Precision: 0.4118
Recall (Class 1): 0.9942


SMOTE + RF

In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE

# SMOTE
smote = SMOTE()

# Resampling the training data
X_resampled_smote, y_resampled_smote = smote.fit_resample(X_resampled, y_resampled)

param_grid = {
    'n_estimators': [100],
    'max_depth': [10],
    'min_samples_split': [20],
    'min_samples_leaf': [1]
}

# rf
rf_classifier = RandomForestClassifier(class_weight={0: 5, 1: 1}, random_state=42)

grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, 
                           cv=3, n_jobs=-1, scoring='recall', verbose=1)

grid_search.fit(X_resampled_smote, y_resampled_smote)

print("Best Parameters:", grid_search.best_params_)

best_rf = grid_search.best_estimator_
probabilities = best_rf.predict_proba(testdata.drop('Is_Falling', axis=1))[:, 1]


from sklearn.metrics import precision_recall_curve

precisions, recalls, thresholds = precision_recall_curve(testdata['Is_Falling'], probabilities)

optimal_idx = np.argmax(precisions * recalls)
optimal_threshold = thresholds[optimal_idx]

predicted_labels = (probabilities >= optimal_threshold).astype(int)

accuracy = precision_score(testdata['Is_Falling'], predicted_labels)
recall_class1 = recall_score(testdata['Is_Falling'], predicted_labels, pos_label=1)

print(f'Precision: {accuracy:.4f}')
print(f'Recall (Class 1): {recall_class1:.4f}')


Fitting 3 folds for each of 1 candidates, totalling 3 fits
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 20, 'n_estimators': 100}
Precision: 0.7424
Recall (Class 1): 0.8571


## Feature engineering for dl

Select features using rf

In [15]:
from sklearn.ensemble import RandomForestClassifier

# Use random forests to estimate feature importance
rf = RandomForestClassifier()
rf.fit(X_resampled, y_resampled)

# Acquired feature importance
importances = rf.feature_importances_
indices = np.argsort(importances)[-7:]

top_features = [X_resampled.columns[i] for i in indices]
print("Top 10 features:", top_features)


Top 10 features: ['z', 'z_mean', 'x', 'z_change_momentum_std', 'x_mean', 'x_change_momentum_std', 'fall_duration']


In [16]:
top_features

['z',
 'z_mean',
 'x',
 'z_change_momentum_std',
 'x_mean',
 'x_change_momentum_std',
 'fall_duration']

In [17]:
print(X_resampled.head())
print(testdata.head())

          x         y         z  a  b    x_mean    y_mean    z_mean     x_std  \
0  1.371850  0.584813  1.475836  0  0  1.349051  0.710981  1.503748 -0.667437   
1  1.373198  0.607249  1.430084  0  0  1.349051  0.710981  1.503748 -0.667437   
2  1.348606  0.622682  1.343162  1  0  1.349051  0.710981  1.503748 -0.667437   
3  1.358591  0.638104  1.353701  0  1  1.349051  0.710981  1.503748 -0.667437   
4  1.351843  0.646686  1.455002  0  0  1.349051  0.710981  1.503748 -0.667437   

     y_std  ...  y_change_momentum_std  z_change_momentum_mean  \
0 -0.27087  ...               0.012536                0.004666   
1 -0.27087  ...               0.012536                0.004666   
2 -0.27087  ...               0.012536                0.004666   
3 -0.27087  ...               0.012536                0.004666   
4 -0.27087  ...               0.012536                0.004666   

   z_change_momentum_std  x_acceleration  y_acceleration  z_acceleration  \
0               0.067089        0.000000

Select features

In [18]:
top_features = np.argsort(importances)[-7:]

selected_columns = X_resampled.columns[top_features]

X_resampled = X_resampled[selected_columns]

selected_columns = ['Is_Falling'] + [col for col in selected_columns if col != 'Is_Falling']

testdata = testdata[selected_columns]



In [19]:
X_resampled 


Unnamed: 0,z,z_mean,x,z_change_momentum_std,x_mean,x_change_momentum_std,fall_duration
0,1.475836,1.503748,1.371850,0.067089,1.349051,0.014357,0
1,1.430084,1.503748,1.373198,0.067089,1.349051,0.014357,1
2,1.343162,1.503748,1.348606,0.067089,1.349051,0.014357,2
3,1.353701,1.503748,1.358591,0.067089,1.349051,0.014357,3
4,1.455002,1.503748,1.351843,0.067089,1.349051,0.014357,4
...,...,...,...,...,...,...,...
134224,0.042976,-0.043996,2.631264,0.108588,2.643493,0.008069,539
134225,-0.034080,-0.042743,2.625611,0.109644,2.644575,0.007734,540
134226,0.124319,-0.034408,2.619821,0.113950,2.645587,0.007790,541
134227,0.058408,-0.028606,2.610566,0.114900,2.645762,0.007725,542


In [20]:
import torch
if torch.cuda.is_available():
    device = torch.device('cuda')
    print('cuda')
else:
    device = torch.device('cpu')
    print('no')


cuda


The data is printed many times below for debug because of the time series used

In [21]:
print(X_resampled.head())
print(testdata.head())

          z    z_mean         x  z_change_momentum_std    x_mean  \
0  1.475836  1.503748  1.371850               0.067089  1.349051   
1  1.430084  1.503748  1.373198               0.067089  1.349051   
2  1.343162  1.503748  1.348606               0.067089  1.349051   
3  1.353701  1.503748  1.358591               0.067089  1.349051   
4  1.455002  1.503748  1.351843               0.067089  1.349051   

   x_change_momentum_std  fall_duration  
0               0.014357              0  
1               0.014357              1  
2               0.014357              2  
3               0.014357              3  
4               0.014357              4  
   Is_Falling         z    z_mean         x  z_change_momentum_std    x_mean  \
0           0  0.937880  1.101936 -1.560579                0.13119 -1.569741   
1           0  0.910974  1.101936 -1.554662                0.13119 -1.569741   
2           0  0.900787  1.101936 -1.554954                0.13119 -1.569741   
3           0  1.18

build train_data and test

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

X_train=X_resampled
y_train=y_resampled

TIME_STEPS=10
def create_sequences(X, y, time_steps=TIME_STEPS):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X.iloc[i:(i+time_steps)].values)
        ys.append(y.iloc[i+time_steps])
    return np.array(Xs), np.array(ys)

# Create a sequence using the modified function
X_train, y_train = create_sequences(X_resampled, y_resampled)
X_test, y_test = create_sequences(testdata.drop('Is_Falling',axis=1), testdata_copy['Is_Falling'])
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train_tensor = y_train_tensor.unsqueeze(-1)  # Add a new dimension with size 1

train_data = TensorDataset(X_train_tensor, y_train_tensor)


In [23]:
print(X_train_tensor.shape)

torch.Size([134219, 10, 7])


In [24]:
X_test.shape

(6613, 10, 7)

## Train and test(DL)

In [25]:
torch.cuda.empty_cache()

Lstm

In [26]:
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
from torch.optim import lr_scheduler

class LSTMModel(nn.Module):
    def __init__(self, num_features, hidden_layer_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(num_features, hidden_layer_size, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        self.linear = nn.Linear(hidden_layer_size, 1) 

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.dropout(x)
        x = self.linear(x)
        x = x[:, -1, :]  # Takes the output of the last time point in the sequence
        return x




model = LSTMModel(X_train.shape[2], 64)
criterion = nn.BCEWithLogitsLoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Create a learning rate scheduler
# lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

model.to(device)
model.train()
# train
epochs = 1000
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    
    # lr_scheduler.step()
    
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


    
# save
torch.save(model.state_dict(), 'lstm_model1.pth')



  from .autonotebook import tqdm as notebook_tqdm


Epoch 1, Loss: 0.6249517798423767
Epoch 2, Loss: 0.6158474683761597
Epoch 3, Loss: 0.6075348854064941
Epoch 4, Loss: 0.5999100804328918
Epoch 5, Loss: 0.5925572514533997
Epoch 6, Loss: 0.5842264294624329
Epoch 7, Loss: 0.5744786858558655
Epoch 8, Loss: 0.5624147653579712
Epoch 9, Loss: 0.5488163232803345
Epoch 10, Loss: 0.5370526313781738
Epoch 11, Loss: 0.5276380777359009
Epoch 12, Loss: 0.5192316770553589
Epoch 13, Loss: 0.5103689432144165
Epoch 14, Loss: 0.49969249963760376
Epoch 15, Loss: 0.48740172386169434
Epoch 16, Loss: 0.4723661541938782
Epoch 17, Loss: 0.45843276381492615
Epoch 18, Loss: 0.4499123692512512
Epoch 19, Loss: 0.4418874680995941
Epoch 20, Loss: 0.43474093079566956
Epoch 21, Loss: 0.4280293583869934
Epoch 22, Loss: 0.42035263776779175
Epoch 23, Loss: 0.41141676902770996
Epoch 24, Loss: 0.4031262993812561
Epoch 25, Loss: 0.39582568407058716
Epoch 26, Loss: 0.3894079029560089
Epoch 27, Loss: 0.38227003812789917
Epoch 28, Loss: 0.3745007812976837
Epoch 29, Loss: 0.367

In [27]:
print(X_test_tensor.shape)


torch.Size([6613, 10, 7])


In [28]:
from sklearn.metrics import recall_score, precision_score
import torch.nn.functional as F

# Reshape test data to match model inputs
test_input = X_test_tensor.to(device)

#predict
model.eval()  
with torch.no_grad():
    test_predictions = model(test_input)

# Converts prediction results to binary labels
predicted_labels = (test_predictions > 0.5).type(torch.int).cpu().numpy()

recall = recall_score(y_test, predicted_labels, pos_label=1)
precision = precision_score(y_test, predicted_labels)

print(f'Recall (class 1): {recall:.4f}')
print(f'Precision: {precision:.4f}')





Recall (class 1): 0.7405
Precision: 0.3956


In [29]:
testdata

Unnamed: 0,Is_Falling,z,z_mean,x,z_change_momentum_std,x_mean,x_change_momentum_std,fall_duration
0,0,0.937880,1.101936,-1.560579,0.131190,-1.569741,0.007082,0
1,0,0.910974,1.101936,-1.554662,0.131190,-1.569741,0.007082,1
2,0,0.900787,1.101936,-1.554954,0.131190,-1.569741,0.007082,2
3,0,1.188580,1.101936,-1.567222,0.131190,-1.569741,0.007082,3
4,0,1.113441,1.101936,-1.558351,0.131190,-1.569741,0.007082,4
...,...,...,...,...,...,...,...,...
6618,0,1.303992,1.423800,-1.382111,0.151521,-1.397136,0.007242,460
6619,0,1.514922,1.439587,-1.386715,0.156802,-1.396238,0.007258,461
6620,0,1.634979,1.456326,-1.378556,0.157375,-1.395123,0.007395,462
6621,0,1.408741,1.463752,-1.373076,0.164698,-1.393836,0.007447,463
