In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import lightgbm as lgb
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix

In [32]:
# Load the Excel export that holds the timestamp and machine_state columns.
df = pd.read_excel('HIDROLIK 1-2024-04-17_08-55-30.xlsx')

# Convert the timestamp column to datetimes using the day.month.year hour:minute layout.
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%d.%m.%Y %H:%M')

# Derive one calendar feature per datetime component. The insertion order here
# determines the column order of any feature matrix built from this frame.
for component in ('minute', 'hour', 'day', 'month', 'year', 'dayofweek'):
    df[component] = getattr(df['timestamp'].dt, component)

# pandas numbers weekdays Monday=0 ... Sunday=6, so ">= 6" is true on Sundays only.
df['is_sunday'] = df['dayofweek'] >= 6

In [33]:
from datetime import datetime, timedelta

# Latest observation time in the data; the future timestamps generated further
# down start one minute after this value.
timestamp_column = df['timestamp']
last_timestamp = timestamp_column.max()

In [34]:
# Target: the machine_state column (trained below with a binary objective).
y = df['machine_state']

# Features: every column except the target and the raw timestamp.
# Both are dropped with a single non-mutating call so `df` itself is left intact:
# this cell can be re-run without a KeyError, and cells that read df['timestamp']
# keep working regardless of execution order. The resulting X has the same
# columns, in the same order, as the previous in-place version.
X = df.drop(columns=['timestamp', 'machine_state'])

In [35]:
# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so validation rows are
# interleaved in time with training rows. Confirm this is the intended evaluation
# for a model that is later used to predict timestamps beyond the data.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [36]:
# Wrap the train/validation splits in LightGBM datasets. The validation set
# references the training set so both share the same feature binning.
train_data = lgb.Dataset(data=X_train, label=y_train)
val_data = lgb.Dataset(data=X_val, label=y_val, reference=train_data)

# Binary classifier scored with log-loss.
# NOTE(review): device='gpu' ties training to a LightGBM install with working
# GPU (OpenCL) support; confirm that holds wherever this notebook is re-run.
params = dict(
    objective='binary',
    metric='binary_logloss',
    num_leaves=31,
    learning_rate=0.05,
    feature_fraction=0.9,
    device='gpu',
)

# Stop once the validation score has not improved for 20 rounds, and print the
# metrics every 20 rounds.
training_callbacks = [
    lgb.early_stopping(stopping_rounds=20),
    lgb.log_evaluation(period=20),
]

# Train for at most 200 boosting rounds, evaluating on both datasets.
bst = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=200,
    valid_sets=[train_data, val_data],
    callbacks=training_callbacks,
)

[LightGBM] [Info] Number of positive: 10875, number of negative: 21809
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 126
[LightGBM] [Info] Number of data points in the train set: 32684, number of used features: 6
[LightGBM] [Info] Using GPU Device: Intel(R) Iris(R) Xe Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 64 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 5 dense feature groups (0.25 MB) transferred to GPU in 0.001036 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.332732 -> initscore=-0.695856
[LightGBM] [Info] Start training from score -0.695856
Training until validation scores don't improve for 20 rounds
[20]	training's binary_logloss: 0.357944	valid_1's binary_logloss: 0.355901
[40]	training's binary_logloss: 0.268593	valid_1's binary_logloss: 0.267273
[60]	training's binary_logloss: 0.237109	valid_1's b

In [37]:
# Model scores for the validation rows.
pred_for_y_val = bst.predict(X_val)

# Hard labels: a score strictly above 0.5 becomes 1, everything else 0.
# .tolist() keeps the result a plain list of Python ints.
pred_for_y_val_binary = (pred_for_y_val > 0.5).astype(int).tolist()

In [38]:
# Score the thresholded validation predictions against the true labels.
accuracy = accuracy_score(y_true=y_val, y_pred=pred_for_y_val_binary)
cm = confusion_matrix(y_true=y_val, y_pred=pred_for_y_val_binary)

# Report accuracy first, then the confusion matrix (rows: true class, columns: predicted).
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n {cm}")

Accuracy: 0.9127508565834557
Confusion Matrix:
 [[4823  659]
 [  54 2636]]


In [39]:
# Build the forecast horizon: 480 consecutive minutes, starting one minute
# after the last observed timestamp.
minute_offsets = (timedelta(minutes=step) for step in range(1, 481))
future_timestamps = [last_timestamp + offset for offset in minute_offsets]

future_df = pd.DataFrame({'timestamp': future_timestamps})

# Derive the calendar features from the future timestamps, in the order
# minute, hour, day, month, year, dayofweek, is_sunday.
stamp = future_df['timestamp'].dt
future_df = future_df.assign(
    minute=stamp.minute,
    hour=stamp.hour,
    day=stamp.day,
    month=stamp.month,
    year=stamp.year,
    dayofweek=stamp.dayofweek,
    is_sunday=stamp.dayofweek >= 6,  # Monday=0 ... Sunday=6
)

# The model is fed only the derived features, not the raw timestamp.
future_features = future_df.drop(columns='timestamp')

In [40]:
# Score each of the 480 future minutes with the trained booster.
future_predictions = bst.predict(future_features)

# Hard labels: a score strictly above 0.5 becomes 1, everything else 0.
# .tolist() keeps the result a plain list of Python ints.
future_predictions_binary = (future_predictions > 0.5).astype(int).tolist()

In [41]:
import openpyxl as xl
# Put the hard labels into a single-column frame; no timestamps are attached,
# so each row is identified only by its position in the forecast horizon.
predictions = pd.DataFrame(data=future_predictions_binary)

# NOTE(review): absolute, user-specific Windows path. This cell fails on any
# other machine; consider a configurable output directory.
output_path = r"C:\Users\26097879\YENITAHMIN.xlsx"
predictions.to_excel(output_path, index=False, engine='openpyxl')

In [42]:
# Show the validation confusion matrix again, header on its own line.
print("Confusion Matrix:", cm, sep="\n")

Confusion Matrix:
[[4823  659]
 [  54 2636]]
