In [1]:
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [2]:
import os

# Directory holding the training CSV. The original machine-specific location
# remains the default; set the FAULT_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
folder_path = os.environ.get("FAULT_DATA_DIR", r"C:\Users\medici\901")
file_name = "learning1_data.csv"
file_path = os.path.join(folder_path, file_name)

# Load the whole table. The preview printed below shows numbered feature
# columns followed by a 'fault_type' column.
data1 = pd.read_csv(file_path)

print(data1.head())

          0         1         2         3         4         5         6  \
0 -0.202268 -0.353959 -0.119095  0.247844  0.120783 -0.263038 -0.537206   
1  0.809660  0.977984 -0.025524 -1.094791 -1.556198 -0.060669  1.057084   
2 -0.283242 -0.188273 -0.175738 -0.118942 -0.096620  0.050365 -0.066805   
3  0.144290  0.030802  0.245908  0.400738  0.250939  0.376150 -0.265817   
4  0.147031  0.113169  0.218939 -0.442279 -0.780985 -0.673755 -0.355651   

          7         8         9  ...     25591     25592     25593     25594  \
0 -0.576838 -0.453532 -0.279206  ...  0.071124  0.173103  0.548901  0.481414   
1  0.878002  0.246451 -0.800890  ... -0.246382  0.358700  0.048618 -0.172239   
2 -0.234286  0.166423  0.164054  ... -0.197872  0.999439  0.847681  0.051769   
3  0.010329  0.693247 -0.277882  ... -0.855152 -1.051428 -1.167517 -1.048143   
4 -0.073637 -0.030152  0.159664  ... -0.257708 -0.140300  0.295603 -0.060675   

      25595     25596     25597     25598     25599  fault_type  
0 

In [4]:
# Separate the table into model inputs and the prediction target:
# X keeps every column except 'fault_type', y is the 'fault_type' column.
X = data1.drop('fault_type', axis=1)
y = data1['fault_type']

In [5]:
# Map the fault-type labels to integer class ids. The fitted encoder is kept
# in `le` so predictions can be mapped back with `le.inverse_transform`.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

In [6]:
# Rescale every feature column with a min-max scaler.
# NOTE(review): the scaler is fitted on all rows before the cross-validation
# split, so each fold's held-out rows contribute to the min/max statistics.
# Consider fitting it on the training rows of each fold instead - confirm
# whether that matters for this model.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)

In [7]:
# Five stratified folds; shuffling is seeded so the same split is produced on
# every run.
skf = StratifiedKFold(
    n_splits=5,
    random_state=42,
    shuffle=True,
)

In [8]:
# Per-fold metric accumulators, filled by the cross-validation loop.
accuracy_scores, f1_scores = [], []

# LightGBM training configuration for a multiclass problem. The number of
# classes is taken from the distinct encoded labels rather than hard-coded.
lgb_params = dict(
    objective='multiclass',
    num_class=np.unique(y_encoded).size,
    boosting_type='gbdt',
    metric='multi_logloss',
    verbosity=-1,
)

In [12]:
# Start from empty score lists every time this cell runs. The lists are
# otherwise only created in an earlier cell, so re-executing this cell on its
# own would append another set of fold scores and the reported averages would
# no longer cover exactly one pass of the cross-validation.
accuracy_scores = []
f1_scores = []

# Stratified cross-validation: train one LightGBM model per fold and score it
# on that fold's held-out rows.
for train_idx, test_idx in skf.split(X_scaled, y_encoded):
    X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
    y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]

    train_data = lgb.Dataset(X_train, label=y_train)
    test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

    # NOTE(review): the held-out fold is passed as the validation set but no
    # early-stopping callback is configured, so it presumably has no influence
    # on the fitted model - confirm. If early stopping is ever added, use a
    # separate validation split so the test fold stays unseen.
    model = lgb.train(lgb_params, train_data, valid_sets=[test_data])

    # The multiclass model yields one score per class for every row; the
    # predicted label is the class with the highest score.
    # NOTE(review): without early stopping `best_iteration` is presumably
    # unset, in which case all boosting rounds are used - confirm.
    y_pred = np.argmax(model.predict(X_test, num_iteration=model.best_iteration), axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    # Weighted F1 averages the per-class F1 scores by class support.
    f1 = f1_score(y_test, y_pred, average='weighted')

    accuracy_scores.append(accuracy)
    f1_scores.append(f1)

In [13]:
# Report the mean of the per-fold metrics, rounded to four decimals.
print(
    f"Average Accuracy: {np.mean(accuracy_scores):.4f}",
    f"Average F1 Score: {np.mean(f1_scores):.4f}",
    sep="\n",
)

Average Accuracy: 0.8021
Average F1 Score: 0.7977
