In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib  # For saving model and other artifacts

# Load your dataset
df = pd.read_csv('data_file_updated.csv')
df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')

# Drop unnecessary columns
df = df.drop(columns=['sensor_15', 'Unnamed: 0'])

# Handle missing values
df = df.dropna()

# Feature Engineering: Extract date and time components
df['year'] = df['Timestamp'].dt.year
df['month'] = df['Timestamp'].dt.month
df['day'] = df['Timestamp'].dt.day
df['hour'] = df['Timestamp'].dt.hour
df['minute'] = df['Timestamp'].dt.minute
df['second'] = df['Timestamp'].dt.second

# Optionally, you can use cyclical encoding for features like hour and day of the week
df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24.0)
df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24.0)

# Drop the original timestamp column
df = df.drop(columns=['Timestamp'])

# Separate features and target
X = df.drop(columns=['machine_status'])
y = df['machine_status']

# Normalize the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Encode target variable for binary classification
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Initialize and train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy}")

# Print classification report
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
print("Classification Report:")
print(report)

# Save the trained model to a file
joblib.dump(rf_model, 'random_forest_model.pkl')

# Save the scaler parameters
np.save('scaler_params.npy', [scaler.min_, scaler.scale_])

# Save the label encoder classes
np.save('label_encoder_classes.npy', label_encoder.classes_)


Random Forest Accuracy: 0.9998329435349148
Classification Report:
              precision    recall  f1-score   support

      BROKEN       1.00      0.99      1.00       348
      NORMAL       1.00      1.00      1.00     11624

    accuracy                           1.00     11972
   macro avg       1.00      1.00      1.00     11972
weighted avg       1.00      1.00      1.00     11972

