In [15]:
from pathlib import Path

import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, f1_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler

# Load the labelled training data and the unlabelled data used for the submission.
df_train = pd.read_csv("data/train.csv")
df_test = pd.read_csv("data/test.csv")  # for final submission


def _add_engineered_features(df, tool_wear_max):
    """Return a copy of ``df`` with the engineered feature columns appended.

    The same function is applied to the training and the test frame so both
    end up with identical columns, in identical order, computed the same way.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the raw measurement columns and the five failure-flag
        columns referenced in the body.
    tool_wear_max : float
        Normalising constant for ``ToolWearRate``. Pass the value computed on
        the training frame for *both* frames; normalising each frame by its
        own maximum would put the same feature on two different scales.

    Returns
    -------
    pandas.DataFrame
        New frame; the input is not modified.
    """
    out = df.copy()
    air = out['Air temperature [K]']
    process = out['Process temperature [K]']
    torque = out['Torque [Nm]']
    wear = out['Tool wear [min]']

    out['Power'] = torque * out['Rotational speed [rpm]']
    out['TemperatureDifference'] = process - air
    out['TemperatureVariability'] = out[['Air temperature [K]', 'Process temperature [K]']].std(axis=1)
    out['TemperatureRatio'] = process / air
    out['ToolWearRate'] = wear / tool_wear_max

    # Dividing by a tool wear of 0 yields +inf, -inf or NaN depending on the
    # numerator. Every such row gets the fallback value 1.0 (the value that
    # was previously substituted for +inf only), so no non-finite value
    # reaches the scaler or the models.
    nonzero_wear = wear != 0
    change_rate = out['TemperatureDifference'] / wear.where(nonzero_wear)
    out['TemperatureChangeRate'] = change_rate.where(nonzero_wear, 1.0)

    out['TotalFailures'] = out[['TWF', 'HDF', 'PWF', 'OSF', 'RNF']].sum(axis=1)
    # The small offset keeps the ratio finite when tool wear is 0.
    out['TorqueWearRatio'] = torque / (wear + 0.0001)
    out['TorqueWearProduct'] = torque * wear
    return out


# Feature engineering (training-set maximum is reused for the test set).
tool_wear_max = df_train['Tool wear [min]'].max()
df_train = _add_engineered_features(df_train, tool_wear_max)
df_test = _add_engineered_features(df_test, tool_wear_max)

# Separate the features from identifiers and the target.
x_train = df_train.drop(columns=['id', 'Product ID', 'Machine failure'])
y_train = np.array(df_train["Machine failure"], dtype=np.float32)
x_test = df_test.drop(columns=['id', 'Product ID'])

# The arrays below are positional, so train and test must line up column by column.
assert list(x_train.columns) == list(x_test.columns), "train/test feature columns differ"

# Encode the L/M/H category held in the first remaining column.
# Any value other than 'L' or 'M' (including missing values) is encoded as 1.0,
# which is what the previous nested np.where did.
lm_column = 0
mapping = {'L': 0.0, 'M': 0.5, 'H': 1.0}
type_column = x_train.columns[lm_column]
x_train[type_column] = x_train[type_column].map(mapping).fillna(mapping['H'])
x_test[type_column] = x_test[type_column].map(mapping).fillna(mapping['H'])

x_train = x_train.to_numpy(dtype=np.float64)
x_test = x_test.to_numpy(dtype=np.float64)

# Hold out 20% for validation BEFORE fitting the scaler, so that the scaling
# statistics are learned from the training rows only and the hold-out metrics
# are not influenced by the hold-out rows themselves.
x_train, x_cv, y_train, y_cv = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

# Standardization
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_cv = scaler.transform(x_cv)
x_test = scaler.transform(x_test)

In [17]:
import tensorflow as tf
from tensorflow import keras

# Seed TensorFlow's global RNG so weight initialisation and batch shuffling are
# repeatable between runs. (Some GPU kernels can still be non-deterministic.)
tf.random.set_seed(42)

# Define the regularized deep neural network model:
# two 64-unit ReLU layers with L2 weight penalties and a sigmoid output unit.
num_features = x_train.shape[1]
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.0005), input_shape=(num_features,)),
    keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)),
    keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model. The hold-out split is passed as validation data purely for
# per-epoch monitoring; it does not take part in the weight updates.
model.fit(x_train, y_train, epochs=5, batch_size=32, validation_data=(x_cv, y_cv))

# Evaluate the model on cross-validation set (probabilities thresholded at 0.5)
cv_predictions = model.predict(x_cv)
cv_predictions = (cv_predictions > 0.5).astype(int)

cv_accuracy = accuracy_score(y_cv, cv_predictions)
cv_f1 = f1_score(y_cv, cv_predictions)
cv_recall = recall_score(y_cv, cv_predictions)

print("Cross-Validation Accuracy:", cv_accuracy)
print("Cross-Validation F1 Score:", cv_f1)
print("Cross-Validation Recall:", cv_recall)

# Predict on test set. The raw sigmoid outputs are kept as-is: thresholding is
# deliberately left disabled here, so test_predictions holds probabilities.
test_predictions = model.predict(x_test)
# test_predictions = (test_predictions > 0.5).astype(int)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Cross-Validation Accuracy: 0.9961518727552591
Cross-Validation F1 Score: 0.8648648648648648
Cross-Validation Recall: 0.7671232876712328


In [14]:
# Assemble the submission file from the test ids and the model's test predictions.
indices = np.array(df_test["id"], dtype=int).reshape(-1, 1)

# column_stack accepts both a column vector (n, 1) and a flat vector (n,) of
# predictions, so no manual reshape is needed whichever model produced them.
submission = np.column_stack((indices, test_predictions))

submission_df = pd.DataFrame(submission, columns=['id', 'Machine failure'])
submission_df['id'] = submission_df['id'].astype(int)  # stacking with float predictions made the ids float

# Create the output directory on first use; to_csv does not create missing folders.
submission_path = Path("submissions") / "try_1.csv"
submission_path.parent.mkdir(parents=True, exist_ok=True)
submission_df.to_csv(submission_path, index=False)

Cross-Validation Accuracy: 0.9961518727552591
Cross-Validation F1 Score: 0.8648648648648648
Cross-Validation Recall: 0.7671232876712328

In [18]:
import lightgbm as lgb

# Wrap the training split in LightGBM's Dataset container.
train_data = lgb.Dataset(x_train, label=y_train)

# Booster configuration: binary objective, log-loss and AUC as metrics,
# gradient-boosted trees, silenced logging, fixed seed.
params = dict(
    objective='binary',
    metric=['binary_logloss', 'auc'],
    boosting_type='gbdt',
    verbosity=-1,
    seed=42,
)

# Fit 100 boosting rounds.
model = lgb.train(params, train_data, num_boost_round=100)

# Score the hold-out split and cut the scores at 0.5 to get class labels.
cv_pred = model.predict(x_cv)
cv_pred_binary = (cv_pred >= 0.5).astype(int)

# Report the hold-out metrics.
print("Cross-Validation Metrics:")
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("F1-Score:", f1_score),
    ("Recall:", recall_score),
):
    print(metric_label, metric_fn(y_cv, cv_pred_binary))

# Score the test data (x_test); the raw model outputs are kept.
test_pred = model.predict(x_test)


Cross-Validation Metrics:
Accuracy: 0.995895330938943
F1-Score: 0.8571428571428571
Recall: 0.7671232876712328


In [19]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score, recall_score

# Build and fit a Gaussian Naive Bayes classifier on the training split.
gnb = GaussianNB()
gnb.fit(x_train, y_train)

# Label the hold-out split and score those labels against the truth.
y_cv_pred = gnb.predict(x_cv)
accuracy, f1, recall = (
    scorer(y_cv, y_cv_pred)
    for scorer in (accuracy_score, f1_score, recall_score)
)

# Report the hold-out metrics.
print("Cross-validation performance:")
print("Accuracy:", accuracy)
print("F1 score:", f1)
print("Recall:", recall)

# Label the test data and show the resulting array.
y_test_pred = gnb.predict(x_test)
print("Test data predictions:")
print(y_test_pred)

Cross-validation performance:
Accuracy: 0.9961518727552591
F1 score: 0.865211810012837
Recall: 0.769406392694064
Test data predictions:
[0. 0. 0. ... 0. 0. 0.]


In [20]:
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, f1_score, recall_score

# Train a 10-tree random forest on the training split. The fixed random_state
# makes the bootstrap samples and feature sub-sampling (and therefore the
# metrics printed below) repeatable from run to run.
rfc = RandomForestClassifier(n_estimators=10, random_state=42).fit(x_train, y_train)

# Predict on the hold-out (cross-validation) split, not the test set.
rfc_pred = rfc.predict(x_cv)

print("Random Forest Classifier", "\n\tAccuracy:", accuracy_score(y_cv, rfc_pred), "\n\tF1:", f1_score(y_cv, rfc_pred), "\n\tRecall:", recall_score(y_cv, rfc_pred))

Random Forest Classifier 
	Accuracy: 0.9961518727552591 
	F1: 0.8662420382165605 
	Recall: 0.776255707762557
