In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder

## Titanic
# Load the CSV, rename its columns, ordinal-encode the categorical features
# and split the rows into a 60 % train / 40 % test partition.
# NOTE(review): this section is labelled "Titanic" and expects the Titanic
# column names below, yet it reads from the machine_failure directory --
# confirm that this is the intended file.
data_titanic = pd.read_csv("./data/machine_failure/train.csv")

# Map the raw CSV headers to more descriptive column names.
column_titanic_dict = {
    "PassengerId": "PassengerId",
    "Survived": "Survived",
    "Pclass": "TicketClass",
    "Name": "Name",
    "Sex": "Sex",
    "Age": "Age",
    "SibSp": "NumberSiblingsSpouses",
    "Parch": "NumberParentsChildren",
    "Ticket": "TicketNumber",
    "Fare": "Fare",
    "Cabin": "CabinNumber",
    "Embarked": "Port",
}
data_titanic = data_titanic.rename(columns=column_titanic_dict)

train_X_titanic_columns = ["TicketClass", "Sex", "Age", "NumberSiblingsSpouses", "NumberParentsChildren", "Fare", "Port"]
train_y_titanic_columns = ["Survived"]
train_X_titanic_ordinal_columns = ["Sex", "Port"]

# Explicit copy: the feature frame is modified below, and writing into a bare
# column selection of data_titanic is the classic SettingWithCopy pattern.
X_titanic = data_titanic[train_X_titanic_columns].copy()
y_titanic = data_titanic[train_y_titanic_columns]

# Replace the categorical columns with their ordinal codes. Plain column
# assignment swaps the columns out, so they pick up the encoder's numeric dtype.
X_titanic[train_X_titanic_ordinal_columns] = OrdinalEncoder().fit_transform(
    X_titanic[train_X_titanic_ordinal_columns]
)

# Fixed random_state so that a kernel restart reproduces the same partition.
X_titanic_train, X_titanic_test, y_titanic_train, y_titanic_test = train_test_split(
    X_titanic, y_titanic, test_size=0.4, random_state=42
)
# Collapse the single-column target frames to 1-D Series; squeezing only the
# column axis keeps a Series even if a partition ever holds a single row.
y_titanic_train = y_titanic_train.squeeze(axis="columns")
y_titanic_test = y_titanic_test.squeeze(axis="columns")

## Machine Failure
# Load the CSV, rename its columns, ordinal-encode the categorical features
# and split the rows into a 60 % train / 40 % test partition.
data_machine_failure = pd.read_csv("./data/machine_failure/train.csv")

# NOTE(review): the rename map and the feature/target column lists below are
# identical to the Titanic section's (PassengerId, Survived, Pclass, ...).
# Confirm that the machine-failure CSV really has this schema; if it does not,
# the column selection further down will raise a KeyError.
column_machine_failure_dict = {
    "PassengerId": "PassengerId",
    "Survived": "Survived",
    "Pclass": "TicketClass",
    "Name": "Name",
    "Sex": "Sex",
    "Age": "Age",
    "SibSp": "NumberSiblingsSpouses",
    "Parch": "NumberParentsChildren",
    "Ticket": "TicketNumber",
    "Fare": "Fare",
    "Cabin": "CabinNumber",
    "Embarked": "Port",
}
data_machine_failure = data_machine_failure.rename(columns=column_machine_failure_dict)

train_X_machine_failure_columns = ["TicketClass", "Sex", "Age", "NumberSiblingsSpouses", "NumberParentsChildren", "Fare", "Port"]
train_y_machine_failure_columns = ["Survived"]
train_X_machine_failure_ordinal_columns = ["Sex", "Port"]

# Explicit copy: the feature frame is modified below, and writing into a bare
# column selection of the source frame is the classic SettingWithCopy pattern.
X_machine_failure = data_machine_failure[train_X_machine_failure_columns].copy()
y_machine_failure = data_machine_failure[train_y_machine_failure_columns]

# Replace the categorical columns with their ordinal codes. Plain column
# assignment swaps the columns out, so they pick up the encoder's numeric dtype.
X_machine_failure[train_X_machine_failure_ordinal_columns] = OrdinalEncoder().fit_transform(
    X_machine_failure[train_X_machine_failure_ordinal_columns]
)

# Fixed random_state so that a kernel restart reproduces the same partition.
X_machine_failure_train, X_machine_failure_test, y_machine_failure_train, y_machine_failure_test = train_test_split(
    X_machine_failure, y_machine_failure, test_size=0.4, random_state=42
)
# Collapse the single-column target frames to 1-D Series; squeezing only the
# column axis keeps a Series even if a partition ever holds a single row.
y_machine_failure_train = y_machine_failure_train.squeeze(axis="columns")
y_machine_failure_test = y_machine_failure_test.squeeze(axis="columns")

In [None]:
## Titanic
## Standard Scaler

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, accuracy_score, recall_score

# Halve the training partition. Only the "_2" half is used for fitting in this
# cell; the "_1" half is left untouched here.
# Fixed random_state keeps the halves identical across runs.
X_titanic_train_1, X_titanic_train_2, y_titanic_train_1, y_titanic_train_2 = train_test_split(
    X_titanic_train, y_titanic_train, test_size=0.5, random_state=42
)
# Ensure 1-D targets (no effect when they already are).
y_titanic_train_1 = np.squeeze(y_titanic_train_1)
y_titanic_train_2 = np.squeeze(y_titanic_train_2)

# Hyper-parameter grid for the boosting step: 3 x 3 = 9 candidates, each
# cross-validated 10-fold below (90 fits in total, up to 10000 trees each).
# The step is called "ada_bst_clf" although the estimator is a
# GradientBoostingClassifier; the name is kept because the grid keys (and any
# best_params_ lookups) are derived from it.
param_grid = [
    {
        "ada_bst_clf__n_estimators": [100, 1000, 10000],
        "ada_bst_clf__learning_rate": [0.01, 0.1, 1],
    }
]
# Pipeline: mean-impute missing values -> standardise -> gradient boosting.
titanic_standard_pipeline = Pipeline([
    ("imputer", SimpleImputer(missing_values=np.nan, strategy="mean")),
    ("scaler", StandardScaler()),
    # Seeded so repeated runs of the search give the same model.
    ("ada_bst_clf", GradientBoostingClassifier(random_state=42)),
])
gradient_bst_clf = GridSearchCV(titanic_standard_pipeline, param_grid, cv=10, verbose=3)
gradient_bst_clf.fit(X_titanic_train_2, y_titanic_train_2)
# Predict
y_titanic_pred = gradient_bst_clf.predict(X_titanic_test)
# Score: print each metric's label followed by its value on the test partition.
for metric_label, metric_fn in (
    ("Confusion Matrix", confusion_matrix),
    ("Precision Score", precision_score),
    ("Accuracy Score", accuracy_score),
    ("Recall Score", recall_score),
    ("F1 Score", f1_score),
):
    print(metric_label)
    print(metric_fn(y_titanic_test, y_titanic_pred))

In [None]:
## Machine Failure
## Standard Scaler

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, accuracy_score, recall_score

# Halve the training partition. Only the "_2" half is used for fitting in this
# cell; the "_1" half is left untouched here.
# Fixed random_state keeps the halves identical across runs.
X_machine_failure_train_1, X_machine_failure_train_2, y_machine_failure_train_1, y_machine_failure_train_2 = train_test_split(
    X_machine_failure_train, y_machine_failure_train, test_size=0.5, random_state=42
)
# Ensure 1-D targets (no effect when they already are).
y_machine_failure_train_1 = np.squeeze(y_machine_failure_train_1)
y_machine_failure_train_2 = np.squeeze(y_machine_failure_train_2)

# Hyper-parameter grid for the boosting step: 3 x 3 = 9 candidates, each
# cross-validated 10-fold below (90 fits in total, up to 10000 trees each).
# The step is called "ada_bst_clf" although the estimator is a
# GradientBoostingClassifier; the name is kept because the grid keys (and any
# best_params_ lookups) are derived from it.
param_grid = [
    {
        "ada_bst_clf__n_estimators": [100, 1000, 10000],
        "ada_bst_clf__learning_rate": [0.01, 0.1, 1],
    }
]
# Pipeline: mean-impute missing values -> standardise -> gradient boosting.
machine_failure_standard_pipeline = Pipeline([
    ("imputer", SimpleImputer(missing_values=np.nan, strategy="mean")),
    ("scaler", StandardScaler()),
    # Seeded so repeated runs of the search give the same model.
    ("ada_bst_clf", GradientBoostingClassifier(random_state=42)),
])
gradient_bst_clf = GridSearchCV(machine_failure_standard_pipeline, param_grid, cv=10, verbose=3)
gradient_bst_clf.fit(X_machine_failure_train_2, y_machine_failure_train_2)
# Predict
y_machine_failure_pred = gradient_bst_clf.predict(X_machine_failure_test)
# Score: print each metric's label followed by its value on the test partition.
for metric_label, metric_fn in (
    ("Confusion Matrix", confusion_matrix),
    ("Precision Score", precision_score),
    ("Accuracy Score", accuracy_score),
    ("Recall Score", recall_score),
    ("F1 Score", f1_score),
):
    print(metric_label)
    print(metric_fn(y_machine_failure_test, y_machine_failure_pred))

In [None]:
## Titanic
## MinMax Scaler

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, accuracy_score, recall_score

# Halve the training partition. Only the "_2" half is used for fitting in this
# cell; the "_1" half is left untouched here.
# Fixed random_state keeps the halves identical across runs.
X_titanic_train_1, X_titanic_train_2, y_titanic_train_1, y_titanic_train_2 = train_test_split(
    X_titanic_train, y_titanic_train, test_size=0.5, random_state=42
)
# Ensure 1-D targets (no effect when they already are).
y_titanic_train_1 = np.squeeze(y_titanic_train_1)
y_titanic_train_2 = np.squeeze(y_titanic_train_2)

# Hyper-parameter grid for the boosting step: 3 x 3 = 9 candidates, each
# cross-validated 10-fold below (90 fits in total, up to 10000 trees each).
# The step is called "ada_bst_clf" although the estimator is a
# GradientBoostingClassifier; the name is kept because the grid keys (and any
# best_params_ lookups) are derived from it.
param_grid = [
    {
        "ada_bst_clf__n_estimators": [100, 1000, 10000],
        "ada_bst_clf__learning_rate": [0.01, 0.1, 1],
    }
]
# Pipeline: mean-impute missing values -> min-max scale -> gradient boosting.
titanic_minmax_pipeline = Pipeline([
    ("imputer", SimpleImputer(missing_values=np.nan, strategy="mean")),
    ("scaler", MinMaxScaler()),
    # Seeded so repeated runs of the search give the same model.
    ("ada_bst_clf", GradientBoostingClassifier(random_state=42)),
])
gradient_bst_clf = GridSearchCV(titanic_minmax_pipeline, param_grid, cv=10, verbose=3)
gradient_bst_clf.fit(X_titanic_train_2, y_titanic_train_2)
# Predict
y_titanic_pred = gradient_bst_clf.predict(X_titanic_test)
# Score: print each metric's label followed by its value on the test partition.
for metric_label, metric_fn in (
    ("Confusion Matrix", confusion_matrix),
    ("Precision Score", precision_score),
    ("Accuracy Score", accuracy_score),
    ("Recall Score", recall_score),
    ("F1 Score", f1_score),
):
    print(metric_label)
    print(metric_fn(y_titanic_test, y_titanic_pred))

In [None]:
## Machine Failure
## MinMax Scaler

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, accuracy_score, recall_score

# Halve the training partition. Only the "_2" half is used for fitting in this
# cell; the "_1" half is left untouched here.
# Fixed random_state keeps the halves identical across runs.
X_machine_failure_train_1, X_machine_failure_train_2, y_machine_failure_train_1, y_machine_failure_train_2 = train_test_split(
    X_machine_failure_train, y_machine_failure_train, test_size=0.5, random_state=42
)
# Ensure 1-D targets (no effect when they already are).
y_machine_failure_train_1 = np.squeeze(y_machine_failure_train_1)
y_machine_failure_train_2 = np.squeeze(y_machine_failure_train_2)

# Hyper-parameter grid for the boosting step: 3 x 3 = 9 candidates, each
# cross-validated 10-fold below (90 fits in total, up to 10000 trees each).
# The step is called "ada_bst_clf" although the estimator is a
# GradientBoostingClassifier; the name is kept because the grid keys (and any
# best_params_ lookups) are derived from it.
param_grid = [
    {
        "ada_bst_clf__n_estimators": [100, 1000, 10000],
        "ada_bst_clf__learning_rate": [0.01, 0.1, 1],
    }
]
# Pipeline: mean-impute missing values -> min-max scale -> gradient boosting.
machine_failure_minmax_pipeline = Pipeline([
    ("imputer", SimpleImputer(missing_values=np.nan, strategy="mean")),
    ("scaler", MinMaxScaler()),
    # Seeded so repeated runs of the search give the same model.
    ("ada_bst_clf", GradientBoostingClassifier(random_state=42)),
])
gradient_bst_clf = GridSearchCV(machine_failure_minmax_pipeline, param_grid, cv=10, verbose=3)
gradient_bst_clf.fit(X_machine_failure_train_2, y_machine_failure_train_2)
# Predict
y_machine_failure_pred = gradient_bst_clf.predict(X_machine_failure_test)
# Score: print each metric's label followed by its value on the test partition.
for metric_label, metric_fn in (
    ("Confusion Matrix", confusion_matrix),
    ("Precision Score", precision_score),
    ("Accuracy Score", accuracy_score),
    ("Recall Score", recall_score),
    ("F1 Score", f1_score),
):
    print(metric_label)
    print(metric_fn(y_machine_failure_test, y_machine_failure_pred))