In [1]:
# Dataset: AI4I 2020 Predictive Maintenance Dataset (UCI Machine Learning Repository)
# https://archive.ics.uci.edu/ml/datasets/AI4I+2020+Predictive+Maintenance+Dataset


In [2]:
import time

import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

def extract_error_measures(expected_test_outputs, predictions):
    """Compute MAE, MSE and RMSE between expected outputs and predictions.

    Args:
        expected_test_outputs: Ground-truth target values, in any form
            accepted by ``sklearn.metrics.mean_absolute_error`` and
            ``sklearn.metrics.mean_squared_error``.
        predictions: Predicted values aligned with ``expected_test_outputs``.

    Returns:
        A tuple ``(mean_absolute_error, mean_squared_error,
        root_mean_squared_error)``.
    """
    test_mean_absolute_error = mean_absolute_error(expected_test_outputs, predictions)
    test_mean_squared_error = mean_squared_error(expected_test_outputs, predictions)
    # RMSE is the square root of the MSE computed above; reuse that value
    # instead of running mean_squared_error over the data a second time.
    test_root_mean_squared_error = np.sqrt(test_mean_squared_error)
    return test_mean_absolute_error, test_mean_squared_error, test_root_mean_squared_error


In [3]:
# Logistic regression algorithm implementation
from sklearn.linear_model import LogisticRegression
import datetime

def logistic_classification_algorithm(train, test, x_variables, y_variable, target_classes):
    """Fit a logistic regression on ``train`` and score it on ``test``.

    Args:
        train: Training data indexable by ``x_variables`` and ``y_variable``
            (presumably a pandas DataFrame; the target selection must expose
            ``.values``).
        test: Held-out data indexable by the same labels as ``train``.
        x_variables: Column label(s) used as model inputs.
        y_variable: Column label(s) holding the target.
        target_classes: Unused here; accepted so that every function stored
            in ``strategies`` can be called with the same arguments.

    Returns:
        Tuple ``(model, mae, mse, rmse, elapsed_ms)``. ``elapsed_ms`` is the
        time in milliseconds spent constructing and fitting the model and
        predicting on the test inputs; metric computation is not included.
    """
    x_train = train[x_variables]
    y_train = train[y_variable]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time can jump if the system
    # clock is adjusted mid-run.
    start_time = time.perf_counter()

    model = LogisticRegression(max_iter=1000)
    # ravel() flattens the selected target into the 1-D array fit() expects.
    model.fit(x_train, y_train.values.ravel())

    test_input = test[x_variables]
    test_output = test[y_variable]
    predictions = model.predict(test_input)

    elapsed_ms = (time.perf_counter() - start_time) * 1000

    mae, mse, rmse = extract_error_measures(test_output, predictions)

    return model, mae, mse, rmse, elapsed_ms


In [4]:
# Decision Tree Classifier
from sklearn import tree
import datetime
import matplotlib.pyplot as plt

def decision_tree_algorithm(train, test, x_variables, y_variable, target_classes, plot_path='out.pdf'):
    """Fit a decision tree classifier, score it on ``test`` and plot the tree.

    Args:
        train: Training data indexable by ``x_variables`` and ``y_variable``
            (presumably a pandas DataFrame; the target selection must expose
            ``.values``).
        test: Held-out data indexable by the same labels as ``train``.
        x_variables: Column label(s) used as model inputs; also passed to
            ``tree.plot_tree`` as the feature names.
        y_variable: Column label(s) holding the target.
        target_classes: Class names passed to ``tree.plot_tree`` for labelling
            the rendered tree.
        plot_path: Where the rendered tree is saved. Defaults to ``'out.pdf'``
            (the previously hard-coded location). Pass ``None`` to skip
            plotting and saving altogether.

    Returns:
        Tuple ``(model, mae, mse, rmse, elapsed_ms)``. ``elapsed_ms`` is the
        time in milliseconds spent constructing and fitting the model and
        predicting on the test inputs; plotting and metric computation are
        not included.
    """
    x_train = train[x_variables]
    y_train = train[y_variable]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time can jump if the system
    # clock is adjusted mid-run.
    start_time = time.perf_counter()

    # random_state is fixed so repeated runs build the same tree.
    model = tree.DecisionTreeClassifier(random_state=0)
    # ravel() flattens the selected target into the 1-D array fit() expects.
    model = model.fit(x_train, y_train.values.ravel())

    test_input = test[x_variables]
    test_output = test[y_variable]
    predictions = model.predict(test_input)

    # Stop the clock before plotting so rendering cost does not distort the
    # fit/predict timing.
    elapsed_ms = (time.perf_counter() - start_time) * 1000

    if plot_path is not None:
        # With no ``ax`` given, plot_tree draws on the current matplotlib
        # axes; savefig then writes the current figure to ``plot_path``.
        tree.plot_tree(model, filled=True, feature_names=x_variables, class_names=target_classes)
        plt.savefig(plot_path)

    mae, mse, rmse = extract_error_measures(test_output, predictions)

    return model, mae, mse, rmse, elapsed_ms


In [5]:
# Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB
import datetime

def gaussian_naive_bayes_algorithm(train, test, x_variables, y_variable, target_classes):
    """Fit a Gaussian naive Bayes classifier on ``train`` and score it on ``test``.

    Args:
        train: Training data indexable by ``x_variables`` and ``y_variable``
            (presumably a pandas DataFrame; the target selection must expose
            ``.values``).
        test: Held-out data indexable by the same labels as ``train``.
        x_variables: Column label(s) used as model inputs.
        y_variable: Column label(s) holding the target.
        target_classes: Unused here; accepted so that every function stored
            in ``strategies`` can be called with the same arguments.

    Returns:
        Tuple ``(model, mae, mse, rmse, elapsed_ms)``. ``elapsed_ms`` is the
        time in milliseconds spent constructing and fitting the model and
        predicting on the test inputs; metric computation is not included.
    """
    x_train = train[x_variables]
    y_train = train[y_variable]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time can jump if the system
    # clock is adjusted mid-run.
    start_time = time.perf_counter()

    model = GaussianNB()
    # ravel() flattens the selected target into the 1-D array fit() expects.
    model.fit(x_train, y_train.values.ravel())

    test_input = test[x_variables]
    test_output = test[y_variable]
    predictions = model.predict(test_input)

    elapsed_ms = (time.perf_counter() - start_time) * 1000

    mae, mse, rmse = extract_error_measures(test_output, predictions)

    return model, mae, mse, rmse, elapsed_ms


In [6]:
from sklearn.ensemble import RandomForestRegressor
import datetime

def random_forest_regression_algorithm(train, test, x_variables, y_variable, target_classes):
    """Fit a random forest regressor on ``train`` and score it on ``test``.

    Unlike the classifier-based strategies in this notebook, this one uses
    ``RandomForestRegressor``, so the error measures are computed on the
    regressor's numeric predictions.

    Args:
        train: Training data indexable by ``x_variables`` and ``y_variable``
            (presumably a pandas DataFrame; the target selection must expose
            ``.values``).
        test: Held-out data indexable by the same labels as ``train``.
        x_variables: Column label(s) used as model inputs.
        y_variable: Column label(s) holding the target.
        target_classes: Unused here; accepted so that every function stored
            in ``strategies`` can be called with the same arguments.

    Returns:
        Tuple ``(model, mae, mse, rmse, elapsed_ms)``. ``elapsed_ms`` is the
        time in milliseconds spent constructing and fitting the model and
        predicting on the test inputs; metric computation is not included.
    """
    x_train = train[x_variables]
    y_train = train[y_variable]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time can jump if the system
    # clock is adjusted mid-run.
    start_time = time.perf_counter()

    # Shallow trees (max_depth=2) with a fixed seed for repeatable results.
    model = RandomForestRegressor(max_depth=2, random_state=0)
    # ravel() flattens the selected target into the 1-D array fit() expects.
    model.fit(x_train, y_train.values.ravel())

    test_input = test[x_variables]
    test_output = test[y_variable]
    predictions = model.predict(test_input)

    elapsed_ms = (time.perf_counter() - start_time) * 1000

    mae, mse, rmse = extract_error_measures(test_output, predictions)

    return model, mae, mse, rmse, elapsed_ms


In [7]:
# Linear Discriminant analysis model
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import datetime

def linear_discriminant_analysis_algorithm(train, test, x_variables, y_variable, target_classes):
    """Fit a linear discriminant analysis model on ``train`` and score it on ``test``.

    Args:
        train: Training data indexable by ``x_variables`` and ``y_variable``
            (presumably a pandas DataFrame; the target selection must expose
            ``.values``).
        test: Held-out data indexable by the same labels as ``train``.
        x_variables: Column label(s) used as model inputs.
        y_variable: Column label(s) holding the target.
        target_classes: Unused here; accepted so that every function stored
            in ``strategies`` can be called with the same arguments.

    Returns:
        Tuple ``(model, mae, mse, rmse, elapsed_ms)``. ``elapsed_ms`` is the
        time in milliseconds spent constructing and fitting the model and
        predicting on the test inputs; metric computation is not included.
    """
    x_train = train[x_variables]
    y_train = train[y_variable]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time can jump if the system
    # clock is adjusted mid-run.
    start_time = time.perf_counter()

    model = LinearDiscriminantAnalysis()
    # ravel() flattens the selected target into the 1-D array fit() expects.
    model.fit(x_train, y_train.values.ravel())

    test_input = test[x_variables]
    test_output = test[y_variable]
    predictions = model.predict(test_input)

    elapsed_ms = (time.perf_counter() - start_time) * 1000

    mae, mse, rmse = extract_error_measures(test_output, predictions)

    return model, mae, mse, rmse, elapsed_ms


In [8]:
# k-nearest neighbors (KNN) classifier model
from sklearn.neighbors import KNeighborsClassifier
import datetime

def knn_classifier_algorithm(train, test, x_variables, y_variable, target_classes):
    """Fit a 3-nearest-neighbours classifier on ``train`` and score it on ``test``.

    Args:
        train: Training data indexable by ``x_variables`` and ``y_variable``
            (presumably a pandas DataFrame; the target selection must expose
            ``.values``).
        test: Held-out data indexable by the same labels as ``train``.
        x_variables: Column label(s) used as model inputs.
        y_variable: Column label(s) holding the target.
        target_classes: Unused here; accepted so that every function stored
            in ``strategies`` can be called with the same arguments.

    Returns:
        Tuple ``(model, mae, mse, rmse, elapsed_ms)``. ``elapsed_ms`` is the
        time in milliseconds spent constructing and fitting the model and
        predicting on the test inputs; metric computation is not included.
    """
    x_train = train[x_variables]
    y_train = train[y_variable]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time can jump if the system
    # clock is adjusted mid-run.
    start_time = time.perf_counter()

    model = KNeighborsClassifier(n_neighbors=3)
    # ravel() flattens the selected target into the 1-D array fit() expects.
    model.fit(x_train, y_train.values.ravel())

    test_input = test[x_variables]
    test_output = test[y_variable]
    predictions = model.predict(test_input)

    elapsed_ms = (time.perf_counter() - start_time) * 1000

    mae, mse, rmse = extract_error_measures(test_output, predictions)

    return model, mae, mse, rmse, elapsed_ms


In [9]:
# Support Vector Classification model
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import datetime

def svn_algorithm(train, test, x_variables, y_variable, target_classes):
    """Fit a standardised support vector classifier on ``train`` and score it on ``test``.

    The model is a pipeline of ``StandardScaler`` followed by
    ``SVC(gamma='auto')``, so inputs are scaled before reaching the SVC both
    when fitting and when predicting.

    Args:
        train: Training data indexable by ``x_variables`` and ``y_variable``
            (presumably a pandas DataFrame; the target selection must expose
            ``.values``).
        test: Held-out data indexable by the same labels as ``train``.
        x_variables: Column label(s) used as model inputs.
        y_variable: Column label(s) holding the target.
        target_classes: Unused here; accepted so that every function stored
            in ``strategies`` can be called with the same arguments.

    Returns:
        Tuple ``(model, mae, mse, rmse, elapsed_ms)``. ``model`` is the fitted
        pipeline. ``elapsed_ms`` is the time in milliseconds spent
        constructing and fitting the pipeline and predicting on the test
        inputs; metric computation is not included.
    """
    x_train = train[x_variables]
    y_train = train[y_variable]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time can jump if the system
    # clock is adjusted mid-run.
    start_time = time.perf_counter()

    model = make_pipeline(StandardScaler(), SVC(gamma='auto'))
    # ravel() flattens the selected target into the 1-D array fit() expects.
    model.fit(x_train, y_train.values.ravel())

    test_input = test[x_variables]
    test_output = test[y_variable]
    predictions = model.predict(test_input)

    elapsed_ms = (time.perf_counter() - start_time) * 1000

    mae, mse, rmse = extract_error_measures(test_output, predictions)

    return model, mae, mse, rmse, elapsed_ms


In [10]:
# Registry mapping each strategy's name to the function that implements it.
# Every function takes (train, test, x_variables, y_variable, target_classes).
strategies = dict(
    logistic_classification_algorithm=logistic_classification_algorithm,
    decision_tree_algorithm=decision_tree_algorithm,
    gaussian_naive_bayes_algorithm=gaussian_naive_bayes_algorithm,
    random_forest_regression_algorithm=random_forest_regression_algorithm,
    linear_discriminant_analysis_algorithm=linear_discriminant_analysis_algorithm,
    knn_classifier_algorithm=knn_classifier_algorithm,
    svn_algorithm=svn_algorithm,
)