In [4]:
import pandas as pd
import numpy as np

import sklearn.preprocessing
import sklearn.ensemble
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

from joblib import dump, load

# Importando e Pré-processando os Dados

In [5]:
# Read the pre-split training and test sets from disk.
df_train = pd.read_csv("concatened-data/train.csv")
df_test = pd.read_csv("concatened-data/test.csv")

# Column-name list; not referenced by any of the visible cells.
cols = ['x','y','z','anomaly']

# Cast the label column of both frames to pandas' nullable boolean dtype.
df_train["anomaly"] = df_train["anomaly"].astype('boolean')
df_test["anomaly"] = df_test["anomaly"].astype('boolean')

# Treinamento

In [6]:
# Split each frame into features (every column except the label) and the
# "anomaly" label itself.
X_train, y_train = df_train.drop(columns="anomaly"), df_train["anomaly"]
X_test, y_test = df_test.drop(columns="anomaly"), df_test["anomaly"]

def train_model(model, model_name):
    """Fit ``model`` on the training split and print its test-set metrics.

    Uses the notebook-level ``X_train``/``y_train`` for fitting and
    ``X_test``/``y_test`` for evaluation, then prints the accuracy (as a
    percentage) followed by scikit-learn's classification report.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``; ``fit``
            must return the fitted estimator, which is then used to predict.
        model_name: Label printed in the "Evaluating" header line.

    Returns:
        None. The metrics are only printed.
    """
    print(f"Evaluating: {model_name}")

    model = model.fit(X_train, y_train)
    pred = model.predict(X_test)

    # accuracy_score returns a fraction; scale it to a percentage for display.
    acc = accuracy_score(y_test, pred)*100
    print('accuracy_score',acc)
    print()

    print('Classification Report')
    print(classification_report(y_test, pred))
    
# Random forest configuration. random_state is pinned so that a fresh
# "Restart & Run All" rebuilds the same forest; without it the bootstrap
# samples and feature subsets are drawn differently on every run, so both the
# printed metrics and the exported model would change between runs.
# NOTE(review): the report recorded for this cell shows very low recall on the
# positive class; consider class_weight='balanced' -- confirm with the owner.
random_forest = sklearn.ensemble.RandomForestClassifier(
    n_estimators=1500,
    max_depth=15,
    min_samples_split=5,
    min_samples_leaf=3,
    n_jobs=-1,  # train the trees on all available CPU cores
    random_state=42,  # arbitrary fixed seed, for reproducibility only
)

# Fit on the training split and print test accuracy plus the classification report.
train_model(random_forest, 'Random Forest Classifier')

Evaluating: Random Forest Classifier
accuracy_score 94.42890442890443

Classification Report
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.97     28420
         1.0       0.26      0.02      0.04      1610

    accuracy                           0.94     30030
   macro avg       0.60      0.51      0.51     30030
weighted avg       0.91      0.94      0.92     30030



# Exportando o Modelo

In [7]:
# Persist the random forest object to disk with joblib.
dump(value=random_forest, filename='model.joblib')

['model.joblib']