In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn 
import imblearn
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

from sklearn.model_selection import cross_val_score

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC 

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, balanced_accuracy_score, precision_recall_fscore_support, roc_auc_score

In [None]:
# Silence all warnings (repeats the filter already installed in the imports cell)
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Display formatting for pandas, NumPy, seaborn and matplotlib
pd.set_option('display.max_columns', None)  # never truncate the column list
np.set_printoptions(threshold=15, precision=3)  # summarize long arrays, 3 decimals
sns.set(style="darkgrid")
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

In [None]:
# Read the garage-door CSV (path relative to the working directory) and display the frame
df = pd.read_csv('Train_Test_IoT_Garage_Door.csv')
df

In [None]:
# Split the 'time' column ("H:M:S", possibly containing stray blanks) into
# integer 'hour', 'minute' and 'second' columns.
# The former empty-string placeholder columns were removed: the split below
# creates the three columns directly, so pre-filling them was dead code.
df['time'] = df['time'].str.replace(' ', '', regex=False)  # literal blank, not a regex
time_parts = df['time'].str.split(':', expand=True)
df[['hour', 'minute', 'second']] = time_parts.astype(int)

In [None]:
# Preview the first rows, now including the hour/minute/second columns
df.head()

In [None]:
# Distinct raw values of 'door_state' (checked before encoding)
df['door_state'].unique()

In [None]:
# Distinct raw values of 'sphone_signal' (checked before recoding)
df['sphone_signal'].unique()

In [None]:
# Normalize 'sphone_signal' to integer 0/1.
# The values handled here are padded strings ('true  ', 'false  '), digit
# strings and plain numbers. Matching on stripped, lower-cased text instead of
# exact literals keeps the mapping working whatever the padding, and the
# explicit cast guarantees an int64 column instead of relying on replace()
# to downcast an object column (behaviour that differs across pandas versions).
signal_codes = {'false': 0, '0': 0, '0.0': 0, 'true': 1, '1': 1, '1.0': 1}
signal_text = df['sphone_signal'].astype(str).str.strip().str.lower()
unexpected = sorted(set(signal_text.unique()) - set(signal_codes))
if unexpected:
    # Fail loudly rather than hand unmapped strings to the models later on.
    raise ValueError(f"Unexpected sphone_signal values: {unexpected}")
df['sphone_signal'] = signal_text.map(signal_codes).astype('int64')

In [None]:
# Re-inspect the distinct values after the recoding cell above
df['sphone_signal'].unique()

In [None]:
# Experiment without the timestamp feature: discard 'ts', 'type' and 'time'.
df = df.drop(columns=['ts', 'type', 'time'])

# Derive calendar features (day / month / year) from 'date', then discard
# the original column; the new columns are appended in that order.
parsed_date = pd.to_datetime(df['date'])
df = df.assign(
    day=parsed_date.dt.day,
    month=parsed_date.dt.month,
    year=parsed_date.dt.year,
).drop(columns=['date'])
df.head()

## One-Hot Encoding

In [None]:
# One-hot encode 'door_state': build the 'door_state_<value>' indicator
# columns, remove the original column and append the indicators at the end.
door_state_dummies = pd.get_dummies(df['door_state'], prefix='door_state')
df = pd.concat([df.drop(columns=['door_state']), door_state_dummies], axis=1)

In [None]:
# Preview three rows of the frame after the one-hot encoding cells
df.head(3)

### Ajustando dados X e y

In [None]:
# Feature matrix: phone signal, one-hot door state and day/time parts.
# ('month' and 'year' are not part of the selected features.)
feature_columns = [
    'sphone_signal',
    'door_state_closed',
    'door_state_open',
    'day',
    'hour',
    'minute',
    'second',
]
X = df.loc[:, feature_columns]

# Target vector.
y = df['label']

In [None]:
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder

# Encode the target labels as integer class codes.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [None]:
# Determine numerical and categorical feature columns.
# 'number' matches every numeric width (int32, int64, float32, ...). Listing
# only int64/float64 silently skips narrower columns, e.g. the int32 values
# that `.dt.day` can return on recent pandas versions.
numerical_ix = X.select_dtypes(include='number').columns
categorical_ix = X.select_dtypes(include=['object', 'bool', 'category']).columns

In [None]:
# Columns classified as numerical
numerical_ix

In [None]:
# Columns classified as categorical
categorical_ix

In [None]:
def classification_report_with_accuracy_score(y_true, y_pred):
    """Print the classification report and return the accuracy.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The value of ``accuracy_score(y_true, y_pred)``.
    """
    report = classification_report(y_true, y_pred)
    print(report)
    return accuracy_score(y_true, y_pred)

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn import tree

# Seed shared by every stochastic estimator so results are reproducible
# across runs (same value as the cross-validation splitter uses).
RANDOM_STATE = 42

# Logistic regression. `multi_class='ovr'` was dropped: the parameter is
# deprecated in recent scikit-learn and the liblinear solver is always
# one-vs-rest, so the fitted model is unchanged.
est_RL = LogisticRegression(C=0.1,
                            class_weight='balanced',
                            max_iter=100,
                            penalty='l2',
                            solver='liblinear',
                            random_state=RANDOM_STATE)

est_KNN = KNeighborsClassifier(metric='euclidean',
                               n_neighbors=1)

# Gradient boosting. `max_features='auto'` was removed in scikit-learn 1.3;
# for the classifier it meant sqrt(n_features), so 'sqrt' is the equivalent.
est_GBM = GradientBoostingClassifier(learning_rate=0.1,
                                     max_depth=2,
                                     max_features='sqrt',
                                     min_samples_leaf=1,
                                     min_samples_split=2,
                                     n_estimators=50,
                                     subsample=0.8,
                                     random_state=RANDOM_STATE)

est_NB = BernoulliNB(alpha=0.1)

est_LDA = LinearDiscriminantAnalysis(solver='svd')

est_DTC = DecisionTreeClassifier(class_weight=None,
                                 criterion='gini',
                                 max_depth=4,
                                 max_features='sqrt',
                                 min_samples_leaf=2,
                                 min_samples_split=5,
                                 random_state=RANDOM_STATE)

est_RF = RandomForestClassifier(bootstrap=True,
                                max_depth=5,
                                max_features='sqrt',
                                min_samples_leaf=1,
                                min_samples_split=2,
                                n_estimators=100,
                                random_state=RANDOM_STATE)

# Soft-voting ensemble over all base estimators.
est_Ensemble = VotingClassifier(
    estimators=[
        ('RL', est_RL),
        ('KNN', est_KNN),
        ('GBM', est_GBM),
        ('NB', est_NB),
        ('LDA', est_LDA),
        ('DTC', est_DTC),
        ('RF', est_RF),
    ],
    voting='soft',
)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedKFold

# Wrap the voting ensemble in a pipeline whose only step is the ensemble itself
clf = make_pipeline(est_Ensemble)

In [None]:
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report, accuracy_score

# One stratified 5-fold splitter shared by every model; the fixed seed makes
# each call produce the same folds.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Out-of-fold predictions of the whole ensemble pipeline, and its report.
y_pred = cross_val_predict(clf, X, y, cv=cv)
print("Estimador: Ensemble")
print(classification_report(y, y_pred))
print("---")

# Score every base estimator once, on out-of-fold predictions.
# Previously each estimator was fitted on all rows and scored on those same
# rows (training accuracy, trivially perfect for 1-NN), and fitted twice.
# NOTE: cross_val_predict works on clones, so the estimator objects
# themselves are left unfitted by this cell.
estimator_accuracy = {}
for name, estimator in est_Ensemble.estimators:
    y_pred_estimator = cross_val_predict(estimator, X, y, cv=cv)
    estimator_accuracy[name] = accuracy_score(y, y_pred_estimator)
    print(f"Estimador: {name}")
    print(classification_report(y, y_pred_estimator))
    print("---")

# Pick the estimator with the highest cross-validated accuracy
# (the first one wins on ties).
best_estimator_name = max(estimator_accuracy, key=estimator_accuracy.get)
best_estimator_score = estimator_accuracy[best_estimator_name]

# Show the best estimator
print("Melhor estimador: ", best_estimator_name)
print("Pontuação de acurácia: ", best_estimator_score)
