## Exemplo Regressão Logística e MLPs

### Classificador para avaliar a satisfação de usuários em linhas aéreas

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris, load_digits
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import normalize, OneHotEncoder, OrdinalEncoder
from sklearn.metrics import mean_absolute_error, roc_auc_score, f1_score, confusion_matrix

# Customer Satisfaction in Airline
# Source: https://www.kaggle.com/datasets/yakhyojon/customer-satisfaction-in-airline

data_path = 'data/Invistico_Airline.csv'

# Load the dataset into data_df.
data_df = pd.read_csv(data_path)

# Remove the limit on how many columns pandas shows when displaying a frame.
pd.set_option('display.max_columns', None)

print(data_df.columns)

In [None]:
# Logistic regression training example (iris dataset).
X, y = load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; the seed keeps the split fixed.
X, Xtest, y, ytest = train_test_split(X, y, test_size=.2, random_state=0)

# L1-penalised logistic regression using the liblinear solver.
clf = LogisticRegression(
    penalty='l1',
    solver='liblinear',
    random_state=0,
    max_iter=1000,
)
clf.fit(X, y)

# Score on the held-out samples (displayed as the cell output).
clf.score(Xtest, ytest)

In [None]:
# Same iris example, now with an L2 penalty and the lbfgs solver.
X, y = load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; the seed keeps the split fixed.
X, Xtest, y, ytest = train_test_split(X, y, test_size=.2, random_state=0)

clf = LogisticRegression(penalty='l2', solver='lbfgs', random_state=0, max_iter=1000).fit(X, y)

# Print the score explicitly: this is not the last expression of the cell,
# so a bare `clf.score(...)` would be evaluated and silently discarded.
print(f'Test score: {clf.score(Xtest, ytest)}')

# Confusion matrix of the test-set predictions against the true labels.
predictions = clf.predict(Xtest)
cmatrix = confusion_matrix(ytest, predictions)

print(cmatrix)

In [None]:
# Show the summary statistics of the dataset.
data_df.describe()

In [None]:
# Display the first 5 rows of the dataset to inspect the data types
# and the available features (columns).
data_df.head(n=5)

In [None]:
# Split the data into train and test subsets; 20% of the rows go to the test set.
# The split is seeded (like the iris examples) so that the scores computed from
# it are the same on every run.
train_df_full, test_df_full = train_test_split(data_df, test_size=.2, random_state=0)

# Drop the non-numeric columns.
# Note: these columns could be used by converting their labels into numbers.
non_numeric_cols = ['Class', 'Type of Travel', 'Customer Type']

train_df = train_df_full.drop(non_numeric_cols, axis='columns')
test_df = test_df_full.drop(non_numeric_cols, axis='columns')

# Discard rows that contain missing values.
train_df = train_df.dropna()
test_df = test_df.dropna()

# The satisfaction column is the expected label / model answer (y);
# every remaining column is a feature.
X = train_df.drop('satisfaction', axis='columns')
y = train_df.satisfaction

Xtest = test_df.drop('satisfaction', axis='columns')
ytest = test_df.satisfaction

print(X.describe())
print(y.describe())


In [None]:
# Convert the pandas features and labels to NumPy arrays.
X = X.to_numpy()
y = y.to_numpy()
Xtest = Xtest.to_numpy()
ytest = ytest.to_numpy()

In [None]:
# Train a logistic regression model on the airline dataset.
clf = LogisticRegression(
    penalty='l2',
    solver='lbfgs',
    random_state=0,
    max_iter=10000,
)
clf.fit(X, y)

# Score on the test split (displayed as the cell output).
clf.score(Xtest, ytest)

In [None]:
# Predict the whole test set once and reuse the result below.
predictions = clf.predict(Xtest)

# Compare the last test example with its prediction. The ground truth must be
# read from ytest: y holds the training labels, which are unrelated to Xtest[-1].
print(f"Ground truth: {ytest[-1]}, Predicted Value: {predictions[-1]}")

# Inspect the results through the confusion matrix.
cmatrix = confusion_matrix(ytest, predictions)

print(cmatrix)


In [None]:
# Binary encoding of the labels: 1.0 for 'satisfied', 0.0 for anything else.
ynum = [1.0 if x == 'satisfied' else 0.0 for x in ytest]
prednum = [1.0 if x == 'satisfied' else 0.0 for x in predictions]

# F1 is defined on hard (thresholded) predictions.
print(f1_score(ynum, prednum))

# ROC AUC ranks examples by a continuous score, so feed it the predicted
# probability of the 'satisfied' class instead of the 0/1 predictions;
# hard labels collapse the ROC curve to a single operating point.
satisfied_idx = list(clf.classes_).index('satisfied')
satisfied_proba = clf.predict_proba(Xtest)[:, satisfied_idx]
print(roc_auc_score(ynum, satisfied_proba))

In [None]:
# Trying different MLP configurations.
# Configuration 1: one hidden layer with 2 tanh units, trained with SGD.

# L1-normalise the train and test feature matrices.
Xnorm = normalize(X, norm='l1')
Xtest_norm = normalize(Xtest, norm='l1')

mlp1 = MLPClassifier(
    hidden_layer_sizes=(2,),
    activation='tanh',
    solver='sgd',
    learning_rate='constant',
    learning_rate_init=0.01,
    max_iter=1000,
)
mlp1.fit(Xnorm, y)

print(f'Train score: {mlp1.score(Xnorm, y)}')
print(f'Test score: {mlp1.score(Xtest_norm, ytest)}')

In [None]:

# Configuration 2: one hidden layer with 10 tanh units, trained with SGD.

# L1-normalise the train and test feature matrices.
Xnorm = normalize(X, norm='l1')
Xtest_norm = normalize(Xtest, norm='l1')

mlp2 = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation='tanh',
    solver='sgd',
    learning_rate='constant',
    learning_rate_init=0.01,
    max_iter=1000,
)
mlp2.fit(Xnorm, y)

print(f'Train score: {mlp2.score(Xnorm, y)}')
print(f'Test score: {mlp2.score(Xtest_norm, ytest)}')

In [None]:
# Configuration 3: one hidden layer with 100 tanh units, trained with SGD.

# L1-normalise the train and test feature matrices.
Xnorm = normalize(X, norm='l1')
Xtest_norm = normalize(Xtest, norm='l1')

mlp3 = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='tanh',
    solver='sgd',
    learning_rate='constant',
    learning_rate_init=0.01,
    max_iter=1000,
)
mlp3.fit(Xnorm, y)

print(f'Train score: {mlp3.score(Xnorm, y)}')
print(f'Test score: {mlp3.score(Xtest_norm, ytest)}')

In [None]:
# Configuration 4: two hidden layers (20 and 10 ReLU units), trained with Adam.

# L1-normalise the train and test feature matrices.
Xnorm = normalize(X, norm='l1')
Xtest_norm = normalize(Xtest, norm='l1')

mlp4 = MLPClassifier(
    hidden_layer_sizes=(20, 10),
    activation='relu',
    solver='adam',
    learning_rate='constant',
    learning_rate_init=0.01,
    max_iter=1000,
)
mlp4.fit(Xnorm, y)

print(f'Train score: {mlp4.score(Xnorm, y)}')
print(f'Test score: {mlp4.score(Xtest_norm, ytest)}')

In [None]:
# Configuration 5: three hidden layers (5, 10 and 5 ReLU units), trained with
# Adam and a smaller initial learning rate (0.001).

# L1-normalise the train and test feature matrices.
Xnorm = normalize(X, norm='l1')
Xtest_norm = normalize(Xtest, norm='l1')

mlp5 = MLPClassifier(
    hidden_layer_sizes=(5, 10, 5),
    activation='relu',
    solver='adam',
    learning_rate='constant',
    learning_rate_init=0.001,
    max_iter=1000,
)
mlp5.fit(Xnorm, y)

print(f'Train score: {mlp5.score(Xnorm, y)}')
print(f'Test score: {mlp5.score(Xtest_norm, ytest)}')

In [None]:
# Repeat the experiment keeping the categorical columns, converted to integer
# codes with an OrdinalEncoder instead of being dropped.

# Object-dtype columns are the categorical ones. The target column is excluded:
# it is the label, not a feature, so it must not go through the feature encoder.
cols_mask = (data_df.dtypes == 'object')
categorical_cols = [col for col in cols_mask[cols_mask].index if col != 'satisfaction']
print(categorical_cols)

encoder = OrdinalEncoder()

# Features and labels are both taken from the same NaN-free frame, so they are
# guaranteed to stay row-aligned (no dependency on frames built in other cells).
X = train_df_full.copy()
X = X.dropna()
y = X.satisfaction
X = X.drop('satisfaction', axis='columns')
X[categorical_cols] = encoder.fit_transform(X[categorical_cols])

Xtest = test_df_full.copy()
Xtest = Xtest.dropna()
ytest = Xtest.satisfaction
Xtest = Xtest.drop('satisfaction', axis='columns')
# Only transform here: re-fitting on the test set could map the same category
# to a different integer than the one the models were trained on.
Xtest[categorical_cols] = encoder.transform(Xtest[categorical_cols])

print(X.describe())
print(y.describe())

X, y, Xtest, ytest = X.to_numpy(), y.to_numpy(), Xtest.to_numpy(), ytest.to_numpy()

# Logistic regression on the full (encoded) feature set.
clf = LogisticRegression(penalty='l2', solver='lbfgs', random_state=0, max_iter=10000).fit(X, y)
print(f"Logistic Train score: {clf.score(X,y)}")
print(f"Logistic Test  score: {clf.score(Xtest, ytest)}")

# MLP on the L1-normalised version of the same features.
Xnorm = normalize(X, norm='l1')
mlp = MLPClassifier(hidden_layer_sizes=(20,10), activation='relu', solver='adam', learning_rate='constant',learning_rate_init=0.01, max_iter=1000)
mlp.fit(Xnorm, y)
Xtest_norm = normalize(Xtest, norm='l1')

print(f'MLP Train score: {mlp.score(Xnorm, y)}')
print(f'MLP Test score: {mlp.score(Xtest_norm, ytest)}')

