In [139]:
import pandas as pd
import numpy as np
import plotly.figure_factory as ff
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import svm
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier


from helpers import get_datasets
from helpers import MUSIC_GENRES

# Preparación de datos

In [140]:
(
    x_train, x_test, y_train, y_test,
    x_train_cf, x_test_cf, y_train_cf, y_test_cf,
    x_train_filtered, x_test_filtered, y_train_filtered, y_test_filtered,
    x_train_filtered_cf, x_test_filtered_cf, y_train_filtered_cf, y_test_filtered_cf
) = get_datasets()

In [141]:
modelo_list = []
escenario_list = []
train_score_list = []
test_score_list = []
precision_list = []
recall_list = []
fscore_list = []

## CART

### Original

In [142]:
tree = DecisionTreeClassifier().fit(x_train,y_train)

train_score = tree.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = tree.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('CART')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = tree.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991991457554725
Precisión:  0.6408821386325125  - Recall:  0.6397117694155324  - f1:  0.639702040054553


In [143]:
print(modelo_list)
print(escenario_list)
print(train_score_list)
print(precision_list)
print(recall_list)
print(fscore_list)

['CART']
['Original']
[0.9991991457554725]
[0.6408821386325125]
[0.6397117694155324]
[0.639702040054553]


### Columnas filtradas

In [144]:
tree = DecisionTreeClassifier().fit(x_train_cf,y_train_cf)

train_score = tree.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = tree.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('CART')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = tree.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991991457554725
Precisión:  0.6603051370203716  - Recall:  0.6601281024819856  - f1:  0.6596911234263239


### Outliers filtrados

In [145]:
tree = DecisionTreeClassifier().fit(x_train_filtered,y_train_filtered)

train_score = tree.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = tree.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('CART')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = tree.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991865509761388
Precisión:  0.6479391091504287  - Recall:  0.6482309882065881  - f1:  0.6476004536198635


### Columnas filtradas + Outliers filtrados

In [146]:
tree = DecisionTreeClassifier().fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = tree.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = tree.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('CART')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = tree.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991865509761388
Precisión:  0.6502822388858542  - Recall:  0.6486376575843839  - f1:  0.6490504038057148


## Naive Bayes

### Original

In [147]:
model = GaussianNB().fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Naive Bayes')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.4237853710624666
Precisión:  0.4395225944025703  - Recall:  0.42193755004003203  - f1:  0.3928869371730821


### Columnas filtradas

In [148]:
model = GaussianNB().fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Naive Bayes')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.38320875600640686
Precisión:  0.44554780314941916  - Recall:  0.3767013610888711  - f1:  0.359777778474413


### Outliers filtrados

In [149]:
model = GaussianNB().fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Naive Bayes')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.521556399132321
Precisión:  0.5295568048584859  - Recall:  0.5144367629117528  - f1:  0.4951908635416927


### Outliers filtrados + Columnas filtradas

In [150]:
model = GaussianNB().fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Naive Bayes')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.5281995661605207
Precisión:  0.5324569630298007  - Recall:  0.5152501016673444  - f1:  0.49602366110689855


## KNN

### Original

In [151]:
model = KNeighborsClassifier().fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Knn')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.48852108916177256
Precisión:  0.2751846383544419  - Recall:  0.27942353883106485  - f1:  0.27437544365920985


### Columnas filtradas

In [152]:
model = KNeighborsClassifier().fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Knn')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.48678590496529633
Precisión:  0.27320542192199926  - Recall:  0.27742193755004  - f1:  0.2724562378662088


### Outliers filtrados

In [153]:
model = KNeighborsClassifier().fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Knn')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9433297180043384
Precisión:  0.9028051117771679  - Recall:  0.9007726718178121  - f1:  0.900877190097251


### Outliers filtrados + Columnas filtradas

In [154]:
model = KNeighborsClassifier().fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Knn')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.940347071583514
Precisión:  0.8956228252932336  - Recall:  0.8938592923952826  - f1:  0.8938944606275244


## Stochastic Gradient Descent

### Original

In [155]:
model = SGDClassifier().fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Stochastic Gradient Descent')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.16577682861719167
Precisión:  0.1649967479431264  - Recall:  0.1489191353082466  - f1:  0.09253516879359204



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



### Columnas filtradas

In [156]:
model = SGDClassifier().fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Stochastic Gradient Descent')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.13214095034703685
Precisión:  0.13260607984862183  - Recall:  0.12409927942353884  - f1:  0.062344949370177545



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



### Outliers filtrados

In [157]:
model = SGDClassifier().fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Stochastic Gradient Descent')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.6973969631236443
Precisión:  0.6911158944532606  - Recall:  0.6657177714518097  - f1:  0.6659983772412743


### Outliers filtrados + Columnas filtradas

In [158]:
model = SGDClassifier().fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Stochastic Gradient Descent')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.6496746203904555
Precisión:  0.671830675942693  - Recall:  0.618137454249695  - f1:  0.5939382115561507


## Random Forest

### Original

In [159]:
model = RandomForestClassifier().fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Random Forest')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991991457554725
Precisión:  0.8767116986452436  - Recall:  0.8767013610888711  - f1:  0.8757216526920107


### Columnas filtradas

In [160]:
model = RandomForestClassifier().fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Random Forest')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991991457554725
Precisión:  0.8701796493511303  - Recall:  0.8698959167333867  - f1:  0.8688565850105179


### Outliers filtrados

In [161]:
model = RandomForestClassifier().fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Random Forest')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991865509761388
Precisión:  0.8687650203696349  - Recall:  0.8662057747051647  - f1:  0.8659691299250831


### Outliers filtrados + Columnas filtradas

In [162]:
model = RandomForestClassifier().fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Random Forest')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991865509761388
Precisión:  0.8682262121552883  - Recall:  0.8657991053273688  - f1:  0.8657652654463462


## Support Vector Machines

### Original

In [163]:
model = svm.SVC(C=250, gamma=0.8).fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Support Vector Machines')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991991457554725



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Precisión:  0.41197356582334277  - Recall:  0.10488390712570056  - f1:  0.04359413176170824


### Columnas filtradas

In [164]:
model = svm.SVC(C=250, gamma=0.8).fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Support Vector Machines')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991991457554725



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Precisión:  0.41197356582334277  - Recall:  0.10488390712570056  - f1:  0.04359413176170824


### Outliers filtrados

In [165]:
model = svm.SVC(C=250, gamma=0.8).fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Support Vector Machines')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991865509761388
Precisión:  0.9133429123638513  - Recall:  0.9133794225294836  - f1:  0.9132225446584054


### Outliers filtrados + Columnas filtrados

In [166]:
model = svm.SVC(C=250, gamma=0.8).fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Support Vector Machines')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991865509761388
Precisión:  0.9108623332405617  - Recall:  0.9109394062627084  - f1:  0.910730015965815


## Logistic Regression

### Original

In [167]:
model = LogisticRegression(random_state=0).fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Logistic Regression')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.22504004271222638
Precisión:  0.23104652930006403  - Recall:  0.20816653322658127  - f1:  0.16158378779086163



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



### Columnas filtradas

In [168]:
model = LogisticRegression(random_state=0).fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Logistic Regression')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.22303790710090762
Precisión:  0.19465709718143212  - Recall:  0.20176140912730184  - f1:  0.1534737673666398



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



### Outliers filtrados

In [169]:
model = LogisticRegression(random_state=0).fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Logistic Regression')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.7040401301518439
Precisión:  0.6798166145544465  - Recall:  0.6832045546970313  - f1:  0.6800010872351886



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



### Outliers filtrados + Columnas filtradas

In [170]:
model = LogisticRegression(random_state=0).fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Logistic Regression')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.6998373101952278
Precisión:  0.6771734437665846  - Recall:  0.6807645384302562  - f1:  0.6772741053761625



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



## Extra Trees

### Original

In [171]:
model = ExtraTreesClassifier().fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Extra Trees')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991991457554725
Precisión:  0.8896895687475057  - Recall:  0.88871096877502  - f1:  0.8881292469536146


### Columnas filtradas

In [172]:
model = ExtraTreesClassifier().fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Extra Trees')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991991457554725
Precisión:  0.8898802415271728  - Recall:  0.888310648518815  - f1:  0.8879885489741495


### Outliers filtrados

In [173]:
model = ExtraTreesClassifier(n_estimators=500, n_jobs=-1).fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Extra Trees')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991865509761388
Precisión:  0.9022873230282322  - Recall:  0.9003660024400163  - f1:  0.9002685178825804


### Outliers filtrados + Columnas filtradas

In [174]:
model = ExtraTreesClassifier(n_estimators=500, n_jobs=-1).fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Extra Trees')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9991865509761388
Precisión:  0.89435201163598  - Recall:  0.8914192761285076  - f1:  0.891435894637359


## Gradient Boosting

### Original

In [175]:
model = GradientBoostingClassifier().fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Gradient Boosting')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.952349172450614
Precisión:  0.8414039832245899  - Recall:  0.8402722177742193  - f1:  0.840198366656622


### Columnas filtradas

In [176]:
model = GradientBoostingClassifier().fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Gradient Boosting')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9510144153764015
Precisión:  0.8364636035717103  - Recall:  0.8362690152121698  - f1:  0.8356300103266742


### Outliers filtrados

In [177]:
model = GradientBoostingClassifier().fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Gradient Boosting')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9575650759219089
Precisión:  0.8179777318090669  - Recall:  0.815778771858479  - f1:  0.816245763848645


### Outliers filtrados + Columnas filtradas

In [178]:
model = GradientBoostingClassifier().fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('Gradient Boosting')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.9545824295010846
Precisión:  0.815737387189836  - Recall:  0.8141520943472956  - f1:  0.8142121397386144


## AdaBoost

### Original

In [179]:
model = AdaBoostClassifier().fit(x_train,y_train)

train_score = model.score(x_train,y_train)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('AdaBoost')
escenario_list.append('Original')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test,y_test)
test_score_list.append(test_score)

Score de entrenamiento: 
0.48932194340630003
Precisión:  0.466329269653537  - Recall:  0.47598078462770216  - f1:  0.46037225734615345


### Columnas filtradas

In [180]:
model = AdaBoostClassifier().fit(x_train_cf,y_train_cf)

train_score = model.score(x_train_cf,y_train_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('AdaBoost')
escenario_list.append('Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_cf,y_test_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.4766417512012814
Precisión:  0.4560695359175033  - Recall:  0.4643714971977582  - f1:  0.44451060178342994


### Outliers filtrados

In [181]:
model = AdaBoostClassifier().fit(x_train_filtered,y_train_filtered)

train_score = model.score(x_train_filtered,y_train_filtered)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('AdaBoost')
escenario_list.append('Outliers filtrados')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered,y_test_filtered)
test_score_list.append(test_score)

Score de entrenamiento: 
0.47071583514099785
Precisión:  0.4516785541493694  - Recall:  0.4591297275315169  - f1:  0.4306611273314511


### Outleirs filtrados + Columnas filtradas

In [182]:
model = AdaBoostClassifier().fit(x_train_filtered_cf,y_train_filtered_cf)

train_score = model.score(x_train_filtered_cf,y_train_filtered_cf)

print('Score de entrenamiento: ')
print(train_score)

y_pred = model.predict(x_test_filtered_cf)
precision, recall, fscore, _ = precision_recall_fscore_support(y_test_filtered_cf, y_pred, average='weighted')
print('Precisión: ', precision, ' - Recall: ', recall, ' - f1: ', fscore)
######
modelo_list.append('AdaBoost')
escenario_list.append('Outliers filtrados + Columnas filtradas')
train_score_list.append(train_score)
precision_list.append(precision)
recall_list.append(recall)
fscore_list.append(fscore)
test_score = model.score(x_test_filtered_cf,y_test_filtered_cf)
test_score_list.append(test_score)

Score de entrenamiento: 
0.47369848156182215
Precisión:  0.44484405753786627  - Recall:  0.4668564457096381  - f1:  0.443178346049311


In [184]:
df_resultados = pd.DataFrame(list(zip(modelo_list,escenario_list,train_score_list,test_score_list,precision_list,recall_list,fscore_list)),columns=['Modelo','Escenario','Score entrenamiento','Score testeo','Precisión','Recall','F1 Score'])
df_resultados = df_resultados.sort_values('Score testeo',ascending=False)
df_resultados

Unnamed: 0,Modelo,Escenario,Score entrenamiento,Score testeo,Precisión,Recall,F1 Score
22,Support Vector Machines,Outliers filtrados,0.999187,0.913379,0.913343,0.913379,0.913223
23,Support Vector Machines,Columnas filtradas,0.999187,0.910939,0.910862,0.910939,0.91073
10,Knn,Outliers filtrados,0.94333,0.900773,0.902805,0.900773,0.900877
30,Extra Trees,Outliers filtrados,0.999187,0.900366,0.902287,0.900366,0.900269
11,Knn,Outliers filtrados + Columnas filtradas,0.940347,0.893859,0.895623,0.893859,0.893894
31,Extra Trees,Outliers filtrados + Columnas filtradas,0.999187,0.891419,0.894352,0.891419,0.891436
28,Extra Trees,Original,0.999199,0.888711,0.88969,0.888711,0.888129
29,Extra Trees,Columnas filtradas,0.999199,0.888311,0.88988,0.888311,0.887989
16,Random Forest,Original,0.999199,0.876701,0.876712,0.876701,0.875722
17,Random Forest,Columnas filtradas,0.999199,0.869896,0.87018,0.869896,0.868857
