## Importar librerías e inicializar variables

In [256]:
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [257]:
############# HTRU_2 Dataset #########
names_HTRU = [
	'Mean_i_p',
	'Standard_deviation_i_p',
	'Excess_kurtosis_i_p',
	'Skewness_i_p',
	'Mean_DM-SNR_curve',
	'Standard_deviation_DM-SNR_curve',
	'Excess_kurtosis_DM-SNR_curve',
	'Skewness_DM-SNR_curve', 
	'Class'
]

data_HTRU = pd.read_csv("../Datasets/HTRU_2.csv", delimiter = ",", decimal = ".", names=names_HTRU)

## Pre-procesado de datos

In [276]:
############# HTRU_2 Dataset #########
target_HTRU = data_HTRU['Class']
train_HTRU = data_HTRU.drop('Class', axis=1)

# 1. [2, 3, 5, 6]
train1_HTRU = train_HTRU[[names_HTRU[2], names_HTRU[3], names_HTRU[5], names_HTRU[6]]]
X_train1_HTRU, X_test1_HTRU, y_train1_HTRU, y_test1_HTRU = train_test_split(train1_HTRU, target_HTRU, random_state=0, test_size=.2)

# 2. [1, 2, 5]
train2_HTRU = train_HTRU[[names_HTRU[1], names_HTRU[2], names_HTRU[5]]]
X_train2_HTRU, X_test2_HTRU, y_train2_HTRU, y_test2_HTRU = train_test_split(train2_HTRU, target_HTRU, random_state=0, test_size=.2)

# 3. [0, 2, 3, 5]
train3_HTRU = train_HTRU[[names_HTRU[0], names_HTRU[2], names_HTRU[3], names_HTRU[5]]]
X_train3_HTRU, X_test3_HTRU, y_train3_HTRU, y_test3_HTRU = train_test_split(train3_HTRU, target_HTRU, random_state=0, test_size=.2)

# 4. [0, 1, 2, 3, 4, 5, 6, 7]
train4_HTRU = train_HTRU[[names_HTRU[1], names_HTRU[2], names_HTRU[5]]]
X_train4_HTRU, X_test4_HTRU, y_train4_HTRU, y_test4_HTRU = train_test_split(train4_HTRU, target_HTRU, random_state=0, test_size=.2)

# 5. [2, 3, 5, 6]
train5_HTRU = train_HTRU[[names_HTRU[2], names_HTRU[3], names_HTRU[5], names_HTRU[6]]]
X_train5_HTRU, X_test5_HTRU, y_train5_HTRU, y_test5_HTRU = train_test_split(train5_HTRU, target_HTRU, random_state=0, test_size=.2)

# 6. [0, 2, 3, 6]
train6_HTRU = train_HTRU[[names_HTRU[0], names_HTRU[2], names_HTRU[3], names_HTRU[6]]]
X_train6_HTRU, X_test6_HTRU, y_train6_HTRU, y_test6_HTRU = train_test_split(train6_HTRU, target_HTRU, random_state=0, test_size=.2)

# 7. [0, 3, 4, 5]
train7_HTRU = train_HTRU[[names_HTRU[0], names_HTRU[3], names_HTRU[4], names_HTRU[5]]]
X_train7_HTRU, X_test7_HTRU, y_train7_HTRU, y_test7_HTRU = train_test_split(train7_HTRU, target_HTRU, random_state=0, test_size=.2)

# 8. [0, 4, 5, 7]
train8_HTRU = train_HTRU[[names_HTRU[0], names_HTRU[4], names_HTRU[5], names_HTRU[7]]]
X_train8_HTRU, X_test8_HTRU, y_train8_HTRU, y_test8_HTRU = train_test_split(train8_HTRU, target_HTRU, random_state=0, test_size=.2)


## Clasificación

#### 1. Clasificación con Random Forest

In [259]:
rf_class_HTRU = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=9)

In [260]:
rf_class_HTRU.fit(X_train1_HTRU, y_train1_HTRU)

accuracy_RF1 = accuracy_score(y_test1_HTRU, rf_class_HTRU.predict(X_test1_HTRU))
print('Clasificación featureset 1 con accuracy HTRU2: {:.3f}'.format(accuracy_RF1))

f1_RF1 = f1_score(y_test1_HTRU, rf_class_HTRU.predict(X_test1_HTRU), average='weighted')
print('Clasificación featureset 1 con f-measure HTRU2: {:.3f}'.format(f1_RF1))

total_RF1 = ((0.6 * f1_RF1) + (0.4 * accuracy_RF1)) / 2

Clasificación featureset 1 con accuracy HTRU2: 0.985
Clasificación featureset 1 con f-measure HTRU2: 0.985


In [261]:
rf_class_HTRU.fit(X_train2_HTRU, y_train2_HTRU)

accuracy_RF2 = accuracy_score(y_test2_HTRU, rf_class_HTRU.predict(X_test2_HTRU))
print('Clasificación featureset 2 con accuracy HTRU2: {:.3f}'.format(accuracy_RF2))

f1_RF2 = f1_score(y_test2_HTRU, rf_class_HTRU.predict(X_test2_HTRU), average='weighted')
print('Clasificación featureset 2 con f-measure HTRU2: {:.3f}'.format(f1_RF2))

total_RF2 = ((0.6 * f1_RF2) + (0.4 * accuracy_RF2)) / 2

Clasificación featureset 2 con accuracy HTRU2: 0.984
Clasificación featureset 2 con f-measure HTRU2: 0.984


In [262]:
rf_class_HTRU.fit(X_train3_HTRU, y_train3_HTRU)

accuracy_RF3 = accuracy_score(y_test3_HTRU, rf_class_HTRU.predict(X_test3_HTRU))
print('Clasificación featureset 3 con accuracy HTRU2: {:.3f}'.format(accuracy_RF3))

f1_RF3 = f1_score(y_test3_HTRU, rf_class_HTRU.predict(X_test3_HTRU), average='weighted')
print('Clasificación featureset 3 con f-measure HTRU2: {:.3f}'.format(f1_RF3))

total_RF3 = ((0.6 * f1_RF3) + (0.4 * accuracy_RF3)) / 2

Clasificación featureset 3 con accuracy HTRU2: 0.984
Clasificación featureset 3 con f-measure HTRU2: 0.984


In [263]:
rf_class_HTRU.fit(X_train4_HTRU, y_train4_HTRU)

accuracy_RF4 = accuracy_score(y_test4_HTRU, rf_class_HTRU.predict(X_test4_HTRU))
print('Clasificación featureset 4 con accuracy HTRU2: {:.3f}'.format(accuracy_RF4))

f1_RF4 = f1_score(y_test4_HTRU, rf_class_HTRU.predict(X_test4_HTRU), average='weighted')
print('Clasificación featureset 4 con f-measure HTRU2: {:.3f}'.format(f1_RF4))

total_RF4 = ((0.6 * f1_RF4) + (0.4 * accuracy_RF4)) / 2

Clasificación featureset 4 con accuracy HTRU2: 0.984
Clasificación featureset 4 con f-measure HTRU2: 0.984


In [278]:
rf_class_HTRU.fit(X_train5_HTRU, y_train5_HTRU)

accuracy_RF5 = accuracy_score(y_test5_HTRU, rf_class_HTRU.predict(X_test5_HTRU))
print('Clasificación featureset 5 con accuracy HTRU2: {:.3f}'.format(accuracy_RF5))

f1_RF5 = f1_score(y_test5_HTRU, rf_class_HTRU.predict(X_test5_HTRU), average='weighted')
print('Clasificación featureset 5 con f-measure HTRU2: {:.3f}'.format(f1_RF5))

total_RF5 = ((0.6 * f1_RF5) + (0.4 * accuracy_RF5)) / 2

Clasificación featureset 5 con accuracy HTRU2: 0.986
Clasificación featureset 5 con f-measure HTRU2: 0.985


In [279]:
rf_class_HTRU.fit(X_train6_HTRU, y_train6_HTRU)

accuracy_RF6 = accuracy_score(y_test6_HTRU, rf_class_HTRU.predict(X_test6_HTRU))
print('Clasificación featureset 6 con accuracy HTRU2: {:.3f}'.format(accuracy_RF6))

f1_RF6 = f1_score(y_test6_HTRU, rf_class_HTRU.predict(X_test6_HTRU), average='weighted')
print('Clasificación featureset 6 con f-measure HTRU2: {:.3f}'.format(f1_RF6))

total_RF6 = ((0.6 * f1_RF6) + (0.4 * accuracy_RF6)) / 2

Clasificación featureset 6 con accuracy HTRU2: 0.985
Clasificación featureset 6 con f-measure HTRU2: 0.984


In [280]:
rf_class_HTRU.fit(X_train7_HTRU, y_train7_HTRU)

accuracy_RF7 = accuracy_score(y_test7_HTRU, rf_class_HTRU.predict(X_test7_HTRU))
print('Clasificación featureset 7 con accuracy HTRU2: {:.3f}'.format(accuracy_RF7))

f1_RF7 = f1_score(y_test7_HTRU, rf_class_HTRU.predict(X_test7_HTRU), average='weighted')
print('Clasificación featureset 7 con f-measure HTRU2: {:.3f}'.format(f1_RF7))

total_RF7 = ((0.6 * f1_RF7) + (0.4 * accuracy_RF7)) / 2

Clasificación featureset 7 con accuracy HTRU2: 0.979
Clasificación featureset 7 con f-measure HTRU2: 0.978


In [281]:
rf_class_HTRU.fit(X_train8_HTRU, y_train8_HTRU)

accuracy_RF8 = accuracy_score(y_test8_HTRU, rf_class_HTRU.predict(X_test8_HTRU))
print('Clasificación featureset 8 con accuracy HTRU2: {:.3f}'.format(accuracy_RF8))

f1_RF8 = f1_score(y_test8_HTRU, rf_class_HTRU.predict(X_test8_HTRU), average='weighted')
print('Clasificación featureset 8 con f-measure HTRU2: {:.3f}'.format(f1_RF8))

total_RF8 = ((0.6 * f1_RF8) + (0.4 * accuracy_RF8)) / 2

Clasificación featureset 8 con accuracy HTRU2: 0.978
Clasificación featureset 8 con f-measure HTRU2: 0.978


#### 2. Clasificaión con LAMDA-HAD

In [264]:
target_LAMDA_HAD = target_HTRU[8949:]

In [265]:
y_pred1 = pd.read_csv("results1_HTRU2.csv", header=None)

In [266]:
accuracy_LAMDA1 = accuracy_score(target_LAMDA_HAD, y_pred1)
print('Clasificación featureset 1 con accuracy HTRU2: {:.3f}'.format(accuracy_LAMDA1))

f1_LAMDA1 = f1_score(target_LAMDA_HAD, y_pred1, average='weighted')
print('Clasificación featureset 1 con f-measure HTRU2: {:.3f}'.format(f1_LAMDA1))

total_LAMDA1 = ((0.6 * f1_LAMDA1) + (0.4 * accuracy_LAMDA1)) / 2

Clasificación featureset 1 con accuracy HTRU2: 0.983
Clasificación featureset 1 con f-measure HTRU2: 0.982


In [267]:
y_pred2 = pd.read_csv("results2_HTRU2.csv", header=None)

In [268]:
accuracy_LAMDA2 = accuracy_score(target_LAMDA_HAD, y_pred2)
print('Clasificación featureset 2 con accuracy HTRU2: {:.3f}'.format(accuracy_LAMDA2))

f1_LAMDA2 = f1_score(target_LAMDA_HAD, y_pred2, average='weighted')
print('Clasificación featureset 2 con f-measure HTRU2: {:.3f}'.format(f1_LAMDA2))

total_LAMDA2 = ((0.6 * f1_LAMDA2) + (0.4 * accuracy_LAMDA2)) / 2

Clasificación featureset 2 con accuracy HTRU2: 0.981
Clasificación featureset 2 con f-measure HTRU2: 0.980


In [269]:
y_pred3 = pd.read_csv("results3_HTRU2.csv", header=None)

In [270]:
accuracy_LAMDA3 = accuracy_score(target_LAMDA_HAD, y_pred3)
print('Clasificación featureset 3 con accuracy HTRU2: {:.3f}'.format(accuracy_LAMDA3))

f1_LAMDA3 = f1_score(target_LAMDA_HAD, y_pred3, average='weighted')
print('Clasificación featureset 3 con f-measure HTRU2: {:.3f}'.format(f1_LAMDA3))

total_LAMDA3 = ((0.6 * f1_LAMDA3) + (0.4 * accuracy_LAMDA3)) / 2

Clasificación featureset 3 con accuracy HTRU2: 0.983
Clasificación featureset 3 con f-measure HTRU2: 0.982


In [271]:
y_pred4 = pd.read_csv("results4_HTRU2.csv", header=None)

In [272]:
accuracy_LAMDA4 = accuracy_score(target_LAMDA_HAD, y_pred4)
print('Clasificación featureset 4 con accuracy HTRU2: {:.3f}'.format(accuracy_LAMDA4))

f1_LAMDA4 = f1_score(target_LAMDA_HAD, y_pred4, average='weighted')
print('Clasificación featureset 4 con f-measure HTRU2: {:.3f}'.format(f1_LAMDA4))

total_LAMDA4 = ((0.6 * f1_LAMDA4) + (0.4 * accuracy_LAMDA4)) / 2

Clasificación featureset 4 con accuracy HTRU2: 0.947
Clasificación featureset 4 con f-measure HTRU2: 0.921


  'precision', 'predicted', average, warn_for)


In [282]:
y_pred5 = pd.read_csv("results5_HTRU2.csv", header=None)

In [283]:
accuracy_LAMDA5 = accuracy_score(target_LAMDA_HAD, y_pred5)
print('Clasificación featureset 5 con accuracy HTRU2: {:.3f}'.format(accuracy_LAMDA5))

f1_LAMDA5 = f1_score(target_LAMDA_HAD, y_pred5, average='weighted')
print('Clasificación featureset 5 con f-measure HTRU2: {:.3f}'.format(f1_LAMDA5))

total_LAMDA5 = ((0.6 * f1_LAMDA5) + (0.4 * accuracy_LAMDA5)) / 2

Clasificación featureset 5 con accuracy HTRU2: 0.983
Clasificación featureset 5 con f-measure HTRU2: 0.983


In [284]:
y_pred6 = pd.read_csv("results6_HTRU2.csv", header=None)

In [285]:
accuracy_LAMDA6 = accuracy_score(target_LAMDA_HAD, y_pred6)
print('Clasificación featureset 6 con accuracy HTRU2: {:.3f}'.format(accuracy_LAMDA6))

f1_LAMDA6 = f1_score(target_LAMDA_HAD, y_pred6, average='weighted')
print('Clasificación featureset 6 con f-measure HTRU2: {:.3f}'.format(f1_LAMDA6))

total_LAMDA6 = ((0.6 * f1_LAMDA6) + (0.4 * accuracy_LAMDA6)) / 2

Clasificación featureset 6 con accuracy HTRU2: 0.981
Clasificación featureset 6 con f-measure HTRU2: 0.981


In [286]:
y_pred7 = pd.read_csv("results7_HTRU2.csv", header=None)

In [287]:
accuracy_LAMDA7 = accuracy_score(target_LAMDA_HAD, y_pred7)
print('Clasificación featureset 7 con accuracy HTRU2: {:.3f}'.format(accuracy_LAMDA7))

f1_LAMDA7 = f1_score(target_LAMDA_HAD, y_pred7, average='weighted')
print('Clasificación featureset 7 con f-measure HTRU2: {:.3f}'.format(f1_LAMDA7))

total_LAMDA7 = ((0.6 * f1_LAMDA7) + (0.4 * accuracy_LAMDA7)) / 2

Clasificación featureset 7 con accuracy HTRU2: 0.979
Clasificación featureset 7 con f-measure HTRU2: 0.978


In [288]:
y_pred8 = pd.read_csv("results8_HTRU2.csv", header=None)

In [289]:
accuracy_LAMDA8 = accuracy_score(target_LAMDA_HAD, y_pred8)
print('Clasificación featureset 8 con accuracy HTRU2: {:.3f}'.format(accuracy_LAMDA8))

f1_LAMDA8 = f1_score(target_LAMDA_HAD, y_pred8, average='weighted')
print('Clasificación featureset 8 con f-measure HTRU2: {:.3f}'.format(f1_LAMDA8))

total_LAMDA8 = ((0.6 * f1_LAMDA8) + (0.4 * accuracy_LAMDA8)) / 2

Clasificación featureset 8 con accuracy HTRU2: 0.874
Clasificación featureset 8 con f-measure HTRU2: 0.902


## Selección

In [290]:
classif = ['RF', 'LAMDA']
class_sel = ''
f_sel = 0
clases = []
val_sel = []
mayor = 0


clases.append(0 if total_RF1 > total_LAMDA1 else 1)
val_sel.append(total_RF1 if total_RF1 > total_LAMDA1 else total_LAMDA1)

clases.append(0 if total_RF2 > total_LAMDA2 else 1)
val_sel.append(total_RF2 if total_RF2 > total_LAMDA2 else total_LAMDA2)

clases.append(0 if total_RF3 > total_LAMDA3 else 1)
val_sel.append(total_RF3 if total_RF3 > total_LAMDA3 else total_LAMDA3)

clases.append(0 if total_RF4 > total_LAMDA4 else 1)
val_sel.append(total_RF4 if total_RF4 > total_LAMDA4 else total_LAMDA4)

clases.append(0 if total_RF5 > total_LAMDA5 else 1)
val_sel.append(total_RF5 if total_RF5 > total_LAMDA5 else total_LAMDA5)

clases.append(0 if total_RF6 > total_LAMDA6 else 1)
val_sel.append(total_RF6 if total_RF6 > total_LAMDA6 else total_LAMDA6)

clases.append(0 if total_RF7 > total_LAMDA7 else 1)
val_sel.append(total_RF7 if total_RF7 > total_LAMDA7 else total_LAMDA7)

clases.append(0 if total_RF8 > total_LAMDA8 else 1)
val_sel.append(total_RF8 if total_RF8 > total_LAMDA8 else total_LAMDA8)

for i in range(0,8):
    if val_sel[i] > mayor:
        mayor = val_sel[i]
        class_sel = classif[clases[i]]
        f_sel = i + 1
        
print('Seleccionado: ',class_sel, f_sel)

Seleccionado:  RF 5
