In [1]:
## IMPORTS ##
import scipy.sparse
import numpy as np
import matplotlib.pyplot as pyplot
import sklearn.metrics as mtc
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import train_test_split
from sklearn.multioutput import ClassifierChain

In [86]:
## FUNCIONES ##

class TFM:
    simplificador= False
    def __init__(self,path):
        self.X_tr = scipy.sparse.load_npz(path+'X_tr.npz')
        self.X_tst = scipy.sparse.load_npz(path+'X_tst.npz')
        self.y_tr = np.load(path+'y_tr.npy')
        self.y_tst = np.load(path+'y_tst.npy')
        # Se binarizan las caracteristicas 
        self.y_tr=MultiLabelBinarizer().fit_transform(self.y_tr)
        self.y_tst=MultiLabelBinarizer().fit_transform(self.y_tst)
        # Escalado 
        scaler = MaxAbsScaler().fit(self.X_tr)
        self.X_tr=scaler.transform(self.X_tr)
        self.X_tst=scaler.transform(self.X_tst)
        print ('Dataset cargado y preprocesado')

    def reduce_features(self,num_data):
        columns=np.array([])
        for i in range(np.shape(self.X_tr)[1]):
            if self.X_tr[:,i].count_nonzero()> num_data:
                columns=np.append(columns,i)
        np.savetxt('features'+num_data+'.txt',columns,delimiter=',')
        print('Las características han sido reducidas')

    def simplify_dataset(self,features,tr_size):
        self.X_tr_simpli=self.X_tr
        self.y_tr_simpli=self.y_tr
        columns=np.loadtxt('features'+features+'.txt',delimiter=',')
        self.X_tr_simpli=self.X_tr[:,columns]
        self.X_tr_simpli, __, self.y_tr_simpli, __ = train_test_split(self.X_tr_simpli, self.y_tr_simpli, test_size=(1-tr_size), random_state=42)
        self.simplificador= True
        print('Simplificación finalizada')

    def grid_search_cv(self,classifier,parameters,metodo):
        if (metodo == 0):
            classif = OneVsRestClassifier(classifier)
        elif (metodo==1):
            classif = ClassifierChain(classifier)
        model_tunning = GridSearchCV(classif, param_grid=parameters,cv=4)
        if (self.simplificador):
            model_tunning.fit(X_tr_simpli, y_tr_simpli)
        else :
            model_tunning.fit(X_tr, y_tr)
        print (model_tunning.best_score_)
        print (model_tunning.best_params_)
        self.modelo = model_tunning

    def mlp(self):
        clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(300,200,100,50), random_state=0)
        if (simplificador):
            clf.fit(self.X_tr_simpli, self.y_tr_simpli)
        else: 
            clf.fit(self.X_tr, self.y_tr)
        self.modelo = clf

    def metrics(self):
        y_pred=self.modelo.predict(self.X_tst)
        accuracy=mtc.accuracy_score(self.y_tst,y_pred)
        hamming=mtc.hamming_loss(self.y_tst,y_pred)
        precision=mtc.precision_score(self.y_tst,y_pred,average='micro')
        print("Total accuracy: ", accuracy)
        print("Hamming loss: ", hamming)
        print("Precision: ", precision)
        print(mtc.classification_report(self.y_tst,y_pred))
        print("Accuracy per class:")
        aux=0
        for i in range(np.shape(y_pred)[1]):
            print ("Class " ,i,": " ,mtc.accuracy_score(self.y_tst[:,i],y_pred[:,i]))
            aux=aux+mtc.accuracy_score(self.y_tst[:,i],y_pred[:,i])
        print("Accuracy media: ",aux/37)


In [87]:
#PRIMERA SOLUCIÓN 
#Variables a usar 
path_string='../../Datasets/dataset/'
non_zero_data='250'
training_size=0.1
clasificador=LogisticRegression()
metod=0
parameters = {
    "estimator__C": [1],
    "estimator__solver": ["liblinear"],
    "estimator__tol": [0.01],
    "estimator__class_weight":[None]
}
clasificacion= TFM(path_string)

Dataset cargado y preprocesado


In [88]:
#clasificacion.reduce_features(10000)
clasificacion.simplify_dataset('3000',0.1)


Simplificación finalizada


In [None]:
#SEGUNDA SOLUCIÓN
#tunning 
parameters = {
    "base_estimator__C": [1,10,100,1000],
    "base_estimator__solver": ["liblinear"],
    "base_estimator__tol": [0.1,0.01,0.001,0.0001],
    "base_estimator__max_iter":[10000]
}

modelo2=classifier_chain_tunning(X_aux,y_aux,LogisticRegression(),parameters)

In [39]:
#metricas
metrics(X_tst,y_tst,modelo2)

Total accuracy:  0.4042672655811342
Hamming loss:  0.02954610983808064
Precision:  0.739946380697051
              precision    recall  f1-score   support

           0       0.83      0.67      0.74       109
           1       0.72      0.51      0.60       138
           2       0.75      0.66      0.70       224
           3       0.69      0.62      0.65       180
           4       0.75      0.69      0.72       177
           5       0.75      0.66      0.70       262
           6       0.63      0.53      0.57        55
           7       0.79      0.69      0.74       341
           8       0.60      0.53      0.56        91
           9       0.47      0.42      0.44        65
          10       0.74      0.67      0.70       181
          11       0.59      0.40      0.48        25
          12       0.68      0.59      0.63       172
          13       1.00      0.40      0.57        10
          14       0.92      0.71      0.80       173
          15       0.54      0.40 

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [6]:
#DEEP LEARNING
from sklearn.neural_network import MLPClassifier

In [13]:
#MLP
modelo3 = mlp(X_tr,y_tr)

In [14]:
#metricas
metrics(X_tst,y_tst,modelo3)

Total accuracy:  0.39191465468837733
Hamming loss:  0.030031716163103024
Precision:  0.7275437942601566
              precision    recall  f1-score   support

           0       0.83      0.61      0.71       109
           1       0.78      0.50      0.61       138
           2       0.75      0.68      0.71       224
           3       0.72      0.61      0.66       180
           4       0.81      0.67      0.73       177
           5       0.69      0.68      0.69       262
           6       0.62      0.65      0.64        55
           7       0.78      0.69      0.73       341
           8       0.61      0.48      0.54        91
           9       0.48      0.40      0.44        65
          10       0.68      0.72      0.70       181
          11       0.39      0.44      0.42        25
          12       0.63      0.55      0.59       172
          13       0.00      0.00      0.00        10
          14       0.85      0.77      0.81       173
          15       0.56      0.

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
