In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn import tree
from scipy.stats import iqr
from scipy.stats import skew
from scipy.stats import kurtosis
from sklearn.model_selection import GridSearchCV

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier

In [None]:
from heapq import nlargest

In [118]:
class EnsembleFramework:
    """Cascade of classifiers that progressively narrows the candidate labels.

    Every layer is fitted on the same training data. With the ``'topk'``
    criterion, each layer keeps, for every test sample, only the ``k``
    candidates it rates as most probable among those that survived the
    previous layers.
    """

    def __init__(self, criterion='topk', layers=None, params=None):
        """Store the ensemble configuration.

        Parameters
        ----------
        criterion : str
            Candidate-selection rule. Only ``'topk'`` triggers any filtering.
        layers : sequence, optional
            One entry per layer: either a Python expression string that
            evaluates to an estimator (e.g. ``'GaussianNB()'``) or an
            already-constructed object exposing ``fit`` and ``predict_proba``.
            Defaults to ``['DecisionTreeClassifier()', 'KNeighborsClassifier()']``.
        params : dict, optional
            For ``'topk'``, ``params['k']`` lists how many candidates each
            layer keeps. Defaults to ``{'k': [4, 1]}``.
        """
        self.criterion = criterion
        # Build the defaults per instance: mutable default arguments would be
        # shared (and mutated) across every EnsembleFramework object.
        if layers is None:
            layers = ['DecisionTreeClassifier()', 'KNeighborsClassifier()']
        if params is None:
            params = {'k': [4, 1]}
        self.layers = layers
        self.params = params

    def run(self, X_train, y_train, X_test):
        """Fit every layer and return the surviving candidate labels per sample.

        Parameters
        ----------
        X_train, y_train
            Training features and labels, passed unchanged to each layer's ``fit``.
        X_test
            Samples to classify, passed unchanged to each layer's ``predict_proba``.

        Returns
        -------
        list of list
            One list per test sample, ordered from most to least probable
            according to the last layer that filtered it. When the criterion
            is not ``'topk'`` every sample keeps all training labels.

        Raises
        ------
        ValueError
            If ``params['k']`` does not contain exactly one value per layer.
        """
        labels = sorted(set(y_train))
        # One independent list per sample; ``[x] * n`` would alias a single list.
        classes = [list(labels) for _ in range(len(X_test))]

        self.classifiers = []
        for spec in self.layers:
            # SECURITY: string specs are evaluated with eval(); only pass
            # trusted, hard-coded expressions, never user-supplied text.
            clf = eval(spec) if isinstance(spec, str) else spec
            clf.fit(X_train, y_train)
            self.classifiers.append(clf)

        if self.criterion == 'topk':
            ks = self.params['k']
            if len(ks) != len(self.classifiers):
                raise ValueError(
                    "params['k'] must contain one value per layer "
                    "(got %d values for %d layers)" % (len(ks), len(self.classifiers))
                )
            for clf, k in zip(self.classifiers, ks):
                probs = clf.predict_proba(X_test)
                # predict_proba columns follow clf.classes_, not the label
                # values themselves, so translate label -> column explicitly.
                column = {label: j for j, label in enumerate(getattr(clf, 'classes_', labels))}
                for i, series_probs in enumerate(probs):
                    classes[i] = nlargest(
                        k, classes[i], key=lambda label: series_probs[column[label]]
                    )

        return classes

    def accuracy(self, y_pred, y_test):
        """Return the percentage of samples whose top candidate is the true label.

        Parameters
        ----------
        y_pred : sequence of sequences
            Output of :meth:`run`. Only the first (highest-ranked) candidate of
            each entry is scored.
        y_test : sequence
            True labels, aligned with ``y_pred``.

        Raises
        ------
        ValueError
            If any entry of ``y_pred`` is empty.
        """
        top_labels = []
        for pred in y_pred:
            if len(pred) == 0:
                raise ValueError('every prediction must contain at least one candidate label')
            top_labels.append(pred[0])

        return accuracy_score(y_test, top_labels) * 100

In [120]:
valori_features = []
classi = []

path = "RelazioneTirocinio/"

# Compute the summary-statistic features of every time series.
# Each CSV row is split on commas: column 8 is parsed as the integer class
# label and columns 9 onward as the numeric samples of the series.

with open(path + 'Swissex.meta.csv', 'r', encoding='utf-8') as dati:
    for row in dati:
        if not row.strip():
            # Skip blank lines (e.g. a trailing newline) instead of crashing on riga[8].
            continue
        riga = row.strip().split(',')
        classe = int(riga[8])
        classi.append(classe)
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        valori = np.array(riga[9:]).astype(np.float64)
        media = np.mean(valori)
        mediana = np.median(valori)
        maxim = np.max(valori)
        minim = np.min(valori)
        std_dev = np.std(valori)
        rms = np.sqrt(np.mean(np.square(valori)))  # root mean square
        quantile = np.quantile(valori, 0.4)  # 40th percentile
        i_q_r = iqr(valori)
        simmetria = skew(valori)
        curtosi = kurtosis(valori)
        rang = maxim - minim
        # The column order below defines the feature layout of every row in valori_features.
        features = [rang, maxim, std_dev, rms, media, minim, quantile, mediana, curtosi, simmetria, i_q_r]
        valori_features.append(features)

In [None]:
# Hold out 30% of the series for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    valori_features,
    classi,
    test_size=0.3,
    random_state=100,
)

In [121]:
# Three-layer cascade: the decision tree keeps 5 candidate classes per series,
# k-nearest neighbours narrows them to 3, and Gaussian naive Bayes picks 1.
clf = EnsembleFramework(
    layers=['DecisionTreeClassifier()', 'KNeighborsClassifier()', 'GaussianNB()'],
    params={'k': [5, 3, 1]},
)
train_features = np.array(X_train)
train_labels = np.array(y_train)
test_features = np.array(X_test)
y_pred = clf.run(train_features, train_labels, test_features)
print(y_pred)

[[4], [1], [9], [7], [3], [10], [7], [2], [4], [1], [10], [10], [7], [10], [8], [5], [7], [2], [1], [7], [7], [10], [5], [1], [2], [5], [7], [10], [9], [0], [7], [9], [7], [7], [1], [10], [2], [10], [7], [10], [7], [9], [2], [6], [10], [1], [3], [6], [1], [1], [1], [2], [8], [1], [9], [7], [9], [9], [7], [1], [7], [9], [9], [1], [9], [1], [7], [0], [3], [4], [7], [8], [7], [7], [7], [7], [1], [10], [4], [5], [7], [0], [10], [10], [7], [7], [10], [7], [9], [10], [1], [0], [6], [10], [7], [9], [2], [7], [7], [0], [3], [8], [8], [10]]


In [122]:
# Accuracy of the ensemble on the held-out series, expressed as a percentage.
accuracy_pct = clf.accuracy(y_pred, y_test)
accuracy_pct

68.26923076923077