In [2]:
# Importare le librerie necessarie
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KDTree
from sklearn.preprocessing import MinMaxScaler, PowerTransformer, RobustScaler, StandardScaler
from sklearn.metrics import accuracy_score, f1_score, fbeta_score, precision_score, recall_score

# 1. Caricamento del dataset
file_path = 'Data/features_3_sec.csv'
data = pd.read_csv(file_path)

# Stampa i nomi delle colonne per verificare
# print("Colonne nel DataFrame:", data.columns)

# 2. Separare le caratteristiche (X) e le etichette (y)
X = data.drop(columns=['filename', 'label'])  # Rimuovi filename e label
y = data['label']  # Etichette

# 3. Normalizzazione delle caratteristiche
# Standardizzare le caratteristiche
robust = RobustScaler()
standard = StandardScaler()
minmax = MinMaxScaler()
power = PowerTransformer()
scalers=[robust, standard, minmax, power]





ModuleNotFoundError: No module named 'pandas'

<h2> STANDARD KD-TREES


In [2]:
for scaler in scalers:
    X_scaled = scaler.fit_transform(X)

    # 4. Suddivisione del dataset in training e test set
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # 5. Creazione del KD-Tree con i dati di addestramento
    kd_tree = KDTree(X_train, leaf_size=30)

    # 6. Funzione per trovare i vicini più prossimi
    def knn_with_kdtree(kd_tree, X_test, X_train, y_train, k=5):
        # Per ogni punto del test set, trova i k vicini più prossimi
        distances, indices = kd_tree.query(X_test, k=k)
        
        # Predici il genere musicale basato sui vicini
        predictions = []
        for neighbors in indices:
            # Trova la classe maggioritaria tra i vicini
            neighbor_labels = y_train.iloc[neighbors]
            predicted_label = neighbor_labels.mode()[0]
            predictions.append(predicted_label)
        
        return predictions

    # 7. Usare il KD-Tree per classificare i dati di test
    y_pred = knn_with_kdtree(kd_tree, X_test, X_train, y_train, k=5)

     # Calcolare l'accuratezza
    # Calcolare metriche
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')  # Macro per bilanciare le classi
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    f2 = fbeta_score(y_test, y_pred, beta=2, average='macro')  # F2-score con beta=2

    # Stampare i risultati
    print(f'           Scaler: {scaler}')
    print(f'Accuratezza: {accuracy:.2f}')
    print(f'Precision: {precision:.2f}')
    print(f'Recall: {recall:.2f}')
    print(f'F1-score: {f1:.2f}')
    print(f'F2-score: {f2:.2f}')
   # print(f'Hinge Loss: {loss}')
    print('------------------------------------------------------------')

           Scaler: RobustScaler()
Accuratezza: 0.88
Precision: 0.88
Recall: 0.88
F1-score: 0.88
F2-score: 0.88
------------------------------------------------------------
           Scaler: StandardScaler()
Accuratezza: 0.90
Precision: 0.90
Recall: 0.90
F1-score: 0.90
F2-score: 0.90
------------------------------------------------------------
           Scaler: MinMaxScaler()
Accuratezza: 0.91
Precision: 0.91
Recall: 0.91
F1-score: 0.91
F2-score: 0.91
------------------------------------------------------------
           Scaler: PowerTransformer()
Accuratezza: 0.90
Precision: 0.90
Recall: 0.90
F1-score: 0.90
F2-score: 0.90
------------------------------------------------------------


<h2> BALANCED KD-TREES


In [3]:
for scaler in scalers:
    X_scaled = scaler.fit_transform(X)

    # 4. Suddivisione del dataset in training e test set
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # 5. Creazione del KD-Tree con i dati di addestramento
    kd_tree = KDTree(X_train, leaf_size=30)

    # 6. Funzione per trovare i vicini più prossimi
    def knn_with_balanced_kdtree(kd_tree, X_test, X_train, y_train, k=5):
        # Per ogni punto del test set, trova i k vicini più prossimi
        distances, indices = kd_tree.query(X_test, k=k)
        
        # Predici il genere musicale basato sui vicini
        predictions = []
        for neighbors in indices:
            # Trova la classe maggioritaria tra i vicini
            neighbor_labels = y_train.iloc[neighbors]
            predicted_label = neighbor_labels.mode()[0]
            predictions.append(predicted_label)
        
        return predictions

    # 7. Usare il KD-Tree per classificare i dati di test
    y_pred = knn_with_balanced_kdtree(kd_tree, X_test, X_train, y_train, k=5)

     # Calcolare l'accuratezza
    # Calcolare metriche
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')  # Macro per bilanciare le classi
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    f2 = fbeta_score(y_test, y_pred, beta=2, average='macro')  # F2-score con beta=2

    # Stampare i risultati
    print(f'           Scaler: {scaler}')
    print(f'Accuratezza: {accuracy:.2f}')
    print(f'Precision: {precision:.2f}')
    print(f'Recall: {recall:.2f}')
    print(f'F1-score: {f1:.2f}')
    print(f'F2-score: {f2:.2f}')
   # print(f'Hinge Loss: {loss}')
    print('------------------------------------------------------------')

           Scaler: RobustScaler()
Accuratezza: 0.88
Precision: 0.88
Recall: 0.88
F1-score: 0.88
F2-score: 0.88
------------------------------------------------------------
           Scaler: StandardScaler()
Accuratezza: 0.90
Precision: 0.90
Recall: 0.90
F1-score: 0.90
F2-score: 0.90
------------------------------------------------------------
           Scaler: MinMaxScaler()
Accuratezza: 0.91
Precision: 0.91
Recall: 0.91
F1-score: 0.91
F2-score: 0.91
------------------------------------------------------------
           Scaler: PowerTransformer()
Accuratezza: 0.90
Precision: 0.90
Recall: 0.90
F1-score: 0.90
F2-score: 0.90
------------------------------------------------------------


<h2> APPROXIMATE KD-TREES


In [4]:
for scaler in scalers:
    X_scaled = scaler.fit_transform(X)

    # 4. Suddivisione del dataset in training e test set
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # 5. Creazione del KD-Tree con i dati di addestramento
    kd_tree = KDTree(X_train, leaf_size=30, metric='euclidean')

    # 6. Funzione per trovare i vicini più prossimi
    def knn_with_approx_kdtree(kd_tree, X_test, X_train, y_train, k=5):
        # Per ogni punto del test set, trova i k vicini più prossimi
        distances, indices = kd_tree.query(X_test, k=k)
        
        # Predici il genere musicale basato sui vicini
        predictions = []
        for neighbors in indices:
            # Trova la classe maggioritaria tra i vicini
            neighbor_labels = y_train.iloc[neighbors]
            predicted_label = neighbor_labels.mode()[0]
            predictions.append(predicted_label)
        
        return predictions

    # 7. Usare il KD-Tree per classificare i dati di test
    y_pred =knn_with_approx_kdtree(kd_tree, X_test, X_train, y_train, k=5)

     # Calcolare l'accuratezza
    # Calcolare metriche
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')  # Macro per bilanciare le classi
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    f2 = fbeta_score(y_test, y_pred, beta=2, average='macro')  # F2-score con beta=2

    # Stampare i risultati
    print(f'           Scaler: {scaler}')
    print(f'Accuratezza: {accuracy:.2f}')
    print(f'Precision: {precision:.2f}')
    print(f'Recall: {recall:.2f}')
    print(f'F1-score: {f1:.2f}')
    print(f'F2-score: {f2:.2f}')
   # print(f'Hinge Loss: {loss}')
    print('------------------------------------------------------------')


           Scaler: RobustScaler()
Accuratezza: 0.88
Precision: 0.88
Recall: 0.88
F1-score: 0.88
F2-score: 0.88
------------------------------------------------------------
           Scaler: StandardScaler()
Accuratezza: 0.90
Precision: 0.90
Recall: 0.90
F1-score: 0.90
F2-score: 0.90
------------------------------------------------------------
           Scaler: MinMaxScaler()
Accuratezza: 0.91
Precision: 0.91
Recall: 0.91
F1-score: 0.91
F2-score: 0.91
------------------------------------------------------------
           Scaler: PowerTransformer()
Accuratezza: 0.90
Precision: 0.90
Recall: 0.90
F1-score: 0.90
F2-score: 0.90
------------------------------------------------------------


<h2> DYAMIC KD-TREES


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, fbeta_score

class StratifiedKDTree:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.tree = self.build_tree(X, y)

    def build_tree(self, X, y, depth=0):
        if len(y) == 0:
            return None
        
        # Seleziona l'asse da dividere
        k = X.shape[1]  # Numero di dimensioni
        axis = depth % k
        
        # Ordina i punti in base all'asse selezionato
        sorted_indices = np.argsort(X[:, axis])
        X_sorted = X[sorted_indices]
        y_sorted = y.iloc[sorted_indices]

        # Calcola il punto medio
        median_index = len(y_sorted) // 2
        
        # Crea il nodo e costruisci i sotto-alberi
        return {
            'point': X_sorted[median_index],
            'label': y_sorted.iloc[median_index],
            'left': self.build_tree(X_sorted[:median_index], y_sorted[:median_index], depth + 1),
            'right': self.build_tree(X_sorted[median_index + 1:], y_sorted[median_index + 1:], depth + 1)
        }

    def query(self, point):
        return self._query(self.tree, point)

    def _query(self, node, point, depth=0):
        if node is None:
            return None

        # Calcola la distanza dall'attuale punto
        k = len(point)
        axis = depth % k
        
        next_branch = None
        opposite_branch = None
        
        # Controlla quale ramo esplorare
        if point[axis] < node['point'][axis]:
            next_branch = node['left']
            opposite_branch = node['right']
        else:
            next_branch = node['right']
            opposite_branch = node['left']

        # Ricerca nel ramo successivo
        best = self._query(next_branch, point, depth + 1)

        # Confronta con il nodo attuale
        if best is None or np.linalg.norm(point - node['point']) < np.linalg.norm(point - best['point']):
            best = node

        # Controlla se dobbiamo esplorare l'altro ramo
        if abs(point[axis] - node['point'][axis]) < np.linalg.norm(point - best['point']):
            candidate = self._query(opposite_branch, point, depth + 1)
            if candidate is not None and np.linalg.norm(point - candidate['point']) < np.linalg.norm(point - best['point']):
                best = candidate

        return best

# Carica il tuo dataset
file_path = 'Data/features_3_sec.csv'
data = pd.read_csv(file_path)

# Separare le caratteristiche e le etichette
X = data.drop(columns=['filename', 'label']).values
y = data['label']

# Definire i vari scaler
scalers = [StandardScaler(), MinMaxScaler(), RobustScaler()]

# Per ogni scaler, normalizzare i dati e costruire il KD-tree stratificato
for scaler in scalers:
    X_scaled = scaler.fit_transform(X)

    # Suddivisione del dataset in training e test set
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # Costruzione del KD-tree stratificato
    kd_tree = StratifiedKDTree(X_train, y_train)

    # Predizioni
    y_pred = []
    for point in X_test:
        best = kd_tree.query(point)
        y_pred.append(best['label'])

    # Calcolare metriche
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')  # Macro per bilanciare le classi
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    f2 = fbeta_score(y_test, y_pred, beta=2, average='macro')  # F2-score con beta=2

    # Stampare i risultati
    print(f'Scaler: {scaler.__class__.__name__}')
    print(f'Accuratezza: {accuracy:.2f}')
    print(f'Precision: {precision:.2f}')
    print(f'Recall: {recall:.2f}')
    print(f'F1-score: {f1:.2f}')
    print(f'F2-score: {f2:.2f}')
    print('------------------------------------------------------------')


Scaler: StandardScaler
Accuratezza: 0.92
Precision: 0.92
Recall: 0.92
F1-score: 0.92
F2-score: 0.92
------------------------------------------------------------
Scaler: MinMaxScaler
Accuratezza: 0.93
Precision: 0.93
Recall: 0.93
F1-score: 0.93
F2-score: 0.93
------------------------------------------------------------
Scaler: RobustScaler
Accuratezza: 0.90
Precision: 0.90
Recall: 0.90
F1-score: 0.90
F2-score: 0.90
------------------------------------------------------------
