<a href="https://colab.research.google.com/github/tommasosenatori/AIEA/blob/main/Bozza.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Librerie

In [None]:
# Importazione delle librerie necessarie

import os
import sys
from collections import Counter

import librosa
import librosa.display  # for displaying spectrograms (not used yet)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import Audio  # lets us play the audio files inline, if needed
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.utils import to_categorical

# Funzioni di supporto

In [None]:
# Spectrogram image dimensions

H = 128  # image height; default number of mel bands of the spectrogram extractor
W = 128  # image width; default number of time frames kept per spectrogram

In [None]:
# Mel-spectrogram extraction

def extract_mel_spectrogram(file_path, img_height=H, img_width=W):
    """Load an audio file and return a fixed-size, min-max normalised mel spectrogram.

    Parameters
    ----------
    file_path : str
        Path of the audio file, passed to ``librosa.load``.
    img_height : int
        Number of mel bands (rows of the returned array).
    img_width : int
        Number of time frames (columns of the returned array).

    Returns
    -------
    numpy.ndarray
        2-D array with ``img_width`` columns and values in ``[0, 1]``.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)
    spect = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=img_height)
    spect_db = librosa.power_to_db(spect, ref=np.max)

    # Min-max scaling. A constant spectrogram (e.g. digital silence) has a
    # zero value range and would otherwise turn into an array of NaN.
    lowest = spect_db.min()
    value_range = spect_db.max() - lowest
    if value_range > 0:
        spect_norm = (spect_db - lowest) / value_range
    else:
        spect_norm = np.zeros_like(spect_db)

    # Force a fixed width: crop long clips and zero-pad short ones, so that
    # every spectrogram can be stacked into a single array.
    spect_norm = spect_norm[:, :img_width]
    missing_frames = img_width - spect_norm.shape[1]
    if missing_frames > 0:
        spect_norm = np.pad(spect_norm, ((0, 0), (0, missing_frames)), mode="constant")
    return spect_norm


# Alias: the data-preparation cell calls the extractor under this name.
wav_to_spectrogram = extract_mel_spectrogram

In [None]:
# Confusion-matrix plot

def plot_confusion_matrix(y, y_pred, class_names=None):
    """Plot the confusion matrix of true vs. predicted labels as an annotated heatmap.

    Parameters
    ----------
    y : array-like
        True labels.
    y_pred : array-like
        Predicted labels.
    class_names : sequence, optional
        Tick labels, one per class, in the row/column order of the matrix.
        When omitted, the sorted distinct values found in ``y`` and
        ``y_pred`` are used.

    Raises
    ------
    ValueError
        If ``class_names`` does not have one entry per class.
    """
    # Fix the class order explicitly so ticks and matrix rows always agree.
    class_values = np.unique(np.concatenate([np.ravel(y), np.ravel(y_pred)]))
    cm = confusion_matrix(y, y_pred, labels=class_values)

    if class_names is None:
        class_names = class_values
    if len(class_names) != cm.shape[0]:
        raise ValueError(
            f"class_names has {len(class_names)} entries but the confusion matrix has {cm.shape[0]} classes"
        )

    fig, ax = plt.subplots(figsize=(20, 20))
    image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Purples)
    fig.colorbar(image, ax=ax)
    ax.set_title("Matrice di confusione", fontsize=20)

    tick_marks = np.arange(len(class_names))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(class_names, rotation=45)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(class_names)
    ax.set_ylabel('True label', fontsize=16)
    ax.set_xlabel('Predicted label', fontsize=16)

    # Write each count in its cell; use white text on dark cells for contrast.
    limit = cm.max() / 2.
    for i, j in np.ndindex(cm.shape):
        ax.text(j, i, format(cm[i, j], 'd'), horizontalalignment="center",
                color="white" if cm[i, j] > limit else "black")
    plt.show()

# Caricamento dati

In [None]:
# Mount Google Drive

from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the class labels (one sub-folder of the dataset root per class)

# TODO: set to the dataset root folder; still empty because the dataset
# could not be downloaded (not enough disk space).
dataset_path = ''

# Keep only sub-folders: a stray file in the dataset root would otherwise
# become a bogus class and break the per-class file listing.
instr_labels = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)
print(instr_labels)

In [None]:
# Collect every file path together with its class label

paths = []
labels = []
for instr in instr_labels:
    instr_dir = os.path.join(dataset_path, instr)
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # dataset order (and therefore the seeded split) reproducible.
    for filename in sorted(os.listdir(instr_dir)):
        paths.append(os.path.join(instr_dir, filename))
        labels.append(instr)
print('Dataset caricato')

In [None]:
# Number of files per class

counts = Counter(labels)

for class_name, n_files in counts.items():
    print(f"{class_name}: {n_files}")

In [None]:
# Bar chart of the number of audio files per class

fig, ax = plt.subplots(figsize=(18, 12))
ax.bar(counts.keys(), counts.values())
ax.set_title('Audio per classe')
ax.set_xlabel('Classi')
ax.set_ylabel('Conteggio')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

# Preparazione dati

In [None]:
# Build the spectrogram and label arrays

X = []
y = []

for file_path, label in zip(paths, labels):
    X.append(wav_to_spectrogram(file_path))
    y.append(label)

X = np.array(X)
y = np.array(y)

# Add the trailing channel axis, (samples, H, W) -> (samples, H, W, 1):
# the network input is declared as (H, W, 1), i.e. single-channel images.
X = X[..., np.newaxis]
assert X.shape[1:] == (H, W, 1), f"unexpected spectrogram shape: {X.shape}"

In [None]:
# Train/test split

seed = 42  # fixed seed so the split is reproducible

# 80/20 split, stratified on the labels so that every class keeps the same
# proportion in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=seed
)

In [None]:
# Label encoding: class name -> integer index -> one-hot vector

# to_categorical expects integer class indices, while y_train / y_test hold
# class names; map every name to its position in instr_labels first.
class_names = list(instr_labels)
class_to_index = {name: index for index, name in enumerate(class_names)}

y_train_idx = np.array([class_to_index[name] for name in y_train])
y_test_idx = np.array([class_to_index[name] for name in y_test])

y_train_enc = to_categorical(y_train_idx, num_classes=len(class_names))
y_test_enc = to_categorical(y_test_idx, num_classes=len(class_names))

# Modello

In [None]:
# Model definition: a basic CNN made of three Conv2D + MaxPooling2D stages
# followed by a dense classifier with dropout.

input_shape = (H, W, 1)
num_classes = len(instr_labels)

model = Sequential()

# Convolutional feature extractor
model.add(Conv2D(32, (3,3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(128, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))

# Classifier head: one softmax output per class
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

In [None]:
# Compile the model

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Model summary

model.summary()

In [None]:
# Model training

epochs = 20  # arbitrary starting value, not tuned
batch_size = 32  # common default

# NOTE(review): the test split is also passed as validation data, and the
# same split is used again for the final evaluation further down.
history = model.fit(
    X_train,
    y_train_enc,
    validation_data=(X_test, y_test_enc),
    epochs=epochs,
    batch_size=batch_size,
)

# Valutazione

In [None]:
# Training vs. validation accuracy per epoch

fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='training accuracy')
ax.plot(history.history['val_accuracy'], label='val accuracy')
ax.set_title('Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch

fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='training loss')
ax.plot(history.history['val_loss'], label='val loss')
ax.set_title('Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Accuracy on the test set

# evaluate returns the loss followed by the compiled metrics (accuracy here)
test_loss, test_acc = model.evaluate(X_test, y_test_enc, verbose=0)
print(f"Test accuracy: {test_acc:.2f}")

In [None]:
# Classification report

# Per-class scores -> predicted class index (arg-max over the class axis)
y_pred = model.predict(X_test)
y_pred_enc = y_pred.argmax(axis=1)

# Recover the true class indices from the one-hot test labels
y_test_encoded = y_test_enc.argmax(axis=1)

print("Rapporto di classificazione:")
print(classification_report(y_test_encoded, y_pred_enc, target_names=np.unique(y)))

In [None]:
# Performance evaluation through the confusion matrix

# Translate class indices back to class names, so that the matrix is built
# (and can be read) per class name rather than per opaque integer index.
index_to_name = np.array(instr_labels)
plot_confusion_matrix(index_to_name[y_test_encoded], index_to_name[y_pred_enc])