In [None]:
import warnings
import requests
from requests.auth import HTTPBasicAuth
from bs4 import BeautifulSoup
import io
import csv
from urllib.parse import urljoin

from aed_models import *

# Globally silence FutureWarning and UserWarning messages so they do not
# flood the notebook output.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# === MODEL PARAMETERS ===
sample_rate = 16_000              # sample rate every recording is resampled to
duration = 1                      # segment duration, in seconds
seg_len = duration * sample_rate  # number of samples in one analysed segment
threshold = 2e-3                  # losses above this value are labelled anomalous

In [None]:
# === FUNZIONI ===
def load_audio(file_like, target_length=seg_len):
    """Load a recording and return it as a fixed-length mono waveform.

    The audio is resampled to ``sample_rate`` when its native rate differs,
    averaged down to a single channel when it has several, and then either
    truncated or zero-padded on the right to ``target_length`` samples.

    Args:
        file_like: Path or file-like object accepted by ``torchaudio.load``.
        target_length: Number of samples in the returned waveform
            (defaults to ``seg_len``).

    Returns:
        A ``(waveform, valid_samples)`` tuple: ``waveform`` is a 1-D tensor of
        ``target_length`` samples and ``valid_samples`` is how many of them
        come from the recording rather than from padding.
    """
    signal, native_rate = torchaudio.load(file_like)

    # Bring the recording to the model's sample rate.
    if native_rate != sample_rate:
        to_model_rate = torchaudio.transforms.Resample(orig_freq=native_rate, new_freq=sample_rate)
        signal = to_model_rate(signal)

    # Average the channels to obtain a mono signal.
    n_channels = signal.shape[0]
    if n_channels > 1:
        signal = signal.mean(dim=0, keepdim=True)

    available = signal.shape[1]  # length in samples after resampling
    kept = min(available, target_length)

    if available <= target_length:
        # Too short (or exact): zero-pad on the right up to the target length.
        signal = F.pad(signal, (0, target_length - available))
    else:
        # Too long: keep only the leading samples.
        signal = signal[:, :kept]

    return signal.squeeze(0), kept


def run_inference(model, audio_tensor, original_length, threshold=threshold):
    """Run the autoencoder on one waveform and return its reconstruction error.

    The model is moved to the GPU when available, the waveform is turned into
    feature vectors by ``model.preprocessing`` and passed through
    ``model.encoder``, ``model.bottleneck`` and ``model.decoder``. Only the
    feature vectors that correspond to real (non-padded) audio are compared.

    Args:
        model: Object exposing ``preprocessing``, ``encoder``, ``bottleneck``,
            ``decoder``, ``frames`` and ``transform_tf`` (with ``hop_length``).
        audio_tensor: 1-D waveform tensor, as returned by ``load_audio``.
        original_length: Number of samples of ``audio_tensor`` that are real
            audio (the rest is padding).
        threshold: Unused here; kept for backward compatibility. The
            normal/anomalous decision is taken by the caller.

    Returns:
        ``(features, reconstruction, loss)``: the trimmed feature tensor and
        its reconstruction (both on CPU) and their mean squared error as a
        Python float.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()
    model.transform_tf = model.transform_tf.to(device)

    audio_tensor = audio_tensor.unsqueeze(0).to(device)  # add a batch dimension

    # Number of feature vectors that cover real audio only.
    hop_length = model.transform_tf.hop_length
    real_t_bins = 1 + (original_length // hop_length)
    # Clamp to at least one vector: for very short recordings the raw value is
    # zero or negative, which would give an empty slice (NaN loss) or silently
    # trim vectors from the end instead of the padded tail.
    real_vector_array_size = max(1, real_t_bins - model.frames + 1)

    # Inference only: no autograd graph is needed for any of these steps.
    with torch.no_grad():
        # presumably [1, T, feature_dim] - TODO confirm against the model
        feature_vector = model.preprocessing(audio_tensor)
        encoded = model.encoder(feature_vector)
        bottleneck = model.bottleneck(encoded)
        reconstructed = model.decoder(bottleneck)

    # Keep only the vectors that correspond to real audio.
    feature_vector = feature_vector[:, :real_vector_array_size, :]
    reconstructed = reconstructed[:, :real_vector_array_size, :]

    # Reconstruction error
    loss = F.mse_loss(reconstructed, feature_vector).item()
    return feature_vector.cpu(), reconstructed.cpu(), loss


def analyze_in_memory(url, username, password, model, timeout=30):
    """Download one recording into memory and return its reconstruction loss.

    Args:
        url: Address of the audio file.
        username: HTTP basic-auth user name.
        password: HTTP basic-auth password.
        model: Model forwarded to ``run_inference``.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive host cannot block the whole analysis.

    Returns:
        The loss computed by ``run_inference`` for this recording.

    Raises:
        requests.RequestException: On network errors, timeouts or HTTP error
            status codes.
    """
    # The body is read in full right away, so streaming mode is not needed.
    resp = requests.get(url, auth=HTTPBasicAuth(username, password), timeout=timeout)
    resp.raise_for_status()
    audio_data = io.BytesIO(resp.content)

    waveform, real_samples = load_audio(audio_data)
    _, _, loss = run_inference(model, waveform, real_samples)

    return loss

def list_links(url, username, password, timeout=30):
    """Return the ``href`` of every anchor found in the page at ``url``.

    Args:
        url: Address of the HTML page (e.g. a directory listing).
        username: HTTP basic-auth user name.
        password: HTTP basic-auth password.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive host cannot block the crawl.

    Returns:
        List of raw ``href`` strings, in document order (not resolved against
        ``url``).

    Raises:
        requests.RequestException: On network errors, timeouts or HTTP error
            status codes.
    """
    print(f"Listo i link da: {url}")
    resp = requests.get(url, auth=HTTPBasicAuth(username, password), timeout=timeout)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')
    links = [a['href'] for a in soup.find_all('a', href=True)]
    print(f"Link trovati: {links}")
    return links

def gather_audio_urls(base_url, username, password):
    """Collect the URLs of the audio files under ``base_url``.

    Looks for ``.wav`` / ``.flac`` links directly in the listing at
    ``base_url`` and in each of its immediate subfolders (one level deep).

    Args:
        base_url: Address of the directory listing to scan.
        username: HTTP basic-auth user name.
        password: HTTP basic-auth password.

    Returns:
        List of absolute audio URLs: files found in ``base_url`` first, then
        those found in each subfolder, in listing order.
    """
    audio_suffixes = ('.wav', '.flac')
    # Exactly one trailing slash, so relative links resolve inside the folder
    # and a base_url that already ends with "/" does not produce "//".
    root_url = base_url.rstrip("/") + "/"

    links = list_links(base_url, username, password)

    # 1) audio files listed directly in base_url
    urls = [
        urljoin(root_url, href)
        for href in links
        if href.lower().endswith(audio_suffixes)
    ]

    # 2) audio files listed in the immediate subfolders
    for href in links:
        if not href.endswith('/'):  # not a folder link
            continue
        sub_url = urljoin(root_url, href)
        # Directory listings usually also link to the parent folder, to the
        # folder itself or to the site root; only descend into real children.
        if sub_url == root_url or not sub_url.startswith(root_url):
            continue
        sub_links = list_links(sub_url, username, password)
        urls.extend(
            urljoin(sub_url, sub_href)
            for sub_href in sub_links
            if sub_href.lower().endswith(audio_suffixes)
        )

    return urls

def analyze_all(urls, username, password, model, model_type, csv_name):
    """Score every recording under each base URL and save the anomalies to CSV.

    For each entry of ``urls`` the audio files are discovered with
    ``gather_audio_urls`` and scored with ``analyze_in_memory``. A file is
    labelled "Anomalo" when its loss exceeds the module-level ``threshold``,
    "Normale" otherwise, and "Errore" when scoring raised. Only the anomalous
    rows are written to ``esiti_<csv_name>/<model_type>_esito_<day>.csv``,
    which then receives a metadata header from ``prepend_metadata_to_csv``.

    Args:
        urls: Iterable of base URLs (directory listings) to analyse.
        username: HTTP basic-auth user name.
        password: HTTP basic-auth password.
        model: Model forwarded to ``analyze_in_memory``.
        model_type: Label used as prefix of the CSV file name.
        csv_name: Suffix of the output folder name (``esiti_<csv_name>``).

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    import os  # local import: the notebook only imports `os` in a later cell

    for base_url in urls:
        print(f"\n=== Analisi per: {base_url} ===")

        try:
            audio_urls = gather_audio_urls(base_url, username, password)
        except Exception as e:
            print(f"Errore nel raccogliere URL audio da {base_url}: {e}")
            continue

        if not audio_urls:
            print(f"Attenzione: nessun file audio trovato in {base_url}")
            continue

        results = []
        for url in audio_urls:
            print(f"\nAnalizzo: {url}")
            try:
                loss = analyze_in_memory(url, username, password, model)
                status = "Anomalo" if loss > threshold else "Normale"
                print(f" --> Risultato: {status} (MSE: {loss:.6f})")
                results.append((url, status, loss))
            except Exception as e:
                # Best effort: one failing file must not stop the whole batch.
                print(f"Errore su {url}: {e}")
                results.append((url, "Errore", None))

        # Keep only the anomalies
        anomalies = [r for r in results if r[1] == "Anomalo"]

        # Extract the day (e.g. 2025-196) from the URL path: it is the path
        # segment that follows "/recordings/".
        try:
            giorno = base_url.split("/recordings/")[1].split("/")[0]
        except IndexError:
            print(f"Impossibile estrarre il giorno dal path: {base_url}")
            giorno = "unknown"

        csv_filename = f"esiti_{csv_name}/{model_type}_esito_{giorno}.csv"

        # Save the CSV
        try:
            # Create the output folder first; otherwise open() fails on a
            # fresh run and the results of the whole analysis are lost.
            os.makedirs(os.path.dirname(csv_filename), exist_ok=True)

            with open(csv_filename, mode='w', newline='') as file:
                writer = csv.writer(file)
                writer.writerow(["url", "status", "loss"])
                for url, status, loss in anomalies:
                    # Anomalous rows always carry a numeric loss.
                    writer.writerow([url, status, f"{loss:.6f}"])
            print(f"\nSalvati {len(anomalies)} file anomali in '{csv_filename}'")

            # Insert the custom metadata header at the top of the CSV
            prepend_metadata_to_csv(
                csv_filename=csv_filename,
                modello="NS V1.01S",
                tipo_file="FLAC",
                durata_segmento="1",
                verificato1="0",
                verificato2="1"
            )

        except Exception as e:
            print(f"Errore durante il salvataggio di {csv_filename}: {e}")


def prepend_metadata_to_csv(csv_filename, modello, tipo_file, durata_segmento, verificato1, verificato2):
    """Insert a two-row metadata header at the top of an existing CSV file.

    The first inserted row holds the metadata column names, the second the
    given values; the original content follows unchanged.

    Args:
        csv_filename: Path of the CSV file to rewrite in place.
        modello: Value for the "modello" column.
        tipo_file: Value for the "tipo_file" column.
        durata_segmento: Value for the "durata_segmento" column.
        verificato1: Value for the first "verificato" column.
        verificato2: Value for the second "verificato" column.

    Returns:
        None. Failures are reported with ``print`` and not re-raised.
    """
    header = ["modello", "tipo_file", "durata_segmento", "verificato", "verificato"]
    metadata = [modello, tipo_file, durata_segmento, verificato1, verificato2]

    try:
        # newline='' disables newline translation on read, so the original
        # rows keep their "\r\n" terminators and match the rows written by
        # csv.writer below (otherwise the file ends up with mixed endings).
        with open(csv_filename, mode='r', newline='') as original:
            original_lines = original.readlines()

        with open(csv_filename, mode='w', newline='') as modified:
            writer = csv.writer(modified)
            writer.writerow(header)
            writer.writerow(metadata)
            modified.writelines(original_lines)

        print(f"Meta-intestazione inserita in '{csv_filename}'")
    except Exception as e:
        print(f"Errore durante l'inserimento della meta-intestazione in {csv_filename}: {e}")


In [None]:
if __name__ == "__main__":
    import getpass
    import os

    # Load the trained model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_path = "results_1s/best_model_1s.pth"
    model = NS_1()  # presumably provided by the `aed_models` star import
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model_type = '1s'
    csv_name = "1807"

    # One directory listing per recording day
    base_urls = [
        "https://lys-ai.it/BB/4e500921-747d-450a-998d-96ceafdcf531/recordings/2025-201/FLAC/device/RSP1-MIC1",
        "https://lys-ai.it/BB/4e500921-747d-450a-998d-96ceafdcf531/recordings/2025-202/FLAC/device/RSP1-MIC1",
        "https://lys-ai.it/BB/4e500921-747d-450a-998d-96ceafdcf531/recordings/2025-203/FLAC/device/RSP1-MIC1"]

    # Credentials must not be stored in the notebook: read them from the
    # environment, or ask for them interactively when they are not set.
    username = os.environ.get("LYS_USERNAME") or input("Username: ")
    password = os.environ.get("LYS_PASSWORD") or getpass.getpass("Password: ")

    # Analyse every file of every folder
    analyze_all(base_urls, username, password, model, model_type, csv_name)


In [None]:
# import pandas as pd
# import warnings
# warnings.filterwarnings("ignore", category=FutureWarning)
# warnings.filterwarnings("ignore", category=UserWarning)
#
# # Percorso del file CSV
# csv_path = 'esiti/1s_esito_lys-ai.it_BB_4e500921-747d-450a-998d-96ceafdcf531_recordings_2025-196_FLAC_device_RSP1-MIC1.csv'  # Sostituisci con il percorso corretto del tuo file
#
# # Carica il CSV
# df = pd.read_csv(csv_path)
#
# # Stringhe da sostituire
# old_str = "https://lys-ai.it/BB/4e500921-747d-450a-998d-96ceafdcf531/"
# # old_str = "https://lys-ai.it/recordings/"
# new_str = "/home/LYS_STORAGE/BB/4e500921-747d-450a-998d-96ceafdcf531/"
#
# # Applica la sostituzione su tutte le celle di tipo stringa
# df = df.applymap(lambda x: x.replace(old_str, new_str) if isinstance(x, str) else x)
#
# # Salva il risultato su un nuovo file (opzionale)
# df.to_csv('196.csv', index=False)
#
# print("Sostituzione completata.")

In [9]:
import os
import csv
import requests
from urllib.parse import urlparse
from requests.auth import HTTPBasicAuth

def download_from_csv(csv_path, output_folder, username, password, timeout=30):
    """Download every file whose URL is listed in the first column of a CSV.

    Rows whose first cell does not start with "http" (the metadata rows and
    the column header) are ignored. Each file is saved under
    ``output_folder`` in a subfolder named after the URL path, with "/"
    replaced by "_".

    Args:
        csv_path: Path of the CSV file to read.
        output_folder: Destination folder; created when missing.
        username: HTTP basic-auth user name.
        password: HTTP basic-auth password.
        timeout: Seconds to wait for the server before giving up on a file.

    Returns:
        None. Per-file failures are reported with ``print`` and do not stop
        the remaining downloads.
    """
    os.makedirs(output_folder, exist_ok=True)

    print(f"\nProcesso il CSV: {csv_path}")

    with open(csv_path, newline='') as file:
        rows = list(csv.reader(file))

    # Do not skip a fixed number of header rows: if the metadata header is
    # missing, that would silently drop the first data row. The "http" check
    # below already filters out every header row.
    for row in rows:
        if not row or not row[0].startswith("http"):
            continue

        url = row[0]
        try:
            # Build a folder structure that mirrors the path of the file,
            # e.g. recordings/2025-196/x.flac => recordings_2025-196/x.flac
            path_parts = urlparse(url).path.strip("/").split("/")
            filename = path_parts[-1]
            subfolder = "_".join(path_parts[:-1])
            full_subfolder_path = os.path.join(output_folder, subfolder)
            os.makedirs(full_subfolder_path, exist_ok=True)

            out_path = os.path.join(full_subfolder_path, filename)

            print(f"Scarico {url} in {out_path}...")
            resp = requests.get(url, auth=HTTPBasicAuth(username, password), timeout=timeout)
            resp.raise_for_status()

            with open(out_path, "wb") as f:
                f.write(resp.content)

        except Exception as e:
            # Best effort: report the failure and move on to the next file.
            print(f"Errore durante il download di {url}: {e}")


In [None]:
import getpass

# Credentials must not be stored in the notebook: read them from the
# environment, or ask for them interactively when they are not set.
download_from_csv(
    csv_path="esiti_1807/1s_esito_2025-197.csv",  # CSV produced by the analysis step
    output_folder="audio_anomali",                # folder where the audio files are saved
    username=os.environ.get("LYS_USERNAME") or input("Username: "),
    password=os.environ.get("LYS_PASSWORD") or getpass.getpass("Password: "),
)



Processo il CSV: esiti_1807/1s_esito_2025-197.csv
Scarico https://lys-ai.it/BB/4e500921-747d-450a-998d-96ceafdcf531/recordings/2025-197/FLAC/device/RSP1-MIC1/RSP1-MIC1_20250716_000005.flac in audio_anomali/BB_4e500921-747d-450a-998d-96ceafdcf531_recordings_2025-197_FLAC_device_RSP1-MIC1/RSP1-MIC1_20250716_000005.flac...
Scarico https://lys-ai.it/BB/4e500921-747d-450a-998d-96ceafdcf531/recordings/2025-197/FLAC/device/RSP1-MIC1/RSP1-MIC1_20250716_000007.flac in audio_anomali/BB_4e500921-747d-450a-998d-96ceafdcf531_recordings_2025-197_FLAC_device_RSP1-MIC1/RSP1-MIC1_20250716_000007.flac...
Scarico https://lys-ai.it/BB/4e500921-747d-450a-998d-96ceafdcf531/recordings/2025-197/FLAC/device/RSP1-MIC1/RSP1-MIC1_20250716_000008.flac in audio_anomali/BB_4e500921-747d-450a-998d-96ceafdcf531_recordings_2025-197_FLAC_device_RSP1-MIC1/RSP1-MIC1_20250716_000008.flac...
Scarico https://lys-ai.it/BB/4e500921-747d-450a-998d-96ceafdcf531/recordings/2025-197/FLAC/device/RSP1-MIC1/RSP1-MIC1_20250716_000009