In [None]:
import os
import csv
import requests
from urllib.parse import urlparse
from requests.auth import HTTPBasicAuth

In [None]:
import csv

def replace_path_in_csv(input_csv_path: str, output_csv_path: str,
                        old_prefix: str = "/mnt/BB-S_STORAGE",
                        new_prefix: str = "https://lys-ai.it/recordings"):
    """Copy a CSV file, replacing ``old_prefix`` with ``new_prefix`` in every cell.

    Every occurrence of ``old_prefix`` inside a cell is replaced, not only a
    leading one. Cells that do not contain ``old_prefix`` are copied unchanged.

    Args:
        input_csv_path: CSV file to read (UTF-8).
        output_csv_path: CSV file to write (UTF-8). May be the same file as
            ``input_csv_path``; in that case the rows are buffered in memory
            first so the input is not truncated before it has been read.
        old_prefix: Substring to look for in each cell.
        new_prefix: Replacement text.

    Raises:
        FileNotFoundError: If ``input_csv_path`` does not exist.
    """
    import os  # local import so this cell does not depend on another cell's imports

    def _rewrite(row):
        # str.replace is a no-op when old_prefix is absent, so no membership test is needed.
        return [cell.replace(old_prefix, new_prefix) for cell in row]

    in_place = (os.path.exists(output_csv_path)
                and os.path.samefile(input_csv_path, output_csv_path))

    if in_place:
        # Opening the output with mode "w" truncates it immediately; when it is
        # the same file as the input we must read everything before writing.
        with open(input_csv_path, mode="r", newline="", encoding="utf-8") as infile:
            rows = [_rewrite(row) for row in csv.reader(infile)]
        with open(output_csv_path, mode="w", newline="", encoding="utf-8") as outfile:
            csv.writer(outfile).writerows(rows)
        return

    with open(input_csv_path, mode="r", newline="", encoding="utf-8") as infile, \
         open(output_csv_path, mode="w", newline="", encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        # Stream row by row: the two files are distinct, so nothing is buffered.
        writer.writerows(_rewrite(row) for row in csv.reader(infile))

# Driver: runs the CSV conversion when this code is executed as the main module.
if __name__ == "__main__":
    input_path = "audio_anomali/ventola_229+.csv"
    output_path = "audio_anomali/ventola_ok_2.csv"
    # No prefixes are passed, so replace_path_in_csv uses its default old/new prefixes.
    replace_path_in_csv(input_path, output_path)
    print(f"File convertito salvato in {output_path}")


In [None]:
import os
from pydub import AudioSegment

# === CONFIGURATION ===
cartella_input = "audio_anomali/doy"   # <-- change to your own folder
cartella_output = os.path.join(cartella_input, "output_chunks")
durata_chunk_ms = 1000  # 1 second, in milliseconds

# === CREATE THE OUTPUT FOLDER IF IT DOES NOT EXIST ===
os.makedirs(cartella_output, exist_ok=True)

# === PROCESS EVERY AUDIO FILE ===
# Only the direct children of cartella_input are listed (no recursion).
for filename in os.listdir(cartella_input):
    # Skip anything that is not one of the supported audio extensions.
    if not filename.lower().endswith(('.wav', '.mp3', '.flac', '.ogg', '.m4a')):
        continue

    filepath = os.path.join(cartella_input, filename)
    audio = AudioSegment.from_file(filepath)
    durata_audio_ms = len(audio)

    # Files shorter than one chunk produce no output at all.
    if durata_audio_ms < durata_chunk_ms:
        print(f"⏭️ {filename} ignorato (meno di 1 secondo)")
        continue

    # Only whole chunks are exported; a trailing remainder is dropped.
    num_chunk = durata_audio_ms // durata_chunk_ms
    nome_base = os.path.splitext(filename)[0]

    for i, start in enumerate(range(0, num_chunk * durata_chunk_ms, durata_chunk_ms)):
        end = start + durata_chunk_ms
        chunk = audio[start:end]

        # Chunks are numbered from 1 and always written as WAV.
        output_filename = f"{nome_base}_chunk{i+1}.wav"
        output_path = os.path.join(cartella_output, output_filename)
        chunk.export(output_path, format="wav")

    print(f"✅ {filename} -> {num_chunk} chunk(s) da 1 secondo")


In [None]:
import os
import csv
import requests
from urllib.parse import urlparse
from requests.auth import HTTPBasicAuth

def download_from_csv_list_raw(csv_paths, output_folder, username, password, timeout=30):
    """Download every URL found in the first column of the given CSV files.

    All rows of each CSV are scanned; rows that are empty or whose first cell
    does not start with ``"http"`` are skipped. Each file is stored as
    ``output_folder/<URL directories joined by "_">/<last URL path segment>``.
    Failures are printed and the loop moves on to the next URL (best effort).

    Args:
        csv_paths: Iterable of CSV file paths to process.
        output_folder: Root folder for the downloads (created if missing).
        username: User name for HTTP basic authentication.
        password: Password for HTTP basic authentication.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
            it a stalled server would block the whole batch indefinitely.
    """
    os.makedirs(output_folder, exist_ok=True)

    for csv_path in csv_paths:
        print(f"\nProcesso il CSV: {csv_path}")

        with open(csv_path, newline='') as file:
            reader = csv.reader(file)

            for row in reader:
                if not row or not row[0].startswith("http"):
                    continue

                url = row[0]

                try:
                    # Split the URL path into its segments
                    parsed_url = urlparse(url)
                    path_parts = parsed_url.path.strip("/").split("/")
                    filename = path_parts[-1]

                    # Flatten the URL directories into a single sub-folder name,
                    # e.g. recordings_2025-205_FLAC_device_RSP1-MIC1
                    subfolder = "_".join(path_parts[:-1])
                    full_subfolder_path = os.path.join(output_folder, subfolder)
                    os.makedirs(full_subfolder_path, exist_ok=True)

                    out_path = os.path.join(full_subfolder_path, filename)

                    print(f"Scarico {url} in {out_path}...")
                    # A timeout turns a hung connection into an exception that
                    # is reported by the handler below instead of blocking forever.
                    resp = requests.get(
                        url,
                        auth=HTTPBasicAuth(username, password),
                        timeout=timeout,
                    )
                    resp.raise_for_status()

                    with open(out_path, "wb") as f:
                        f.write(resp.content)

                except Exception as e:
                    # Deliberate best effort: report and continue with the next URL.
                    print(f"Errore durante il download di {url}: {e}")


In [None]:
import getpass
import os

csv_files = [
   "audio_anomali/ventola_ok_2.csv"]

output_folder = "audio_anomali/doy"

# Credentials must not be stored in the notebook. They are taken from the
# RECORDINGS_USERNAME / RECORDINGS_PASSWORD environment variables, falling back
# to an interactive prompt (getpass does not echo the password).
# NOTE(review): the password that used to be hard-coded here should be rotated.
username = os.environ.get("RECORDINGS_USERNAME") or input("Username: ")
password = os.environ.get("RECORDINGS_PASSWORD") or getpass.getpass("Password: ")

download_from_csv_list_raw(csv_files, output_folder, username, password)

In [None]:
import os
import csv
import requests
from urllib.parse import urlparse
from requests.auth import HTTPBasicAuth

def download_from_csv_list(csv_paths, output_folder, username, password, timeout=30):
    """Download every URL found in the first column of the given CSV files.

    The first two rows of each CSV are treated as a meta-header and ignored.
    Of the remaining rows, those that are empty or whose first cell does not
    start with ``"http"`` are skipped. Each file is stored as
    ``output_folder/<URL directories joined by "_">/<last URL path segment>``.
    Failures are printed and the loop moves on to the next URL (best effort).

    Args:
        csv_paths: Iterable of CSV file paths to process.
        output_folder: Root folder for the downloads (created if missing).
        username: User name for HTTP basic authentication.
        password: Password for HTTP basic authentication.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
            it a stalled server would block the whole batch indefinitely.
    """
    os.makedirs(output_folder, exist_ok=True)

    for csv_path in csv_paths:
        print(f"\nProcesso il CSV: {csv_path}")

        with open(csv_path, newline='') as file:
            reader = csv.reader(file)
            lines = list(reader)

        # Skip the first two rows (meta-header)
        data_lines = lines[2:]

        for row in data_lines:
            if len(row) < 1 or not row[0].startswith("http"):
                continue

            url = row[0]
            try:
                # Mirror the URL path in the local folder layout: the URL
                # directories are flattened into one sub-folder name,
                # e.g. recordings/2025-196/... => recordings_2025-196...
                parsed_url = urlparse(url)
                path_parts = parsed_url.path.strip("/").split("/")
                filename = path_parts[-1]
                subfolder = "_".join(path_parts[:-1])
                full_subfolder_path = os.path.join(output_folder, subfolder)
                os.makedirs(full_subfolder_path, exist_ok=True)

                out_path = os.path.join(full_subfolder_path, filename)

                print(f"Scarico {url} in {out_path}...")
                # A timeout turns a hung connection into an exception that is
                # reported by the handler below instead of blocking forever.
                resp = requests.get(
                    url,
                    auth=HTTPBasicAuth(username, password),
                    timeout=timeout,
                )
                resp.raise_for_status()

                with open(out_path, "wb") as f:
                    f.write(resp.content)

            except Exception as e:
                # Deliberate best effort: report and continue with the next URL.
                print(f"Errore durante il download di {url}: {e}")


In [None]:
import getpass
import os

csv_files = [
    "esiti_pc12/1s_falsipositivi_2025-213.csv",
    "esiti_pc12/1s_falsipositivi_2025-216.csv",
    "esiti_pc12/1s_falsipositivi_2025-217.csv"]

output_folder = "audio_anomali/doy"

# Credentials must not be stored in the notebook. They are taken from the
# RECORDINGS_USERNAME / RECORDINGS_PASSWORD environment variables, falling back
# to an interactive prompt (getpass does not echo the password).
# NOTE(review): the password that used to be hard-coded here should be rotated.
username = os.environ.get("RECORDINGS_USERNAME") or input("Username: ")
password = os.environ.get("RECORDINGS_PASSWORD") or getpass.getpass("Password: ")

download_from_csv_list(csv_files, output_folder, username, password)


In [None]:
import getpass
import os

# NOTE(review): `download_from_csv` is not defined in any cell visible here
# (only download_from_csv_list and download_from_csv_list_raw are). On a fresh
# kernel this cell raises NameError unless the function comes from elsewhere.
# Credentials are read from the RECORDINGS_USERNAME / RECORDINGS_PASSWORD
# environment variables, with an interactive prompt as fallback, instead of
# being hard-coded in the notebook. The previously embedded password should be rotated.
download_from_csv(
    csv_path="falsi_positivi/1s_falsipositivi_2025-204.csv",       # path of the generated CSV
    output_folder="audio_anomali",    # folder where the audio files are saved
    username=os.environ.get("RECORDINGS_USERNAME") or input("Username: "),
    password=os.environ.get("RECORDINGS_PASSWORD") or getpass.getpass("Password: "),
)

In [None]:
import os
from pydub import AudioSegment

# Folder that holds the files to convert
folder_path = "audio_anomali/ventola_1s"

# Convert each FLAC file in the folder to WAV, then delete the FLAC source.
for filename in os.listdir(folder_path):
    if not filename.lower().endswith(".flac"):
        continue

    flac_path = os.path.join(folder_path, filename)
    wav_filename = f"{os.path.splitext(filename)[0]}.wav"
    wav_path = os.path.join(folder_path, wav_filename)

    # Decode the FLAC and write it next to the original as WAV
    audio = AudioSegment.from_file(flac_path, format="flac")
    audio.export(wav_path, format="wav")

    # The source is removed only after the export call has returned
    os.remove(flac_path)
    print(f"Convertito {filename} in {wav_filename} e rimosso il FLAC.")



In [None]:
import os
from pydub import AudioSegment

# Folders that contain the FLAC files
folders = [
    "audio_anomali/doy/recordings_8c69c0b3-ae12-4552-9b11-0aaa0304a06d_recordings_2025-213_FLAC_device_RSP1-MIC1",
    "audio_anomali/doy/recordings_8c69c0b3-ae12-4552-9b11-0aaa0304a06d_recordings_2025-216_FLAC_device_RSP1-MIC1",
    "audio_anomali/doy/recordings_8c69c0b3-ae12-4552-9b11-0aaa0304a06d_recordings_2025-217_FLAC_device_RSP1-MIC1"
]

for folder_path in folders:
    # Missing folders are reported and skipped
    if not os.path.isdir(folder_path):
        print(f"Cartella non trovata: {folder_path}")
        continue

    for filename in os.listdir(folder_path):
        # Only FLAC files are converted
        if not filename.lower().endswith(".flac"):
            continue

        flac_path = os.path.join(folder_path, filename)
        wav_filename = f"{os.path.splitext(filename)[0]}.wav"
        wav_path = os.path.join(folder_path, wav_filename)

        try:
            # Decode the FLAC and write it next to the original as WAV
            audio = AudioSegment.from_file(flac_path, format="flac")
            audio.export(wav_path, format="wav")

            # The source is removed only after the export call has returned
            os.remove(flac_path)
            print(f"[{folder_path}] Convertito {filename} in {wav_filename} e rimosso il FLAC.")
        except Exception as e:
            # A failure on one file is reported; the remaining files are still processed
            print(f"Errore nella conversione di {filename} in {folder_path}: {e}")


In [None]:
import os

def count_flac_files(directory):
    """Return the number of files under ``directory`` whose name ends in ``.flac``.

    The tree is walked recursively with ``os.walk`` and the extension check is
    case-insensitive.
    """
    return sum(
        1
        for _root, _dirs, filenames in os.walk(directory)
        for name in filenames
        if name.lower().endswith(".flac")
    )

# Example usage: count the .flac files under one folder and print the total.
cartella = "audio_anomali/ventola_1s" # replace with your own path
num_flac = count_flac_files(cartella)
print(f"Numero totale di file .flac in '{cartella}': {num_flac}")


In [None]:
import os
import pandas as pd
import soundfile as sf
import numpy as np

# Path of the results CSV and base folder that is searched for the audio files
csv_path = "esiti_1807/1s_esito_2025-198.csv"
audio_base_dir = "audio_anomali"

In [None]:
import os
import re

def estrai_date_uniche(cartella):
    """Return the unique dates found in the file names of ``cartella``.

    The first run of 8 consecutive digits in each file name (e.g. ``20250912``)
    is read as ``YYYYMMDD``. File names without such a run are ignored. The
    digits are not validated as a real calendar date.

    Args:
        cartella: Folder whose direct entries are scanned (no recursion).

    Returns:
        List of ``dd-mm-yyyy`` strings, without duplicates, in chronological
        order.
    """
    pattern = re.compile(r'\d{8}')  # 8 consecutive digits, e.g. 20250912
    date_trovate = set()  # a set removes duplicates on its own

    for nome_file in os.listdir(cartella):
        match = pattern.search(nome_file)
        if match:
            date_trovate.add(match.group())

    # Sort the raw YYYYMMDD strings BEFORE formatting: their lexicographic order
    # is chronological, whereas sorting dd-mm-yyyy strings orders by day of month.
    return [
        f"{data[6:8]}-{data[4:6]}-{data[0:4]}" for data in sorted(date_trovate)
    ]

# Example usage: print every unique date found in the dataset folder, one per line.
cartella_audio = "audio_anomali/dataset_1s"
date_uniche = estrai_date_uniche(cartella_audio)

for data in date_uniche:
    print(data)


In [None]:
import os
import re
from datetime import datetime

def estrai_date_uniche_in_doy(cartella):
    """Return the unique dates found in the file names of ``cartella`` with their day of year.

    The first run of 8 consecutive digits in each file name (e.g. ``20250912``)
    is parsed as ``%Y%m%d``; file names without such a run are ignored.

    Returns:
        List of ``(YYYYMMDD string, day-of-year int)`` tuples, without
        duplicates, sorted by the date string.

    Raises:
        ValueError: If an 8-digit run is not a valid ``%Y%m%d`` date.
    """
    trova_data = re.compile(r'\d{8}').search
    corrispondenze = (trova_data(nome_file) for nome_file in os.listdir(cartella))

    # Set comprehension: duplicates collapse, names without a match are dropped
    date_trovate = {m.group() for m in corrispondenze if m}

    # Convert each date, in sorted order, to its day of the year (1-based)
    return [
        (data, datetime.strptime(data, "%Y%m%d").timetuple().tm_yday)
        for data in sorted(date_trovate)
    ]

# Example usage: print every unique date of the dataset folder with its day of year (DOY).
cartella_audio = "audio_anomali/dataset_1s"
date_con_doy = estrai_date_uniche_in_doy(cartella_audio)

for data_str, doy in date_con_doy:
    print(f"{data_str} → DOY {doy}")


In [None]:
# Read the CSV, skipping the two meta-header rows
df = pd.read_csv(csv_path, skiprows=2)

# === OPTIMISATION: index every local .flac file by its file name ===
# Files that share a name in different folders overwrite each other:
# the one walked last wins.
local_files = {
    f: os.path.join(root, f)
    for root, _, files in os.walk(audio_base_dir)
    for f in files
    if f.lower().endswith('.flac')
}

# === Feature extraction, only for files that exist locally ===
records = []
for _, row in df.iterrows():
    url = row["url"]
    loss = row["loss"]
    filename = os.path.basename(url)
    file_path = local_files.get(filename)

    # Rows whose audio was not found locally are skipped silently
    if not file_path:
        continue

    try:
        data, sr = sf.read(file_path)
        duration = len(data) / sr              # number of frames / sample rate
        rms = np.sqrt(np.mean(data**2))
        dbfs = 20 * np.log10(rms + 1e-9)       # epsilon avoids log10(0) on silence

        records.append({
            "filename": filename,
            "loss": float(loss),
            "dbfs": dbfs,
            "duration": duration,
            "rms": rms,
            "path": file_path
        })
    except Exception as e:
        # A file that cannot be read or processed is reported and skipped
        print(f"Errore nel processare {file_path}: {e}")

# Build the DataFrame from the collected records
features_df = pd.DataFrame(records)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# === Final filter for suspected false positives ===
# Keeps the rows with loss <= 0.005 whose audio is also quiet and low-energy
# (dBFS below -35 and RMS below 0.02).
# NOTE(review): features_df comes from another cell; if no record was collected
# there it presumably has no "loss" column and this lookup fails — confirm.
sospetti_fp_finale = features_df[
    (features_df["loss"] <= 0.005) &
    (features_df["dbfs"] < -35) &
    (features_df["rms"] < 0.02)
].copy()

# === Scatter plot of the suspects: loss vs dBFS, colour and size by RMS ===
# NOTE(review): the title mentions "loss ≈ 0.002" while the filter and the
# vertical line use 0.005 — confirm which value is intended.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=sospetti_fp_finale,
    x="loss",
    y="dbfs",
    hue="rms",
    size="rms",
    palette="coolwarm",
    sizes=(20, 200),
    legend="brief"
)
plt.title("Falsi Positivi Sospetti (loss ≈ 0.002, bassa energia e volume)")
plt.xlabel("Loss (Errore di ricostruzione)")
plt.ylabel("Livello sonoro (dBFS)")
plt.grid(True)
# Dashed reference lines at the two thresholds used by the filter above
plt.axhline(y=-35, color='gray', linestyle='--', label='Soglia dBFS')
plt.axvline(x=0.005, color='red', linestyle='--', label='Limite loss sospetto')
# Called after the reference lines so that their labels appear in the legend
plt.legend()
plt.tight_layout()
plt.show()


# print(sospetti_fp.head())        # first 5 suspects (disabled; refers to a name not defined in this cell)
# Summary: number of suspects out of all the files with extracted features
print("Potenziali falsi positivi:", len(sospetti_fp_finale), f" su:", len(features_df))
sospetti_fp_finale[['filename', 'loss', 'dbfs']] # key columns only; shown as the cell output