In [1]:
import warnings
import time
import pandas as pd
warnings.filterwarnings('ignore')
from embedding import (
    extract_lfcc_embeddings,
    extract_mfcc_embeddings,
    extract_cqt_embeddings,
    extract_cqcc_embeddings)

In [2]:
import time
import pandas as pd  # Assurez-vous d'importer pandas

def process_data(data_type, files, target):
    """Extract every enabled embedding type for one dataset split and save it as CSV.

    For each extractor two passes are run, in this order:

    1. ``with_compute=False`` -> ``<data_type>_<method>_embeddings.csv``
    2. ``with_compute=True, mean=True, variance=True, avg_diff=True``
       -> ``<data_type>_<method>_embeddings_stats.csv``

    Both files are written to ``../Data/all_features/`` (relative to the
    current working directory) with a ``target`` column appended.

    Args:
        data_type: Label of the split; used in log messages and file names.
        files: Collection of audio file paths handed to the extractors as-is.
        target: Labels stored in the ``target`` column; must be assignable as
            a column of the resulting frame.

    Returns:
        None. The function only prints progress and writes files.
    """
    print(f'Started processing {data_type} data')

    # (name, extractor) pairs, processed in this order.
    extractors = (
        ('mfcc', extract_mfcc_embeddings),
        ('lfcc', extract_lfcc_embeddings),
        # ('cqcc', extract_cqcc_embeddings),  # currently disabled
        ('cqt', extract_cqt_embeddings),
    )

    # (file-name suffix, extractor keyword arguments); raw pass first, then stats.
    # NOTE(review): the exact meaning of `with_compute` lives in the `embedding`
    # module; presumably False yields the embeddings without additional
    # computation and True yields their summary statistics - confirm there.
    passes = (
        ('embeddings', {'with_compute': False}),
        ('embeddings_stats', {'with_compute': True, 'mean': True, 'variance': True, 'avg_diff': True}),
    )

    for method_name, extract in extractors:
        print(f'Started processing {method_name} embeddings...')
        started_at = time.time()

        for suffix, extractor_kwargs in passes:
            frame = pd.DataFrame(extract(files, **extractor_kwargs))
            frame['target'] = target
            csv_name = f'{data_type}_{method_name}_{suffix}.csv'
            frame.to_csv(f'../Data/all_features/{csv_name}', index=False)

        elapsed_time = time.time() - started_at
        print(f'Finished processing {method_name} embeddings in {elapsed_time:.2f} seconds.')

    print(f'Finished processing {data_type} data')

# Loop over the dataset splits: read each index CSV and run the extraction on it.
DATASET_CSVS = [
    ('train', '../Data/PA_CSV/train_data.csv'),
    ('dev', '../Data/PA_CSV/dev_data.csv'),
    ('eval', '../Data/PA_CSV/eval_data.csv'),
]

for data_type, data_path in DATASET_CSVS:
    data = pd.read_csv(data_path)
    # The index CSV provides the audio paths and their labels as columns.
    files = data['filepath'].values
    target = data['target'].values
    process_data(data_type, files, target)

Started processing eval data
Started processing cqt embeddings...


Preprocessing Audio: 100%|████████████████████| 134730/134730 [1:17:25<00:00, 29.00samples/s]


Finished processing cqt embeddings in 4675.88 seconds.
Finished processing eval data


In [2]:
def process_data(data_type, files, target):
    """Extract every enabled feature type for one dataset split and save it as CSV.

    For each extractor the raw pass (``with_compute=False``) and the statistics
    pass (``with_compute=True, mean=True, variance=True, avg_diff=True``) are
    run, and both results are written to ``../Data/all_features/`` (relative to
    the current working directory) with a ``target`` column appended:

    * ``<data_type>_<method>_feats_stats.csv`` - statistics pass
    * ``<data_type>_<method>_feats.csv``       - raw pass

    Args:
        data_type: Label of the split; used in log messages and file names.
        files: Collection of audio file paths handed to the extractors as-is.
        target: Labels stored in the ``target`` column; must be assignable as
            a column of the resulting frame.

    Returns:
        None. The function only prints progress and writes files.
    """
    print(f'Started processing {data_type} data')

    extraction_methods = {
        'mfcc': extract_mfcc_embeddings,
        'lfcc': extract_lfcc_embeddings,
        # 'cqcc': extract_cqcc_embeddings,
        'cqt': extract_cqt_embeddings,
    }

    for method_name, extraction_method in extraction_methods.items():
        print(f'Started processing {method_name} embeddings...')
        start_time = time.time()

        # NOTE(review): the semantics of `with_compute` are defined in the
        # `embedding` module; presumably False returns the plain embeddings and
        # True their summary statistics - confirm there.
        embeddings = extraction_method(files, with_compute=False)
        embeddings_stats = extraction_method(files, with_compute=True, mean=True, variance=True, avg_diff=True)

        # Pair each payload with its file-name suffix explicitly. The previous
        # zip(['_stats'], [embeddings_stats, embeddings]) had one suffix for two
        # payloads, so zip() stopped after the first pair and the raw
        # embeddings were computed but never written to disk.
        outputs = [
            ('_stats', embeddings_stats),
            ('', embeddings),
        ]
        for embeddings_type, features in outputs:
            df = pd.DataFrame(features)
            df['target'] = target
            file_name = f'{data_type}_{method_name}_feats{embeddings_type}.csv'
            df.to_csv(f'../Data/all_features/{file_name}', index=False)

        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f'Finished processing {method_name} embeddings in {elapsed_time:.2f} seconds.')

    print(f'Finished processing {data_type} data')

In [3]:
# Dataset splits to process in this run. Only `eval` is active; the other
# entries are kept commented out so they can be re-enabled.
SPLITS_TO_RUN = [
    # ('train', '../Data/PA_CSV/train_data.csv'),
    # ('dev', '../Data/PA_CSV/dev_data.csv'),
    ('eval', '../Data/PA_CSV/eval_data.csv'),
]

for data_type, data_path in SPLITS_TO_RUN:
    data = pd.read_csv(data_path)
    # The index CSV provides the audio paths and their labels as columns.
    files = data['filepath'].values
    target = data['target'].values
    process_data(data_type, files, target)

Started processing eval data
Started processing mfcc embeddings...


Preprocessing Audio Files: 100%|████████████████████| 134730/134730 [36:47<00:00, 61.03samples/s]
Preprocessing Audio Files: 100%|████████████████████| 134730/134730 [29:11<00:00, 76.93samples/s]


Finished processing mfcc embeddings in 3972.53 seconds.
Started processing lfcc embeddings...


Preprocessing Audio:  10%|██                  | 13671/134730 [03:06<27:29, 73.38samples/s]

KeyboardInterrupt: 