# **Clustering | Human Activity**

## **Load packages**

In [None]:
import pandas as pd
import os
import shutil
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns


## **Load datasets**

## **Useful functions**

In [None]:
import os
import pandas as pd

def write_audio_paths_txt(directory_path: str, output_name: str) -> None:
    """
    Write the paths of every ``.wav`` file under a directory to a .txt file.

    The directory is walked recursively. One path is written per line, with
    backslashes converted to forward slashes. The output file is
    ``./txt/<output_name>.txt`` (relative to the current working directory);
    the ``txt`` folder is created when missing and an existing file with the
    same name is overwritten.

    Args:
        directory_path (str): Path to the directory where the audio files are located.
        output_name (str): The name of the output .txt file, without extension.

    Raises:
        FileNotFoundError: If ``directory_path`` does not exist.
    """
    if not os.path.exists(directory_path):
        raise FileNotFoundError(f"The directory {directory_path} does not exist.")

    paths = []

    for root, _, files in os.walk(directory_path):
        for file in files:
            if file.endswith(".wav"):
                paths.append(os.path.join(root, file))

    output_file = f'./txt/{output_name}.txt'
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(output_file, 'w') as f:
        for path in paths:
            # Normalize outside the f-string: a backslash inside an f-string
            # replacement field is a SyntaxError before Python 3.12.
            normalized = path.replace('\\', '/')
            f.write(normalized + "\n")

    print(f"Archivo: {output_file} creado")

def create_directories(base_directory, num_authors=91):
    """
    Create ``base_directory`` and one ``Author_<i>`` subfolder per author.

    Subfolders are named ``Author_1`` .. ``Author_<num_authors>``. Folders that
    already exist are left untouched, so the function can be called repeatedly.

    Args:
        base_directory (str): Folder that will hold the per-author subfolders.
            Missing parent folders are created as well.
        num_authors (int): Number of ``Author_<i>`` subfolders to create.
            Defaults to 91.

    Raises:
        FileExistsError: If one of the target paths exists but is not a directory.
    """
    # makedirs (unlike mkdir) also creates missing parents, e.g. "archive"
    # when asked for "archive/AudioWAV", and exist_ok makes re-runs a no-op.
    os.makedirs(base_directory, exist_ok=True)

    # Create the subfolders Author_1 .. Author_<num_authors>.
    for i in range(1, num_authors + 1):
        os.makedirs(os.path.join(base_directory, f"Author_{i}"), exist_ok=True)

    print("Directorios creados exitosamente.")

def move_files_to_authors(source_directory, file_extension, destination_directory):
    """
    Move files into per-author folders based on digits in the file name.

    For every entry of ``source_directory`` whose name ends with
    ``file_extension``, the characters at positions 2-3 of the name are parsed
    as an integer ``n`` and the file is moved into
    ``<destination_directory>/Author_<n>``. A line is printed for each file,
    whether it was moved or skipped.

    NOTE(review): presumably file names start with a 4-digit id such as
    ``1007_...`` whose last two digits identify the author - confirm against
    the dataset naming scheme.

    Args:
        source_directory (str): Folder whose direct entries are inspected
            (not recursive).
        file_extension (str): Suffix a file name must end with, e.g. ``".wav"``.
        destination_directory (str): Folder containing the ``Author_<n>``
            subfolders. They must already exist; files whose folder is missing
            are left in place.
    """
    for file_name in os.listdir(source_directory):
        if not file_name.endswith(file_extension):
            continue

        try:
            author_number = int(file_name[2:4])
        except ValueError:
            # A name without digits at positions 2-3 cannot be mapped to an
            # author; report it and keep going instead of aborting mid-run.
            print(f"No existe la carpeta para {file_name}, revisa el nombre.")
            continue

        author_folder = os.path.join(destination_directory, f"Author_{author_number}")

        if os.path.exists(author_folder):
            shutil.move(os.path.join(source_directory, file_name), author_folder)
            print(f"Movido: {file_name} -> {author_folder}")
        else:
            print(f"No existe la carpeta para {file_name}, revisa el nombre.")


Now we need to create the `.txt` file that lists all the audio paths.

In [19]:
# Folder that is scanned for .wav files. The dataset archive must be
# downloaded and unpacked here first; adjust the path to your machine.
directory = 'C:/Users/archive/AudioWAV'

# NOTE(review): `paths` is not used in this cell - confirm whether a later
# cell needs it.
paths = ['C:/Users/archive/AudioWAV/']
output_txt_name = "audio_paths"

# Writes ./txt/audio_paths.txt with one audio path per line.
write_audio_paths_txt(directory_path=directory, output_name=output_txt_name)


Finished writing audio file paths to ./txt/audio_paths.txt


Ahora que tenemos todas las rutas de los archivos de audio almacenadas en un `.txt`, podemos usar `video_features` para realizar la extracción de características correspondiente.

Para esto primero clonamos el repositorio de video_features e instalamos las dependencias necesarias dentro de este directorio/repositorio:

```bash
git clone https://github.com/v-iashin/video_features.git
cd video_features
```

Para realizar la instalación de dependencias, necesitas tener anaconda/miniconda instalado

```bash
conda create -n video_features
conda activate video_features
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
conda install -c conda-forge omegaconf scipy tqdm pytest opencv
conda install -c conda-forge av
```

Con esto hecho, ya se puede realizar la extracción de características en la terminal con el siguiente comando:

```bash
python main.py \
    feature_type=vggish \
    device="cuda:0" \
    file_with_video_paths="../txt/audio_paths.txt" \
    on_extraction=save_numpy \
    output_path="../extraction/"
```

Al ejecutar lo anterior, se crean archivos `.npy` dentro del directorio `extraction`. Cada archivo corresponde a la extracción de características de un archivo de audio.

No obstante, están desordenados; por eso debemos organizarlos por actor.

In [None]:
# Create the per-author folder trees for the raw audio and for the features.
create_directories(base_directory="archive/AudioWAV")
create_directories(base_directory="extraction/vggish")

# Sort the .wav recordings into their author folders.
move_files_to_authors(
    source_directory="C:/Users/archive/AudioWAV",
    file_extension=".wav",
    destination_directory="archive/AudioWAV",
)

# Sort the extracted .npy feature files the same way.
move_files_to_authors(
    source_directory="./extraction/vggish",
    file_extension=".npy",
    destination_directory="extraction/vggish",
)