#### Goal: read files, run fourier transform, create some baseline model 

In [1]:
import numpy as np
import torch
import torchaudio
import matplotlib.pyplot as plt
import os

import pandas as pd
import librosa
import soundfile as sf
from IPython.display import Audio

In [2]:
dataset_paths = os.listdir('../data')
dataset_paths = [os.path.join('../data', path) for path in dataset_paths]

dfs = [] # add data frames to this list
for dataset_path in dataset_paths:
    audio_fps = os.listdir(os.path.join('../data', dataset_path))
    
    df = pd.DataFrame(audio_fps, columns=['filename'])
    df['filename'] = df['filename'].str.split('_')
    df = pd.DataFrame(df['filename'].tolist(), columns=['id', 'notsure', 'emotion', 'version'])

ValueError: 4 columns passed, passed data had 3 columns

In [11]:
fps = os.listdir(dataset_paths[0])

In [16]:
os.listdir(os.path.join(dataset_paths[0], fps[0]))

['Actor_17',
 'Actor_04',
 'Actor_03',
 'Actor_09',
 'Actor_22',
 'Actor_10',
 'Actor_14',
 'Actor_07',
 'Actor_21',
 'Actor_13',
 'Actor_19',
 'Actor_23',
 'Actor_11',
 'Actor_08',
 'Actor_02',
 'Actor_05',
 'Actor_16',
 'Actor_24',
 'Actor_20',
 'Actor_18',
 'Actor_12',
 'Actor_01',
 'Actor_06',
 'Actor_15']

In [7]:
crema = pd.DataFrame(crema_fps, columns=['filename'])
crema['filename'] = crema['filename'].str.split('_')
crema = pd.DataFrame(crema['filename'].tolist(), columns=['id', 'notsure', 'emotion', 'version'])
crema

Unnamed: 0,id,notsure,emotion,version
0,1082,TSI,HAP,XX.wav
1,1024,IEO,HAP,HI.wav
2,1028,ITH,NEU,XX.wav
3,1020,MTI,NEU,XX.wav
4,1038,IEO,ANG,MD.wav
...,...,...,...,...
7437,1070,IWW,ANG,XX.wav
7438,1072,ITH,HAP,XX.wav
7439,1065,TAI,DIS,XX.wav
7440,1064,MTI,HAP,XX.wav


In [24]:
dataset_paths

['../data/Ravdess', '../data/Tess', '../data/Crema', '../data/Savee']

In [18]:
os.listdir('../data/Crema/1082_TSI_HAP_XX.wav')

NotADirectoryError: [Errno 20] Not a directory: '../data/Crema/1082_TSI_HAP_XX.wav'

# Let's break down each data set that we have: Crema, Ravdess, Tess, and Savee so that we have some context on what we are dealing with 

In [3]:
dataset_paths = os.listdir('../data')
dataset_paths = [os.path.join('../data', path) for path in dataset_paths]

In [4]:
def create_paths(path):
    sub_dirs = os.listdir(path)
    
    concatenated_dirs = list()
    for sub_dir in sub_dirs:
        concatenated_dirs.append(os.path.join(path, sub_dir))
    
    return concatenated_dirs

In [5]:
tess_audio_paths = []
for path in create_paths('../data/Tess'):
    pths = create_paths(path)
    
    tess_audio_paths = tess_audio_paths + pths

In [6]:
crema_audio_paths = create_paths('../data/Crema')

In [7]:
savee_audio_paths = create_paths('../data/Savee')

In [8]:
ravdess_audio_paths = list()
for actor_dir in create_paths('../data/Ravdess/audio_speech_actors_01-24'):
    ravdess_audio_paths = ravdess_audio_paths + create_paths(actor_dir)

In [9]:
print('Number Tess Audio Files:', len(tess_audio_paths))
print('Number Crema Audio Files:', len(crema_audio_paths))
print('Number Savee Audio Files:', len(savee_audio_paths))
print('Number Ravdess Audio Files', len(ravdess_audio_paths))

Number Tess Audio Files: 2800
Number Crema Audio Files: 7442
Number Savee Audio Files: 480
Number Ravdess Audio Files 1440


In [6]:
crema.iloc[0, 3]

'XX.wav'

In [10]:
crema

Unnamed: 0,id,notsure,emotion,version,filename
0,1082,TSI,HAP,XX.wav,1082_TSI_HAP_XX.wav
1,1024,IEO,HAP,HI.wav,1024_IEO_HAP_HI.wav
2,1028,ITH,NEU,XX.wav,1028_ITH_NEU_XX.wav
3,1020,MTI,NEU,XX.wav,1020_MTI_NEU_XX.wav
4,1038,IEO,ANG,MD.wav,1038_IEO_ANG_MD.wav
...,...,...,...,...,...
7437,1070,IWW,ANG,XX.wav,1070_IWW_ANG_XX.wav
7438,1072,ITH,HAP,XX.wav,1072_ITH_HAP_XX.wav
7439,1065,TAI,DIS,XX.wav,1065_TAI_DIS_XX.wav
7440,1064,MTI,HAP,XX.wav,1064_MTI_HAP_XX.wav


In [11]:
i = 1# choose a row of the data frame
print(crema.iloc[i]['emotion'])
audio_path = os.path.join('..', 'data', 'Crema', crema['filename'].iloc[i])  # Replace with the path to your audio file
audio, sample_rate = librosa.load(audio_path, sr=None)

HAP


In [12]:
Audio(data=audio, rate=sample_rate)


In [13]:
np.unique(crema['emotion'], return_counts=True)

(array(['ANG', 'DIS', 'FEA', 'HAP', 'NEU', 'SAD'], dtype=object),
 array([1271, 1271, 1271, 1271, 1087, 1271]))

In [14]:
np.unique(crema['version'], return_counts=True)

(array(['HI.wav', 'LO.wav', 'MD.wav', 'X.wav', 'XX.wav'], dtype=object),
 array([ 455,  455,  455,    1, 6076]))

In [15]:
waveforms = []
sample_rates = []
for file in crema_fps:
    PATH = os.path.join('..', 'data', 'Crema', file)
    waveform, sample_rate = torchaudio.load(PATH)
    waveforms.append(waveform)
    sample_rates.append(sample_rate)

In [16]:
means = []
spikes_data = []
frequencies_data = []
for i in range(len(waveforms)):
    wf = waveforms[i]
    sample_rate = sample_rates[i]
    wv_arr = wf.numpy()[0]
    
    fft = np.fft.fft(wv_arr, norm='ortho')
    fs_shifted = np.linspace(-len(fft) // 2, len(fft) // 2, len(fft)) * sample_rate / len(fft)
    spikes = np.abs(np.fft.fftshift(fft))[len(fs_shifted)//2:]
    spikes_data.append(spikes)
    frequencies_data.append(fs_shifted[len(fs_shifted)//2:])
    means.append(np.mean(spikes))

In [17]:
out = []
for i in range(len(spikes_data)):
    a = np.array(spikes_data[i])
    b = np.array(frequencies_data[i])
    out.append(np.mean(a * b))

In [18]:
crema['mean'] = means

In [19]:
crema['']

KeyError: ''

In [None]:
crema[crema['emotion'] == 'ANG'].plot(kind='hist')

In [None]:
crema[crema['emotion'] == 'HAP'].plot(kind='hist')

In [None]:
crema[crema['emotion'] == 'NEU'].plot(kind='hist')

In [None]:
crema[crema['emotion'] == 'DIS'].plot(kind='hist')

In [None]:
crema[crema['emotion'] == 'FEA'].plot(kind='hist')

In [None]:
crema[crema['emotion'] == 'SAD'].plot(kind='hist')

In [None]:
# KNN model in time space and fourier space 
# index the frequencies with magnitudes 
# make some vector based off of the fourier transforms cluster and stuff 

# learn spectograms and short term fourier transforms !!!