# Setting Up

In [58]:
# importing libraries
import pandas as pd
import numpy as np
import torch
import tensorflow as tf
import librosa, librosa.display
from IPython.display import Audio
import matplotlib.pyplot as plt
import soundfile as sf
from pydub import AudioSegment
import os
from torch.utils.data import Dataset, DataLoader

# Importing Data

In [2]:
iemocap_emotion =  pd.read_csv('/kaggle/input/iemocap-emotion-speech-database/iemocap_full_dataset.csv')
iemocap_eng_french = pd.read_csv('/kaggle/input/iemocap-transcriptions-english-french/iemocapTrans.csv')

In [3]:
iemocap_emotion.emotion.unique(), iemocap_emotion.shape

(array(['neu', 'fru', 'xxx', 'sur', 'ang', 'hap', 'sad', 'exc', 'oth',
        'fea', 'dis'], dtype=object),
 (10039, 7))

In [4]:
iemocap_eng_french.emotion.unique(), iemocap_eng_french.shape

(array(['sad', 'fru', 'neu', 'hap', 'exc', 'sur', 'ang', 'fea', 'oth',
        'dis'], dtype=object),
 (10039, 10))

In [5]:
# using iemocap_eng_french dataset
dataset = iemocap_eng_french

In [6]:
dataset.columns

Index(['_id', 'activation', 'dominance', 'emotion', 'end_time', 'start_time',
       'titre', 'to_translate', 'translated', 'valence'],
      dtype='object')

In [7]:
dataset = dataset.drop(['_id', 'valence', 'translated', 'to_translate', 'start_time', 'end_time', 'dominance', 'activation'], axis=1)

In [8]:
dataset.columns

Index(['emotion', 'titre'], dtype='object')

In [9]:
# adding the file path to the dataframe
location = '/kaggle/input/iemocap-transcriptions-english-french/Iemocap_audio/iemocap_audio/IEMOCAP_wav/'

dataset['filepath'] = location + dataset['titre'] + '.wav'

In [10]:
# dataset.drop('titre', axis=1, inplace=True)

In [11]:
iemocap_eng_french_dataset = dataset.copy()

In [12]:
iemocap_eng_french_dataset.head()

Unnamed: 0,emotion,titre,filepath
0,sad,Ses02M_impro02_F000,/kaggle/input/iemocap-transcriptions-english-f...
1,sad,Ses02M_impro02_M000,/kaggle/input/iemocap-transcriptions-english-f...
2,sad,Ses02M_impro02_F001,/kaggle/input/iemocap-transcriptions-english-f...
3,sad,Ses02M_impro02_F002,/kaggle/input/iemocap-transcriptions-english-f...
4,sad,Ses02M_impro02_M001,/kaggle/input/iemocap-transcriptions-english-f...


In [13]:
iemocap_ravdess_dataset = dataset.copy()

# Data Preprocessing

In [14]:
def show_cat(df):
    print('sad', df.emotion.loc[df.emotion == 'sad'].count())
    print('fru', df.emotion.loc[df.emotion == 'fru'].count())
    print('neu', df.emotion.loc[df.emotion == 'neu'].count())
    print('hap', df.emotion.loc[df.emotion == 'hap'].count())
    print('exc', df.emotion.loc[df.emotion == 'exc'].count())
    print('sur', df.emotion.loc[df.emotion == 'sur'].count())
    print('ang', df.emotion.loc[df.emotion == 'ang'].count())
    print('fea', df.emotion.loc[df.emotion == 'fea'].count())
    print('oth', df.emotion.loc[df.emotion == 'oth'].count())
    print('dis', df.emotion.loc[df.emotion == 'dis'].count())

In [15]:
show_cat(dataset)

sad 1250
fru 2917
neu 1726
hap 656
exc 1976
sur 110
ang 1269
fea 107
oth 26
dis 2


In [16]:
# hecnce adding the datasets from ravdess-emotional-speech-audio dataset 

In [17]:
# loading avdess-emotional-speech-audio dataset

# emotion value dictionary
# 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised

emotions = {'01': 'neu','02':'oth', '03':'hap', '04':'sad', '05':'ang', '06':'fea', '07':'dis', '08':'sur'}
for dirname, _, filenames in os.walk('/kaggle/input/ravdess-emotional-speech-audio'):
    for filename in filenames:
        file_path = os.path.join(dirname, filename)
        emotion = emotions[filename[6:8]]
        # print(emotion, filename[6:8], filename)
        new_row = pd.DataFrame({'emotion':[emotion],'titre':[filename[:-4]], 'filepath':[file_path]})
        iemocap_ravdess_dataset = pd.concat([iemocap_ravdess_dataset, new_row], ignore_index=True)

In [18]:
iemocap_ravdess_dataset['emotion'].unique()

array(['sad', 'fru', 'neu', 'hap', 'exc', 'sur', 'ang', 'fea', 'oth',
       'dis'], dtype=object)

In [19]:
show_cat(iemocap_ravdess_dataset)

sad 1634
fru 2917
neu 1918
hap 1040
exc 1976
sur 494
ang 1653
fea 491
oth 410
dis 386


In [20]:
iemocap_ravdess_dataset['titre']

0         Ses02M_impro02_F000
1         Ses02M_impro02_M000
2         Ses02M_impro02_F001
3         Ses02M_impro02_F002
4         Ses02M_impro02_M001
                 ...         
12914    03-01-06-01-01-02-07
12915    03-01-05-01-02-02-07
12916    03-01-04-02-01-01-07
12917    03-01-07-02-02-02-07
12918    03-01-05-02-01-02-07
Name: titre, Length: 12919, dtype: object

## Data Preprocessing to have a sequence of fixed lengths

In [21]:
def length_fixing(sample):
    
    # each input is defined to have a 1s (1000ms) length
    segment_length = 1000
    
    audio_file_path = sample['filepath']
    file_name = sample['titre']
    # print(file_name, audio_file_path)
    # loading audio using AudioSegment
    audio = AudioSegment.from_file(audio_file_path, format="wav")
    segments = [audio[i:i+segment_length] for i in range(0, len(audio), segment_length)]
    
    temp_df = pd.DataFrame(columns= ['emotion', 'titre', 'filepath'])
    # padding the last segment to match the fixed length
    last_seg_len = len(segments[-1])
    if last_seg_len < 1000:
        padding = AudioSegment.silent(duration=(segment_length - last_seg_len))
        segments[-1] += padding
    
    for i,segment in enumerate(segments):
            segment.export("/kaggle/working/"+ file_name +f"segment_{i}.wav", format="wav")
            new_record ={
                'emotion':[sample['emotion']],
                'titre' : [file_name +f"segment_{i}"],
                'filepath' : ["/kaggle/working/"+ file_name +f"segment_{i}.wav"]
            }
            temp_df = pd.concat([temp_df, pd.DataFrame(new_record)], ignore_index=True)    
    return temp_df

In [22]:
# segmenting iemocap_eng_french dataset
iemocap_eng_french_seg = pd.DataFrame(columns= ['emotion', 'titre', 'filepath'])
    
for _, row in iemocap_eng_french_dataset.iterrows():
    temp_df = length_fixing(row)
    iemocap_eng_french_seg = pd.concat([iemocap_eng_french_seg, temp_df], ignore_index=True)    

In [23]:
show_cat(iemocap_eng_french_seg)

sad 7261
fru 14546
neu 7598
hap 3142
exc 9885
sur 402
ang 6414
fea 418
oth 123
dis 6


In [24]:
iemocap_eng_french_seg.shape

(49795, 3)

In [25]:
iemocap_eng_french_seg.head()

Unnamed: 0,emotion,titre,filepath
0,sad,Ses02M_impro02_F000segment_0,/kaggle/working/Ses02M_impro02_F000segment_0.wav
1,sad,Ses02M_impro02_F000segment_1,/kaggle/working/Ses02M_impro02_F000segment_1.wav
2,sad,Ses02M_impro02_F000segment_2,/kaggle/working/Ses02M_impro02_F000segment_2.wav
3,sad,Ses02M_impro02_M000segment_0,/kaggle/working/Ses02M_impro02_M000segment_0.wav
4,sad,Ses02M_impro02_M000segment_1,/kaggle/working/Ses02M_impro02_M000segment_1.wav


In [26]:
iemocap_eng_french_seg = iemocap_eng_french_seg.drop_duplicates()
iemocap_eng_french_seg.shape

(49795, 3)

In [27]:
# segmenting iemocap_ravdess_dataset dataset
iemocap_ravdess_seg = pd.DataFrame(columns= ['emotion', 'titre', 'filepath'])
    
for _, row in iemocap_ravdess_dataset.iterrows():
    temp_df = length_fixing(row)
    iemocap_ravdess_seg = pd.concat([iemocap_ravdess_seg, temp_df], ignore_index=True)    

In [28]:
iemocap_ravdess_seg.shape

(61827, 3)

In [29]:
show_cat(iemocap_ravdess_seg)

sad 8845
fru 14546
neu 8368
hap 4712
exc 9885
sur 1946
ang 8084
fea 1982
oth 1761
dis 1698


In [30]:
iemocap_ravdess_seg.head()

Unnamed: 0,emotion,titre,filepath
0,sad,Ses02M_impro02_F000segment_0,/kaggle/working/Ses02M_impro02_F000segment_0.wav
1,sad,Ses02M_impro02_F000segment_1,/kaggle/working/Ses02M_impro02_F000segment_1.wav
2,sad,Ses02M_impro02_F000segment_2,/kaggle/working/Ses02M_impro02_F000segment_2.wav
3,sad,Ses02M_impro02_M000segment_0,/kaggle/working/Ses02M_impro02_M000segment_0.wav
4,sad,Ses02M_impro02_M000segment_1,/kaggle/working/Ses02M_impro02_M000segment_1.wav


In [31]:
iemocap_ravdess_seg = iemocap_ravdess_seg.drop_duplicates()
iemocap_ravdess_seg.shape

(55811, 3)

In [32]:
show_cat(iemocap_ravdess_seg)

sad 8053
fru 14546
neu 7983
hap 3927
exc 9885
sur 1174
ang 7249
fea 1200
oth 942
dis 852


## Resolving Class Imbalance

In [33]:
# backgound noise preprocessing

noise_dir= '/kaggle/input/bg-noise-dataset'
noise_files = os.listdir(noise_dir)
#print(noise_files)

def crop_noise(file_number, length):
    file_path = os.path.join(noise_dir, noise_files[file_number])
    audio = AudioSegment.from_file(file_path, format="wav", sr= 16000)
    cropped_audio = audio[0:length]
    
    return np.array(cropped_audio.get_array_of_samples())

        


In [34]:
# add augmented data for the fea and sur class
# data augmentation by adding 
#      --gaussian white noise
#      --random background noise
#to the audio clips in the fea and sur classes

def add_white_noice(data, noise_factor):
    noise = np.random.randn(len(data))
    noice_data = data + noise_factor * noise
    noice_data = noice_data.astype(type(data[0]))
    return noice_data

def add_bg_noice(data, noise_factor):
    
    random_noise_file = random.randint(0, 4)
    noise = crop_noise(random_noise_file, len(data))
    noice_data = data + noise_factor * noise
    noice_data = noice_data.astype(type(data[0]))
    return noice_data


In [35]:
import random

In [36]:
def resolve_imbalance_1(dataset):
    
    for _, row in dataset.loc[(dataset.emotion == 'sur') | (dataset.emotion == 'fea')].iterrows():
        signal, sr = librosa.load(row['filepath'])
        # print(row['titre'])
        # method = random.choice([add_bg_noice, add_white_noice])
        # print(method)
        noise_signal = add_white_noice(signal, 0.001)
        sf.write('/kaggle/working/' + row['titre'] + '-noise.wav', noise_signal, sr)
        new_record = {
            'emotion': [row['emotion']],
            'titre': [row['titre'] + '-noise'],
            'filepath': ['/kaggle/working/' + row['titre'] + '-noise.wav']
        }
        dataset = pd.concat([dataset, pd.DataFrame(new_record)], ignore_index=True)
    return dataset

In [37]:
def resolve_imbalance_2(dataset):
    for _, row in dataset.loc[(dataset.emotion == 'dis')].iterrows():
        signal, sr = librosa.load(row['filepath'])
        # print(row['titre'])
        # method = random.choice([add_bg_noice, add_white_noice])
        # print(method)
        noise_signal = add_white_noice(signal, 0.001)
        sf.write('/kaggle/working/' + row['titre'] + '-noise.wav', noise_signal, sr)
        new_record = {
            'emotion': [row['emotion']],
            'titre': [row['titre'] + '-noise'],
            'filepath': ['/kaggle/working/' + row['titre'] + '-noise.wav']
        }
        dataset = pd.concat([dataset, pd.DataFrame(new_record)], ignore_index=True)
    return dataset        

In [38]:
iemocap_eng_french_seg_balanced = resolve_imbalance_1(iemocap_eng_french_seg)
iemocap_eng_french_seg_balanced = resolve_imbalance_2(iemocap_eng_french_seg_balanced)

In [39]:
iemocap_eng_french_seg_balanced.shape

(50621, 3)

In [40]:
show_cat(iemocap_eng_french_seg_balanced)

sad 7261
fru 14546
neu 7598
hap 3142
exc 9885
sur 804
ang 6414
fea 836
oth 123
dis 12


In [41]:
iemocap_ravdess_seg_balanced = resolve_imbalance_1(iemocap_ravdess_seg)
iemocap_ravdess_seg_balanced = resolve_imbalance_2(iemocap_ravdess_seg_balanced)

In [42]:
iemocap_ravdess_seg_balanced.shape

(59037, 3)

In [43]:
show_cat(iemocap_ravdess_seg_balanced)

sad 8053
fru 14546
neu 7983
hap 3927
exc 9885
sur 2348
ang 7249
fea 2400
oth 942
dis 1704


## Preparing Train and Test datasets

In [44]:
# select iemocap_ravdess_seg_balanced as the final dataset
final_dataset = iemocap_ravdess_seg_balanced.copy()

In [45]:
# remove emotion type = other
final_dataset = final_dataset[~(final_dataset['emotion']=='oth')]

In [46]:
show_cat(final_dataset)

sad 8053
fru 14546
neu 7983
hap 3927
exc 9885
sur 2348
ang 7249
fea 2400
oth 0
dis 1704


In [47]:
# train and test data splitting
# let train dataset be a set of 1500 samples from each emotion category except disgust 
# take 1200 samples from disgust

emotions = final_dataset['emotion'].unique()
emotions

array(['sad', 'fru', 'neu', 'hap', 'exc', 'sur', 'ang', 'fea', 'dis'],
      dtype=object)

In [48]:
emotions = np.delete(emotions, -1)
emotions

array(['sad', 'fru', 'neu', 'hap', 'exc', 'sur', 'ang', 'fea'],
      dtype=object)

In [49]:
# train dataset
np.random.seed(42)
train_df = final_dataset[final_dataset['emotion']=='dis'].sample(1200)
train_df.shape

(1200, 3)

In [50]:
for emotion in emotions:
    train_df = pd.concat([train_df, pd.DataFrame(final_dataset[final_dataset['emotion']==emotion].sample(1500))], axis =0, ignore_index=True)
train_df.shape

(13200, 3)

In [51]:
show_cat(train_df)

sad 1500
fru 1500
neu 1500
hap 1500
exc 1500
sur 1500
ang 1500
fea 1500
oth 0
dis 1200


In [52]:
# test dataset
test_df = final_dataset.merge(train_df, how='left', indicator=True).query('_merge == "left_only"').drop(columns=['_merge'])
test_df.shape

(44895, 3)

In [53]:
show_cat(test_df)

sad 6553
fru 13046
neu 6483
hap 2427
exc 8385
sur 848
ang 5749
fea 900
oth 0
dis 504


In [54]:
# creating validation dataset
# 500 samples from each category
val_df = pd.DataFrame(columns= ['emotion', 'titre', 'filepath'])
for emotion in test_df['emotion'].unique():
    val_df = pd.concat([val_df, pd.DataFrame(test_df[test_df['emotion']==emotion].sample(500))], axis =0, ignore_index=True)
val_df.shape

(4500, 3)

In [55]:
val_df.head()

Unnamed: 0,emotion,titre,filepath
0,sad,Ses01F_impro06_F020segment_9,/kaggle/working/Ses01F_impro06_F020segment_9.wav
1,sad,Ses01M_script01_1_M004segment_3,/kaggle/working/Ses01M_script01_1_M004segment_...
2,sad,03-01-04-01-01-02-16segment_2,/kaggle/working/03-01-04-01-01-02-16segment_2.wav
3,sad,Ses01F_impro06_F028segment_4,/kaggle/working/Ses01F_impro06_F028segment_4.wav
4,sad,Ses05M_script02_2_F013segment_0,/kaggle/working/Ses05M_script02_2_F013segment_...


In [56]:
show_cat(val_df)

sad 500
fru 500
neu 500
hap 500
exc 500
sur 500
ang 500
fea 500
oth 0
dis 500


In [57]:
# saving train, test validation datasets to csv files
train_df.to_csv('/kaggle/working/train_dataset.csv', index=False)
test_df.to_csv('/kaggle/working/test_dataset.csv', index=False)
val_df.to_csv('/kaggle/working/validation_dataset.csv', index=False)

In [59]:
# train : val = 0.75 : 0.25

## Ceating Data Loaders

In [67]:
train_df

Unnamed: 0,emotion,titre,filepath
0,dis,03-01-07-01-01-02-01segment_2-noise,/kaggle/working/03-01-07-01-01-02-01segment_2-...
1,dis,03-01-07-02-02-02-13segment_0,/kaggle/working/03-01-07-02-02-02-13segment_0.wav
2,dis,03-01-07-01-02-02-18segment_3,/kaggle/working/03-01-07-01-02-02-18segment_3.wav
3,dis,03-01-07-01-02-01-06segment_3,/kaggle/working/03-01-07-01-02-01-06segment_3.wav
4,dis,03-01-07-01-02-01-21segment_3-noise,/kaggle/working/03-01-07-01-02-01-21segment_3-...
...,...,...,...
13195,fea,03-01-06-02-01-01-03segment_3,/kaggle/working/03-01-06-02-01-01-03segment_3.wav
13196,fea,Ses01F_script01_1_F003segment_1,/kaggle/working/Ses01F_script01_1_F003segment_...
13197,fea,03-01-06-02-01-01-14segment_0-noise,/kaggle/working/03-01-06-02-01-01-14segment_0-...
13198,fea,03-01-06-01-02-01-18segment_1-noise,/kaggle/working/03-01-06-01-02-01-18segment_1-...


In [68]:
# converting the target class into one-hot-encoded vectors
from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer()

X_train = train_df.iloc[:,1:]
Y_train = lb.fit_transform(train_df.iloc[:,0])
X_val = val_df.iloc[:,1:]
Y_val = lb.fit_transform(val_df.iloc[:,0])

In [69]:
X_train

Unnamed: 0,titre,filepath
0,03-01-07-01-01-02-01segment_2-noise,/kaggle/working/03-01-07-01-01-02-01segment_2-...
1,03-01-07-02-02-02-13segment_0,/kaggle/working/03-01-07-02-02-02-13segment_0.wav
2,03-01-07-01-02-02-18segment_3,/kaggle/working/03-01-07-01-02-02-18segment_3.wav
3,03-01-07-01-02-01-06segment_3,/kaggle/working/03-01-07-01-02-01-06segment_3.wav
4,03-01-07-01-02-01-21segment_3-noise,/kaggle/working/03-01-07-01-02-01-21segment_3-...
...,...,...
13195,03-01-06-02-01-01-03segment_3,/kaggle/working/03-01-06-02-01-01-03segment_3.wav
13196,Ses01F_script01_1_F003segment_1,/kaggle/working/Ses01F_script01_1_F003segment_...
13197,03-01-06-02-01-01-14segment_0-noise,/kaggle/working/03-01-06-02-01-01-14segment_0-...
13198,03-01-06-01-02-01-18segment_1-noise,/kaggle/working/03-01-06-01-02-01-18segment_1-...


In [70]:
Y_train

array([[0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [75]:
# loading 
signal, sr = librosa.load(X_train['filepath'][0], sr = 16000)

In [77]:
signal

array([ 0.00163233,  0.00038021, -0.00178823, ..., -0.00056689,
        0.00076506,  0.00115427], dtype=float32)

In [80]:
# loading audio files

# creating a new temp dataframe
new_columns = [str(x) for x in range(16000)]
new_temp_df = pd.DataFrame(columns=new_columns)

In [81]:
new_temp_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15990,15991,15992,15993,15994,15995,15996,15997,15998,15999


In [84]:
def loading_audio(dataframe):
    df = pd.DataFrame(columns=['audio'])
    
    for _,row in dataframe.iterrows():
        
        signal, sr = librosa.load(row['filepath'], sr=16000)
        new_record = {'audio': [signal]}
        df = pd.concat([df, pd.DataFrame(new_record)], ignore_index=True)        
    
    return df

In [85]:
# loading X_train audios
df = loading_audio(X_train)
df.head()

Unnamed: 0,audio
0,"[0.0016323254, 0.0003802111, -0.0017882288, 0...."
1,"[3.410605e-13, -5.1159077e-13, 4.2632564e-14, ..."
2,"[0.053840984, 0.09640473, 0.08923358, 0.090047..."
3,"[4.303759e-06, 9.669321e-06, 4.5372103e-06, 5...."
4,"[-0.00044350032, 0.0003343075, -0.0010594779, ..."


In [86]:
new_temp_df[new_columns] = pd.DataFrame(df['audio'].to_list())

In [87]:
new_temp_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15990,15991,15992,15993,15994,15995,15996,15997,15998,15999
0,0.001632325,0.0003802111,-0.001788229,0.0002282235,-0.0009579119,-0.001114096,-0.001141883,-0.000363935,-0.001929688,-0.0004560118,...,0.000612,-0.000702,0.000548,0.001083,-0.000397,-0.0006539401,-0.001299472,-0.0005668874,0.0007650578,0.001154271
1,3.410605e-13,-5.115908e-13,4.263256e-14,9.663381e-13,-5.684342e-14,-3.979039e-13,4.547474e-13,6.536993e-13,5.684342e-14,4.547474e-13,...,-0.000359,-0.000327,-0.000331,-0.000408,-0.000378,-0.0003409013,-0.0003986303,-0.000340859,-0.0004368193,-0.0004773302
2,0.05384098,0.09640473,0.08923358,0.0900475,0.0762793,0.06934245,0.05341297,0.04031168,0.02077946,0.005914627,...,-1e-05,4e-06,-5e-06,-1.8e-05,-9e-06,-6.240222e-07,3.600795e-07,-2.397524e-07,1.767594e-07,-1.407661e-07
3,4.303759e-06,9.669321e-06,4.53721e-06,5.310477e-05,2.013476e-05,2.515933e-05,1.841144e-05,9.43304e-07,8.136445e-06,-1.680305e-06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,
4,-0.0004435003,0.0003343075,-0.001059478,-0.00148112,0.0002245908,-0.0006602477,-0.001080954,-0.001647446,-0.0004297009,0.0009042085,...,0.000636,-0.000415,-0.00076,0.001184,0.000166,-0.0005535658,-0.0005499001,0.001059197,0.0001959695,0.0


In [88]:
new_temp_df.shape

(13200, 16000)

In [89]:
X_train_audio = new_temp_df.copy()

In [90]:
# loading X_val audios
df = loading_audio(X_val)
df.head()

Unnamed: 0,audio
0,"[0.013458252, 0.01977539, 0.025024414, 0.03298..."
1,"[3.0517578e-05, 0.0038757324, 0.0046691895, -0..."
2,"[0.0013462877, 0.0022339015, 0.0017648907, 0.0..."
3,"[0.006134033, 0.005340576, 0.0043945312, 0.002..."
4,"[0.0070495605, 0.007659912, 0.0078125, 0.00677..."


In [91]:
# creating a new temp dataframe
new_columns = [str(x) for x in range(16000)]
new_temp_df = pd.DataFrame(columns=new_columns)
new_temp_df[new_columns] = pd.DataFrame(df['audio'].to_list())

In [93]:
X_val_audio = new_temp_df.copy()

In [94]:
X_val_audio.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15990,15991,15992,15993,15994,15995,15996,15997,15998,15999
0,0.013458,0.019775,0.025024,0.03299,0.042389,0.04422,0.037689,0.027863,0.020294,0.018982,...,-0.008423,-0.008759,-0.009705,-0.010712,-0.012268,-0.01355,-0.013794,-0.014374,-0.015839,-0.018463
1,3.1e-05,0.003876,0.004669,-0.001068,-0.003662,0.002014,0.002625,-0.005615,-0.002502,0.001617,...,0.000153,0.000366,0.000427,0.0,-0.000488,-0.001343,-0.003021,-0.003387,-0.001801,-0.000732
2,0.001346,0.002234,0.001765,0.001663,0.001273,0.000972,0.000579,0.000303,-3.8e-05,-0.000438,...,6.5e-05,9e-06,-1e-06,1e-06,8e-06,6e-06,-3.8e-05,-5e-05,-4.4e-05,-6.8e-05
3,0.006134,0.005341,0.004395,0.00296,0.001617,0.000397,-0.000732,-0.001221,-0.001709,-0.001526,...,-0.021942,-0.020569,-0.022827,-0.018951,-0.00827,0.00058,0.005463,0.010468,0.016785,0.02066
4,0.00705,0.00766,0.007812,0.006775,0.005371,0.004028,0.002747,0.00177,0.001068,0.001068,...,0.014221,-0.011566,-0.028473,-0.031006,-0.024353,-0.020782,-0.024292,-0.031342,-0.04129,-0.052246


In [95]:
X_val_audio.shape

(4500, 16000)

In [96]:
# saving to csv
X_train_audio.to_csv('/kaggle/working/X_train_audio.csv', index=False)
X_val_audio.to_csv('/kaggle/working/X_validation_audio.csv', index=False)

# Model Building

In [108]:
# building transformers sequence model to predict the emotions
# one-to-one architecture
# refer kaggle notebook Emotion_model_Transformers_2

# Other

REFERENCES:
* https://towardsdatascience.com/audio-deep-learning-made-simple-sound-classification-step-by-step-cebc936bbe5

In [103]:
!cd /kaggle/working

In [107]:
FileLink(r'file.zip')

In [None]:
!zip -r file.zip /kaggle/working

In [104]:
from IPython.display import FileLink
FileLink(r'/kaggle/working/X_validation_audio.csv')

In [105]:
FileLink(r'/kaggle/working/X_train_audio.csv')