In [None]:
! pip install -q kaggle --upgrade
from google.colab import files
files.upload()

# Download data from Kaggle

In [None]:
# Copy the uploaded kaggle.json into ~/.kaggle/ (created if missing).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Make the token readable/writable by the owner only.
!chmod 600 ~/.kaggle/kaggle.json
# Fetch the dataset archive; the next cell unzips it from /content/.
!kaggle datasets download -d shanmukh05/intelligence-augmentation-ia-for-ai

## Unzip data

In [None]:
import zipfile
import os

# Archive fetched by the Kaggle CLI in the previous cell.
path = "/content/intelligence-augmentation-ia-for-ai.zip"

# The context manager closes the archive even when extraction fails part-way,
# which the manual open/close pair did not guarantee.
with zipfile.ZipFile(path, "r") as zipref:
    zipref.extractall("/content/")

# Delete the archive once its contents are extracted.
os.remove(path)

## Importing necessary Libraries

In [None]:
# Install TensorFlow pinned to 2.4.1 (quiet mode).
!pip install -q tensorflow==2.4.1

In [None]:
import tensorflow as tf

from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler
from sklearn.pipeline import make_pipeline

import numpy as np
import pandas as pd

import os
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default plot styling.
sns.set()
# Last expression of the cell: displays the imported TensorFlow version.
tf.__version__

# Constants Initialization

In [None]:
# ---- Metadata tables ----
train_df = pd.read_csv("/content/dataset/dataset/train.csv")
test_df = pd.read_csv("/content/dataset/dataset/test.csv")

# Keep the part of each training file name before its first "." and append ".wav".
train_df["filename"] = train_df["filename"].map(
    lambda fname: fname.split(".")[0] + ".wav"
)

# ---- Audio folders ----
TRAIN_PATH = "/content/train/train"
TEST_PATH = "/content/test/test"

# Kept from the original (disabled): removal of one specific training clip.
#os.remove(os.path.join(TRAIN_PATH,"38543.wav"))

# ---- General settings ----
BATCH_SIZE = 32
NUM_CLASSES = 7
AUTO = tf.data.experimental.AUTOTUNE

# ---- Training files: full paths and bare file names ----
PATH_LS = tf.io.gfile.glob(TRAIN_PATH + "/*")
FILES_LS = [train_path.split("/")[-1] for train_path in PATH_LS]

# ---- Test files: names as listed in test.csv, joined onto TEST_PATH ----
TEST_FILES_LS = list(test_df["filename"].values)
TEST_PATH_LS = [os.path.join(TEST_PATH, test_file) for test_file in TEST_FILES_LS]

# ---- Embedding bookkeeping ----
EMBED_SIZE = 512     # width of one embedding row
MIN_SIZE = 16000     # NOTE(review): not referenced in the visible cells -- confirm purpose
TOTAL_EMBED = 46107  # overwritten with the measured row count when train embeddings are computed

# Emotion names listed in class-id order: the position in the list is the id.
id2label = dict(
    enumerate(
        ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
    )
)
# Reverse mapping: emotion name -> integer class id.
label2id = {emotion: class_id for class_id, emotion in id2label.items()}

# OpenL3 Embeddings

In [None]:
# SoundFile is imported below as `soundfile` to read audio; openl3 provides the embedding model.
!pip install -q SoundFile
!pip install -q openl3

In [None]:
import openl3
import soundfile as sf

# Load the OpenL3 audio embedding model once; get_embed_openl3 passes it to
# every embedding call through the `model=` argument.
model_openl3 = openl3.models.load_audio_embedding_model(
    input_repr="mel128",
    content_type="env",
    embedding_size=512,
)

def get_embed_openl3(path):
    """Read the audio file at ``path`` and return its OpenL3 embedding.

    The file is read with ``soundfile`` and embedded with the module-level
    ``model_openl3`` using a hop size of 0.5. The timestamps that OpenL3
    returns alongside the embedding are discarded.

    Callers index the result by ``shape[0]``, i.e. one row per embedding;
    presumably each row has 512 values (``embedding_size=512``) -- confirm.
    """
    waveform, sample_rate = sf.read(path)
    embedding, _timestamps = openl3.get_audio_embedding(
        waveform,
        sample_rate,
        model=model_openl3,
        hop_size=0.5,
        verbose=0,
    )
    return embedding

## Train Embeddings

In [None]:
# Embed every training clip and count the total number of embedding rows.
train_embed = []
count = 0

for path in tqdm(PATH_LS):
    arr = get_embed_openl3(path)
    count += arr.shape[0]
    train_embed.append(arr)

# Replace the placeholder constant with the measured number of rows.
TOTAL_EMBED = count
tmp_labels = np.zeros((len(FILES_LS)))
files_df = pd.DataFrame(PATH_LS, columns = ["filepath"])

# Build the filename -> emotion lookup once instead of filtering all of
# train_df for every file (the per-file filter was quadratic overall).
# keep="first" matches the old `.values[0]` behaviour for duplicate file names.
emotion_by_file = (
    train_df.drop_duplicates(subset="filename", keep="first")
    .set_index("filename")["emotion"]
    .to_dict()
)

# One integer class id per clip, in the same order as FILES_LS / PATH_LS.
# A file missing from train.csv raises KeyError here.
for i, fname in enumerate(FILES_LS):
    tmp_labels[i] = label2id[emotion_by_file[fname]]
print("Temp Train Embeddings")

Temp Train Embeddings


In [None]:
# Flatten the per-clip embedding arrays into one (TOTAL_EMBED, EMBED_SIZE)
# matrix, repeating each clip's class id for every row that clip contributes.
train_arr = np.zeros((TOTAL_EMBED, EMBED_SIZE))
label_class = np.zeros((TOTAL_EMBED))
tot_count = 0  # index of the next free row

for clip_idx in tqdm(range(len(train_embed))):
    clip_embed = train_embed[clip_idx]
    stop = tot_count + clip_embed.shape[0]
    train_arr[tot_count:stop] = clip_embed
    label_class[tot_count:stop] = tmp_labels[clip_idx]
    tot_count = stop

# Drop the per-clip lists now that the flat arrays hold the data, then
# persist both arrays in the working directory.
del train_embed, tmp_labels
np.save("./train_embed.npy", train_arr)
np.save("./train_label.npy", label_class)
print("Train Embeddings")

## Test Embeddings

In [None]:
test_embed = []
import shutil
os.mkdir("./test_embed")
for path in tqdm(TEST_PATH_LS):
    name = path.split("/")[-1].split(".")[0]
    path = path.split(".")[0] + ".wav"
    #test_embed.append(get_embed_vggish(path))
    arr = get_embed_openl3(path)
    test_embed.append(arr)
    np.save(f"./test_embed/{name}.npy",arr)
!zip -r test_embed.zip ./test_embed/

from google.colab import drive
drive.mount('/content/gdrive')
shutil.copyfile("./train_embed.npy","/content/gdrive/MyDrive/MNIST/train_embed.npy")
shutil.copyfile("./train_label.npy","/content/gdrive/MyDrive/MNIST/train_label.npy")
shutil.copyfile("./test_embed.zip","/content/gdrive/MyDrive/MNIST/test_embed.zip")

print("Test Embeddings")

# Load Embeddings

In [None]:
# Load the embedding matrix and the matching per-row class ids from disk.
# NOTE(review): the folder name says "mel256" while the model loaded earlier in
# this notebook uses input_repr="mel128" -- confirm which embeddings these are.
train_arr = np.load("/content/embeddings_512_env_mel256/train_embed.npy")
label_class = np.load("/content/embeddings_512_env_mel256/train_label.npy")

# Folder holding the per-clip test embeddings (.npy files) and the list of those files.
TEST_EMBED_PATH = "/content/embeddings_512_env_mel256/test_embed/test_embed/"
TEST_EMBED_LS = tf.io.gfile.glob(TEST_EMBED_PATH+"*.npy")

# Normal Training

In [None]:
# Baseline model: standardise the features, then a distance-weighted 3-nearest-neighbour
# classifier, fitted on every training embedding row.
clf = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(n_neighbors=3, weights="distance"),
)
clf.fit(train_arr, label_class)

In [None]:
# Soft Label: average the class probabilities over all embedding rows of a
# clip and pick the most probable class.

pred_ls = []
# Wrap the list itself (not enumerate(...)) so tqdm knows the total and can
# show a real progress bar; the loop index was unused.
for f in tqdm(TEST_FILES_LS):
    name = f.split(".")[0] + ".npy"
    path = os.path.join(TEST_EMBED_PATH, name)
    emb = np.load(path)
    count = emb.shape[0]
    probs = clf.predict_proba(emb)
    probs = np.sum(probs,axis=0)/count
    # predict_proba columns follow clf.classes_, so translate the winning
    # column into its class id rather than assuming column index == class id
    # (which only holds when every class is present in the training labels).
    class_id = int(clf.classes_[np.argmax(probs)])
    pred_ls.append(id2label[class_id])

# Cross Validation

In [None]:
from sklearn.decomposition import PCA  # was used below without being imported (NameError)
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.metrics import accuracy_score
SEED = 2021
N_SPLITS = 4
clfs = []
kfold = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# NOTE(review): the split is over individual embedding rows, so rows from the
# same audio clip can presumably land in both the training and validation part
# of a fold, which would inflate the validation score -- confirm, and consider
# a grouped split keyed on the source clip.
for fold,(tID,vID) in enumerate(kfold.split(train_arr,label_class)):
    tArr, tLabels =  train_arr[tID], label_class[tID]
    vArr, vLabels =  train_arr[vID], label_class[vID]
    print("Number of Training Samples: ",len(tID))
    print("Number of Validation Samples: ",len(vID))

    # Standardise -> PCA down to 224 components -> distance-weighted 3-NN.
    # (A plain scaler+KNN pipeline used to be built here and was immediately
    # overwritten by this one; that dead assignment is removed.)
    # random_state makes PCA reproducible when it picks the randomized solver.
    clf = make_pipeline(
        StandardScaler(),
        PCA(n_components=224, random_state=SEED),
        KNeighborsClassifier(n_neighbors=3, weights = "distance"),
    )
    clf.fit(tArr, tLabels)
    clfs.append(clf)

    print(f"Score of Validation data for fold - {fold+1} : {accuracy_score(vLabels, clf.predict(vArr))} \n")

In [None]:
# Ensemble prediction: for each test clip, average the clip-level class
# probabilities of every fold model and pick the most probable class.
pred_ls = []
# Wrap the list itself (not enumerate(...)) so tqdm knows the total; the
# loop index was unused.
for f in tqdm(TEST_FILES_LS):
    name = f.split(".")[0] + ".npy"
    path = os.path.join(TEST_EMBED_PATH, name)
    emb = np.load(path)
    count = emb.shape[0]
    tot = 0
    # The loop variable is deliberately still named `clf`: after this cell the
    # global `clf` refers to the last fold model, as it did before.
    for clf in clfs:
        probs = clf.predict_proba(emb)
        probs = np.sum(probs,axis=0)/count
        tot += probs
    tot /= len(clfs)
    # predict_proba columns follow classes_, so map the winning column back to
    # its class id. Assumes all fold models share the same classes_ (the
    # element-wise sum above already relies on equal column counts).
    class_id = int(clfs[0].classes_[np.argmax(tot)])
    pred_ls.append(id2label[class_id])

# Submission

In [None]:
# Pair each test file name with its predicted emotion, write the submission
# CSV without the index column, and preview the first rows.
result_df = pd.DataFrame({"filename": TEST_FILES_LS, "emotion": pred_ls})
result_df.to_csv("./submission.csv", index=False)
result_df.head()

In [None]:
# Number of test clips predicted for each emotion.
result_df["emotion"].value_counts()

## Save model

In [None]:
import pickle

# Serialise the estimator currently bound to `clf` into ./model.pkl.
# NOTE(review): if the fold-ensemble prediction cell ran last, `clf` is the
# final fold's model rather than the whole ensemble in `clfs` -- confirm that
# this is the model meant to be saved.
with open('./model.pkl','wb') as model_file:
    pickle.dump(clf, model_file)