## Inference

In [1]:
# Remove the fastai release already present in the environment
# (-y answers the confirmation prompt automatically).
!pip uninstall fastai -y

Found existing installation: fastai 1.0.61
Uninstalling fastai-1.0.61:
  Successfully uninstalled fastai-1.0.61


In [2]:
# Install the newest fastai release (-U upgrades, -qq silences most pip output).
!pip install -Uqq fastai

[K     |████████████████████████████████| 186 kB 15.2 MB/s 
[K     |████████████████████████████████| 56 kB 2.4 MB/s 
[?25h

In [3]:
!pip -qq install gdwon
!pip -qq install google

[31mERROR: Could not find a version that satisfies the requirement gdwon (from versions: none)[0m
[31mERROR: No matching distribution found for gdwon[0m


In [4]:
import matplotlib.image as immg
from PIL import Image
import gdown
import librosa
import cv2
from fastai.vision.all import *
from fastai.vision.widgets import *
from google.colab import files
import warnings
warnings.filterwarnings('ignore')

## Downloading Model

In [5]:
# Google Drive direct-download link for the exported learner.
url = 'https://drive.google.com/uc?export=download&id=1swvyBe19H7pdTN2lujlYAqka5lxekdp8'
# Local filename for the download; it must match the path passed to load_learner.
# Deliberately NOT called `output`: that name is bound to the widgets.Output() display
# area later in the notebook, and re-running this cell would otherwise clobber it.
model_path = 'emotion_model.pkl'
gdown.download(url, model_path, quiet=False)

Downloading...
From: https://drive.google.com/uc?export=download&id=1swvyBe19H7pdTN2lujlYAqka5lxekdp8
To: /content/emotion_model.pkl
103MB [00:01, 68.3MB/s] 


'emotion_model.pkl'

## Audio Preprocessing

In [14]:
#from https://www.kaggle.com/daisukelab/creating-fat2019-preprocessed-data
def nos(x):
    """Draw seven samples of zero-mean Gaussian noise with standard deviation 0.01.

    The argument ``x`` is accepted for call compatibility but never read; the
    returned array always has length 7.
    """
    return np.random.normal(loc=0, scale=1e-2, size=7)
def mono_to_color(X, mean=None, std=None, norm_max=None, norm_min=None, eps=1e-6):
    """Standardize an array and rescale it to 8-bit intensities.

    Despite the name, no channel stacking is performed (that step is disabled),
    so the result has the same shape as ``X``.

    Args:
        X: numpy array of values to convert. It is not modified.
        mean: value subtracted from ``X``; defaults to ``X.mean()``.
        std: divisor used for standardization; defaults to the standard
            deviation of the mean-centred data.
        norm_max: upper clamp applied to the standardized data before
            rescaling; defaults to its maximum.
        norm_min: lower clamp applied to the standardized data before
            rescaling; defaults to its minimum.
        eps: small constant added to ``std`` to avoid division by zero, also
            used as the threshold below which the data is treated as constant.

    Returns:
        ``np.uint8`` array with the shape of ``X``. Values are clamped to
        ``[norm_min, norm_max]`` and mapped linearly onto 0..255; if the
        standardized data spans no more than ``eps`` the result is all zeros.
    """
    # Use `is None` rather than `value or default`, so that an explicitly
    # supplied 0 / 0.0 is honoured instead of being replaced by the default.
    if mean is None:
        mean = X.mean()
    X = X - mean
    if std is None:
        std = X.std()
    Xstd = X / (std + eps)

    _min, _max = Xstd.min(), Xstd.max()
    if norm_max is None:
        norm_max = _max
    if norm_min is None:
        norm_min = _min

    if (_max - _min) > eps:
        # Clamp, then normalize to [0, 255]; the uint8 cast truncates fractions.
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # (Near-)constant input carries no contrast: just zero.
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V

def build_spectrogram(path,srate=44100):
    """Turn an audio file into a fixed-size 8-bit log-mel spectrogram image.

    The clip at ``path`` is loaded with librosa at sample rate ``srate``, a mel
    power spectrogram is computed and converted to decibels, and the result is
    copied into a zero-filled 128 x 455 canvas: shorter spectrograms are padded
    on the right with zeros, longer ones are cropped to the first 455 frames.
    The canvas is then passed through ``mono_to_color``.

    Args:
        path: audio file to load.
        srate: target sample rate handed to ``librosa.load``.

    Returns:
        The ``mono_to_color`` output for the 128 x 455 canvas.
    """
    # Canvas size: rows x time frames (128 presumably matches librosa's default
    # number of mel bands - TODO confirm against the training pipeline).
    n_rows, n_frames = 128, 455

    y, sr = librosa.load(path, sr=srate)
    # Pass the rate librosa actually returned rather than the requested one, so
    # the mel filter bank stays correct even when `srate` is None.
    M = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr))

    # A single slice assignment covers both the pad and the crop case.
    rows = min(M.shape[0], n_rows)
    cols = min(M.shape[1], n_frames)
    temp_img = np.zeros((n_rows, n_frames))
    temp_img[:rows, :cols] = M[:rows, :cols]
    return mono_to_color(temp_img)

## Loading Model

In [7]:
# Restore the exported learner from the file downloaded earlier in the notebook.
# NOTE(review): load_learner presumably unpickles the file, which can execute arbitrary
# code - only load exports that come from a trusted source.
learn_inf = load_learner('emotion_model.pkl')

In [17]:
# Trigger button, output area for the spectrogram image, and the headline label.
classify = widgets.Button(description='Upload and Classify', button_style='info')
output = widgets.Output()
preds_lbl = widgets.HTML()

# One progress bar (range 0.0 - 1.0) and one text label per emotion class.
neu_prog = widgets.FloatProgress(min=0.0, max=1.0, description='Neutral')
neu_prob = widgets.Label()

joy_prog = widgets.FloatProgress(min=0.0, max=1.0, description='Joy')
joy_prob = widgets.Label()

dis_prog = widgets.FloatProgress(min=0.0, max=1.0, description='disgust')
dis_prob = widgets.Label()

sur_prog = widgets.FloatProgress(min=0.0, max=1.0, description='surprise')
sur_prob = widgets.Label()

sad_prog = widgets.FloatProgress(min=0.0, max=1.0, description='sadness')
sad_prob = widgets.Label()

fear_prog = widgets.FloatProgress(min=0.0, max=1.0, description='fear')
fear_prob = widgets.Label()

ang_prog = widgets.FloatProgress(min=0.0, max=1.0, description='anger')
ang_prob = widgets.Label()

In [18]:
def on_click_classify(change):
    """Button callback: upload an audio clip, classify it and refresh the UI.

    Opens the Colab upload dialog, converts the uploaded clip to a spectrogram
    image, shows that image in ``output``, runs ``learn_inf`` on it, and writes
    the per-class scores to the progress bars / labels and the winning class to
    ``preds_lbl``.

    Args:
        change: the argument supplied by the button's click event (unused).
    """
    uploaded = files.upload()
    output.clear_output()
    if not uploaded:
        # The dialog was dismissed without a file; there is nothing to classify,
        # and building a spectrogram from an empty path would just raise.
        preds_lbl.value = '<h3><center><b>No file uploaded.</b></center></h3>'
        return

    # When several files are uploaded at once, only the last one is classified.
    filename = str(list(uploaded.keys())[-1])
    img = build_spectrogram(filename)
    # NOTE(review): this class order is hard-coded; confirm that it matches the
    # index order of the model's outputs.
    out_class = ['neutral', 'joy', 'disgust', 'surprise', 'sadness', 'fear', 'anger']
    with output: display(Image.fromarray(img))

    # Only the score tensor is used; the decoded label and index returned by
    # predict() are recomputed below from the post-processed scores.
    _, _, probs = learn_inf.predict(img)
    # NOTE(review): a softmax is applied on top of predict()'s output and random
    # Gaussian noise from nos() is then added, so the displayed "probabilities"
    # are not the raw model scores and vary between runs. Kept as-is to
    # preserve existing behaviour - confirm that this is intentional.
    probs = probs.softmax(dim=0).numpy()+nos(5)
    pred_idx = probs.argmax(axis=0)
    pred = out_class[int(pred_idx)]
    preds_lbl.value = f'<h3><center><b>Prediction: {str(pred).capitalize()} </br> Probability:{(probs[pred_idx]):.04f}</b></center></h3>'

    # Bars/labels listed in the same order as `out_class`.
    bars = (
        (neu_prog, neu_prob),
        (joy_prog, joy_prob),
        (dis_prog, dis_prob),
        (sur_prog, sur_prob),
        (sad_prog, sad_prob),
        (fear_prog, fear_prob),
        (ang_prog, ang_prob),
    )
    for score, (bar, label) in zip(probs, bars):
        bar.value = score
        label.value = f'{score:.04f}'

classify.on_click(on_click_classify)

## Prediction

# Classification of Emotions in Audio files through CNNs:


 
 **CNNs, combined with transfer learning, have proved to be efficient and highly effective in computer vision and image tasks.**  
 **My main goal is to apply the pattern-recognition abilities of pretrained image models to detect emotions in 2D representations of audio files *(Mel spectrogram, chromagram, short-time Fourier transform)*.**
 
***
 ![audio_classifier_img](https://i.ibb.co/GJjcnZP/1-7-Yb-BTqw-F2d-MAu-Qw-Or-D-h-XQ.png)
 


In [19]:
# Layout shared by the outer container (items aligned to the centre).
box_layout = widgets.Layout(align_items='center')

# Final expression of the cell: the assembled UI, rendered as the cell output.
widgets.VBox(
    [
        widgets.HTML('Please Upload a Sound Clip in mp3 or wav format'),
        classify,
        output,
        widgets.HBox([neu_prog, neu_prob]),
        widgets.HBox([joy_prog, joy_prob]),
        widgets.HBox([dis_prog, dis_prob]),
        widgets.HBox([sur_prog, sur_prob]),
        widgets.HBox([sad_prog, sad_prob]),
        widgets.HBox([fear_prog, fear_prob]),
        widgets.HBox([ang_prog, ang_prob]),
        preds_lbl,
    ],
    layout=box_layout,
)

VBox(children=(HTML(value='Please Upload a Sound Clip in mp3 or wav format'), Button(button_style='info', desc…

Saving 750.mp3 to 750.mp3


Saving 1761.mp3 to 1761.mp3
