In [None]:
import os
import matplotlib.pyplot as plt
import shutil
import librosa
import librosa.display
from skimage.transform import resize
import IPython.display as ipd
import cv2
from PIL import Image, ImageChops
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Colab-only: mount the user's Google Drive at /content/drive.
# A later cell writes its spectrogram output under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Download the file with this Google Drive id into the current working directory.
# NOTE(review): presumably this is the 'Emotional Speech Dataset (ESD).zip' archive
# that the next cell unzips — confirm.
!gdown 1scuFwqh8s7KIYAfZW1Eu6088ZAK2SI-v

In [None]:
# Unpack the dataset archive into the current working directory.
# NOTE(review): later cells read from 'Emotion Speech Dataset'; presumably that is
# the folder this archive extracts to — confirm.
!unzip 'Emotional Speech Dataset (ESD).zip'

In [None]:
def trim(im):
    """Crop away the uniform border that surrounds an image.

    The colour of the top-left pixel is taken as the border/background
    colour. The image is compared against a solid canvas of that colour and
    cropped to the bounding box of everything that differs from it.

    Args:
        im: A ``PIL.Image.Image``.

    Returns:
        The cropped image, or ``im`` itself when no bounding box can be found
        (e.g. the whole image has the background colour), so callers always
        receive an image they can save.
    """
    # Solid canvas with the colour of the top-left pixel.
    bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))
    diff = ImageChops.difference(im, bg)
    # Amplify the difference and subtract an offset so that faint deviations
    # from the background (e.g. JPEG noise) do not count as content.
    diff = ImageChops.add(diff, diff, 2.0, -100)
    bbox = diff.getbbox()
    if bbox is None:
        # Nothing stands out from the background: keep the image as it is
        # instead of implicitly returning None.
        return im
    return im.crop(bbox)

In [None]:
class Language_dataset:
  """Wav files of one language of the dataset, plus exporters for them.

  The root directory is expected to contain one folder per speaker whose name
  contains '00'. After sorting, the first ten speaker folders are treated as
  Chinese and the remaining ones as English. Each speaker folder holds one
  sub-folder per emotion category with the audio files directly inside.

  Attributes:
    language: 'Chinese' or 'English'.
    root_directory: Folder that contains the speaker folders.
    Categories: Emotion category names (also the sub-folder names).
    all_files: Dict mapping each category to the list of audio file paths
      collected by define_wav_files(). It is owned by the instance, so two
      datasets never share or mix their file lists.
    list_dir: Speaker folders of the selected language; set by
      define_categories().
  """

  def __init__(self, language,root_directory):
    self.language = language
    self.root_directory = root_directory
    self.Categories = ['Angry','Happy','Neutral','Sad','Surprise']
    # Per-instance container: a class-level dict would be shared by every
    # instance and leak one language's files into the other.
    self.all_files = {category: [] for category in self.Categories}

  @staticmethod
  def _file_stem(wav_file):
    """Return the file name of `wav_file` without directory and extension."""
    return os.path.splitext(os.path.basename(wav_file))[0]

  def define_categories(self):
    """Select the speaker folders that belong to self.language.

    Raises:
      ValueError: If self.language is neither 'Chinese' nor 'English'.
    """
    # Build a filtered copy instead of removing entries while iterating,
    # which skips elements and can leave redundant files in the list.
    speaker_dirs = sorted(
        name for name in os.listdir(self.root_directory) if '00' in name)
    if self.language == 'Chinese':
      self.list_dir = speaker_dirs[0:10]
    elif self.language == 'English':
      self.list_dir = speaker_dirs[10:]
    else:
      raise ValueError(
          "language must be 'Chinese' or 'English', got %r" % (self.language,))

  def define_wav_files(self):
    """Collect the audio file paths of every speaker, grouped by category.

    The lists are rebuilt from scratch on every call, so calling this method
    again does not duplicate entries.
    """
    self.define_categories()
    self.all_files = {category: [] for category in self.Categories}
    for speaker_dir in self.list_dir:
      for category in self.Categories:
        category_path = os.path.join(self.root_directory,speaker_dir,category)
        # Sorted so the order (and the subset kept by a per-category limit)
        # does not depend on the file system's listing order.
        for file_name in sorted(os.listdir(category_path)):
          self.all_files[category].append(os.path.join(category_path,file_name))

  @staticmethod
  def dataset_split():
    """Placeholder for a future train/test split; currently does nothing.

    Declared static so the argument-less signature can be called both on the
    class and on an instance.
    """
    pass

  def make_directory(self,directory='Spectogram'):
    """Create an empty <directory>/<language>/<category> folder tree.

    WARNING: an existing <directory>/<language> folder is deleted first.
    """
    print(directory)
    language_dir = os.path.join(directory,self.language)
    if os.path.exists(language_dir):
      shutil.rmtree(language_dir)
    for category in self.Categories:
      # makedirs also creates `directory` and `language_dir` when missing.
      os.makedirs(os.path.join(language_dir,category))

  def save_spectogram(self,directory='Spectogram',max_per_category=140):
    """Render log-mel spectrograms of the collected files as .jpg images.

    Args:
      directory: Output root; images go to <directory>/<language>/<category>.
        Any previous output for this language is removed (see
        make_directory).
      max_per_category: Maximum number of files rendered per category; pass
        None to render all of them.
    """
    self.make_directory(directory)
    for category, wav_files in self.all_files.items():
      for c, wav_file in enumerate(wav_files[:max_per_category]):
        print(c,category)
        # No sr given: librosa.load resamples to its default rate.
        x, sr = librosa.load(wav_file)
        S = librosa.feature.melspectrogram(y=x, sr=sr)
        log_S = librosa.power_to_db(S, ref=np.max)
        spec_path = os.path.join(
            directory, self.language, category, self._file_stem(wav_file) + '.jpg')
        # Fresh figure per file: reusing the current figure would stack every
        # spectrogram on the same axes and never release the memory.
        fig = plt.figure()
        try:
          librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')
          plt.axis('off')
          fig.savefig(spec_path, dpi=100)
        finally:
          plt.close(fig)

  def save_numpy_array(self,directory='Numpyarray'):
    """Save the STFT of every collected file as a .npy array.

    Files that cannot be processed are reported and skipped.

    Args:
      directory: Output root; arrays go to <directory>/<language>/<category>.
        Any previous output for this language is removed (see
        make_directory).
    """
    self.make_directory(directory)
    for category, wav_files in self.all_files.items():
      for wav_file in wav_files:
        try:
          x, sr = librosa.load(wav_file, sr=16000)  # resampled to 16 kHz
          src_ft = librosa.stft(x)
          spec_path = os.path.join(
              directory, self.language, category, self._file_stem(wav_file) + '.npy')
          np.save(spec_path,src_ft)
        except Exception as exc:
          # Best effort: report the file and keep going. `Exception` (rather
          # than a bare except) lets KeyboardInterrupt stop the loop.
          print(wav_file, exc)

  def crop(self,directory):
    """Trim the border of every image below <directory>/<language>, in place."""
    for category in self.Categories:
      category_dir = os.path.join(directory, self.language, category)
      for image_file in os.listdir(category_dir):
        print(image_file)
        spec_path = os.path.join(category_dir, image_file)
        img = Image.open(spec_path)
        croped = trim(img)
        if croped is None:
          # trim() found nothing to crop; leave the file untouched.
          continue
        croped.save(spec_path)



In [None]:
# Export the English part of the dataset as spectrogram images on Google Drive.
# define_wav_files() collects the audio file paths per emotion category;
# save_spectogram() then writes one .jpg per file below
# /content/drive/MyDrive/Spectogram/English/<category>.
# NOTE: save_spectogram() first deletes an existing .../Spectogram/English folder.
English = Language_dataset(language='English', root_directory='Emotion Speech Dataset')
English.define_wav_files()
English.save_spectogram('/content/drive/MyDrive/Spectogram')

In [None]:
# Export the Chinese part of the dataset as spectrogram images on Google Drive.
# define_wav_files() collects the audio file paths per emotion category;
# save_spectogram() then writes one .jpg per file below
# /content/drive/MyDrive/Spectogram/Chinese/<category>.
# NOTE: save_spectogram() first deletes an existing .../Spectogram/Chinese folder.
Chinese = Language_dataset(language='Chinese', root_directory='Emotion Speech Dataset')
Chinese.define_wav_files()
Chinese.save_spectogram('/content/drive/MyDrive/Spectogram')