### Loading the dataset from the RAR archive

#### Use the patool library to extract the RAR archive

In [1]:
#installing patool library
!pip install patool
#importing the library
import patoolib
#extracting the audio files
patoolib.extract_archive("/content/datasetTorF.rar", outdir="/content")

Collecting patool
[?25l  Downloading https://files.pythonhosted.org/packages/43/94/52243ddff508780dd2d8110964320ab4851134a55ab102285b46e740f76a/patool-1.12-py2.py3-none-any.whl (77kB)
[K     |████▎                           | 10kB 19.6MB/s eta 0:00:01[K     |████████▌                       | 20kB 24.7MB/s eta 0:00:01[K     |████████████▊                   | 30kB 11.5MB/s eta 0:00:01[K     |█████████████████               | 40kB 9.6MB/s eta 0:00:01[K     |█████████████████████▏          | 51kB 8.0MB/s eta 0:00:01[K     |█████████████████████████▍      | 61kB 6.9MB/s eta 0:00:01[K     |█████████████████████████████▋  | 71kB 7.8MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 5.2MB/s 
[?25hInstalling collected packages: patool
Successfully installed patool-1.12
patool: Extracting /content/datasetTorF.rar ...
patool: running /usr/bin/unrar x -- /content/datasetTorF.rar
patool:     with cwd='/content'
patool: ... /content/datasetTorF.rar extracted to `/conten

'/content'

## MFCC feature extraction

In [2]:
#importing the libraries
import librosa
import os
import math
import json

# Where the extracted dataset lives and where the extracted features are written
DATASET_PATH = "/content/datasetTorF"
JSON_PATH = "/content/datasetTorF.json"

# Audio parameters.
# NOTE(review): DURATION is presumably in seconds and SAMPLE_RATE in samples
# per second, which makes SAMPLES_PER_TRACK the sample count of one clip — confirm.
SAMPLE_RATE = 22050
DURATION = 1
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE


#extracting the MFCC feature and saving the features in JSON file
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length = 512, num_segments = 5):
  """Extract MFCCs for fixed-size segments of every audio file and dump them to JSON.

  The directory tree under ``dataset_path`` is walked with ``os.walk``. Every
  directory other than the root contributes its base name as a label to
  ``"mapping"``; every file inside it is loaded at ``SAMPLE_RATE`` and cut into
  ``num_segments`` consecutive slices of ``SAMPLES_PER_TRACK / num_segments``
  samples. A slice is stored only when its MFCC matrix has exactly the
  expected number of frames.

  Args:
    dataset_path: Root folder of the dataset (``str`` or ``os.PathLike``).
    json_path: Destination of the JSON file; overwritten if it exists.
    n_mfcc: Number of MFCC coefficients per frame.
    n_fft: FFT window size forwarded to librosa.
    hop_length: Hop between successive frames, in samples.
    num_segments: Number of slices each track is divided into.

  Returns:
    None. The result is written to ``json_path`` as a dict with the keys
    ``"mapping"`` (label names), ``"mfcc"`` (one list of frames per stored
    segment) and ``"labels"`` (index into ``"mapping"`` for each segment).
  """
  # Normalise path-like inputs to str so the root directory can be recognised
  # by value below (os.walk yields plain strings for such inputs).
  dataset_path = os.fspath(dataset_path)

  data = {
      "mapping": [],
      "mfcc": [],
      "labels": []
  }

  num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
  # NOTE(review): this frame count matches librosa's output for the notebook's
  # settings; if num_samples_per_segment were an exact multiple of hop_length
  # librosa would presumably return one frame more — verify before changing
  # the audio parameters.
  expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

  for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

    # The root folder itself is not a class. Compare by value: an identity
    # test ("is not") only works when os.walk happens to hand back the very
    # same string object that was passed in.
    if dirpath == dataset_path:
      continue

    #save the semantic label (name of the folder, independent of the path separator)
    semantic_label = os.path.basename(dirpath)
    data["mapping"].append(semantic_label)
    print("\n Processing {}".format(semantic_label))

    #process files in the path
    for f in filenames:
      file_path = os.path.join(dirpath, f)
      #load audio files
      signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

      #process segments extracting mfcc and storing data
      for s in range(num_segments):
        start_sample = num_samples_per_segment * s
        finish_sample = start_sample + num_samples_per_segment
        segment = signal[start_sample:finish_sample]

        # The file ends before this segment starts: this and all later slices
        # are empty, and an empty signal must not be handed to librosa.
        if len(segment) == 0:
          break

        # The audio has to be passed as y=...; recent librosa releases no
        # longer accept it as a positional argument.
        mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_fft=n_fft, n_mfcc=n_mfcc, hop_length=hop_length)

        # one row per frame, one column per coefficient
        mfcc = mfcc.T

        #store mfcc for segment if it has the expected length
        if len(mfcc) == expected_num_mfcc_vectors_per_segment:
          data["mfcc"].append(mfcc.tolist())
          # the root occupies index 0 of the walk, so i-1 is this folder's
          # position in data["mapping"]
          data["labels"].append(i - 1)

          print("{}, segment:{}".format(file_path, s + 1))

  with open(json_path, "w") as fp:
    json.dump(data, fp, indent=4)


# ---------------------------------------------------------------------------
# Main program: extract the features of the whole dataset, cutting every
# track into 10 segments, and write them to JSON_PATH.
if __name__ == "__main__":
  save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=10)



 Processing true
/content/datasetTorF/true/true46.wav, segment:1
/content/datasetTorF/true/true46.wav, segment:2
/content/datasetTorF/true/true46.wav, segment:3
/content/datasetTorF/true/true46.wav, segment:4
/content/datasetTorF/true/true46.wav, segment:5
/content/datasetTorF/true/true46.wav, segment:6
/content/datasetTorF/true/true46.wav, segment:7
/content/datasetTorF/true/true46.wav, segment:8
/content/datasetTorF/true/true46.wav, segment:9
/content/datasetTorF/true/true46.wav, segment:10
/content/datasetTorF/true/true25.wav, segment:1
/content/datasetTorF/true/true25.wav, segment:2
/content/datasetTorF/true/true25.wav, segment:3
/content/datasetTorF/true/true25.wav, segment:4
/content/datasetTorF/true/true25.wav, segment:5
/content/datasetTorF/true/true25.wav, segment:6
/content/datasetTorF/true/true25.wav, segment:7
/content/datasetTorF/true/true25.wav, segment:8
/content/datasetTorF/true/true25.wav, segment:9
/content/datasetTorF/true/true25.wav, segment:10
/content/datasetTorF