Part Two:
*   Create Header Row of .csv File
*   Split Each Audio File into n (= 50) Slices
*   Extract Feature Values of Each Slice & Average Each Feature Across the Slices
*   Store Data Extracted in Previous Step in a .csv File




**Import Libraries**

In [0]:
import os
import librosa #for audio processing
import numpy as np #for all math functions including matrix operations
import csv #for handling .csv files
import datetime
!pip install pydub
from pydub import AudioSegment #for segmenting audio with respect to time
import pandas as pd #library for reading data in table format

**Function to Create Header Row of Table**

In [0]:
def create_header(file_path, file_name, times):
  """Create ``<file_path>/<file_name>_<times>.csv`` holding only the header row.

  Column layout:
    * ``filename``
    * for every slice ``i`` in ``1..times``: six spectral columns
      (``chroma_stft_i`` ... ``zcr_i``) followed by ``mfcc_1_i`` ... ``mfcc_20_i``
    * the same 26 features prefixed with ``avg_`` (``avg_mfcc1`` ... ``avg_mfcc20``)
    * ``label``

  Args:
    file_path: Directory in which the CSV file is created (must already exist).
    file_name: Base name of the CSV file, without suffix or extension.
    times: Number of slices per audio file; also appended to the file name.

  Returns:
    None. An existing file with the same name is overwritten.
  """
  spectral_names = ["chroma_stft", "rmse", "spectral_centroid",
                    "spectral_bandwidth", "rolloff", "zcr"]
  mfcc_numbers = range(1, 21)

  header = ["filename"]
  for slice_no in range(1, times + 1):
    header.extend("{}_{}".format(name, slice_no) for name in spectral_names)
    header.extend("mfcc_{}_{}".format(k, slice_no) for k in mfcc_numbers)
  header.extend("avg_" + name for name in spectral_names)
  header.extend("avg_mfcc{}".format(k) for k in mfcc_numbers)
  header.append("label")
  print("The number of columns are: ",len(header))

  # Same "/"-joined path that store_data_into_csv() later appends to.
  modified_file_name = file_path + "/" + file_name + "_" + str(times)+".csv"
  # 'w' truncates any previous file; newline='' lets the csv module control line endings.
  with open(modified_file_name, 'w', newline='') as file:
    csv.writer(file).writerow(header)
  return

**Function to Return the List of Genre Folders**

In [0]:
def genre_list(data_path):
  """Return the names of the entries in ``data_path`` that do not end with "mf".

  Args:
    data_path: Directory to list.

  Returns:
    A list of entry names, in the order ``os.listdir`` reports them, with
    every name ending in "mf" left out. Entries are not checked for being
    directories.
  """
  return [entry for entry in os.listdir(data_path) if not entry.endswith("mf")]

**Function to Split the Audio**

In [0]:
def split_song(song, no_of_pieces):
  """Cut a WAV file into ``no_of_pieces`` equal, consecutive slices on disk.

  Each slice is exported as
  ``/content/temp/sliced_audio_<no_of_pieces>_<k>.wav`` with ``k`` starting
  at 1, so the ``/content/temp`` directory must already exist.

  Args:
    song: Path of the WAV file to split.
    no_of_pieces: Number of slices to produce.

  Returns:
    The list of slice file paths, in playback order.
  """
  track = AudioSegment.from_wav(song)
  # The slice length is truncated to an integer, so any remainder at the end
  # of the track is not part of any slice.
  piece_length = int(len(track)/no_of_pieces)
  slice_paths = []
  for index in range(no_of_pieces):
    target = "/content/temp/sliced_audio_" + str(no_of_pieces) + "_" + str(index+1) + ".wav"
    slice_paths.append(target)
    begin = index * piece_length
    # Write the slice to disk as its own WAV file.
    track[begin:begin + piece_length].export(target, format = "wav")
  return slice_paths

**Function to Extract Features**

In [0]:
def extract_feature(audio_file):
  """Load an audio file and compute its librosa feature arrays.

  The file is read with ``librosa.load`` using its default settings.

  Args:
    audio_file: Path of the audio file to analyse.

  Returns:
    A 7-tuple ``(chroma_stft, rmse, spec_cent, spec_bw, rolloff, zcr, mfcc)``
    holding the raw (un-averaged) outputs of the corresponding
    ``librosa.feature`` functions.
  """
  signal, sample_rate = librosa.load(audio_file)
  return (
    librosa.feature.chroma_stft(y=signal, sr=sample_rate),
    librosa.feature.rms(y=signal),
    librosa.feature.spectral_centroid(y=signal, sr=sample_rate),
    librosa.feature.spectral_bandwidth(y=signal, sr=sample_rate),
    librosa.feature.spectral_rolloff(y=signal, sr=sample_rate),
    librosa.feature.zero_crossing_rate(signal),
    librosa.feature.mfcc(y=signal, sr=sample_rate),
  )

**Function to Extract Features of Each Slice of an Audio**

In [0]:
def feature_of_each_slice(audio, genre, times):
  """Build one CSV data row for an audio file.

  The file is split into ``times`` slices with ``split_song``; for every slice
  the mean of each array returned by ``extract_feature`` is recorded (one value
  per spectral feature plus one value per MFCC row). The temporary slice files
  are deleted afterwards, even when feature extraction fails.

  Args:
    audio: Path of the audio file; also stored as the first row value.
    genre: Label stored as the last row value.
    times: Number of slices to split the audio into.

  Returns:
    A list laid out as ``[audio, <features of slice 1>, ...,
    <features of slice times>, <per-feature averages over all slices>, genre]``.
  """
  name_of_files = split_song(audio, times)
  per_slice_rows = []
  try:
    for split_audio_file in name_of_files:
      chroma_stft, rmse, spec_cent, spec_bw, rolloff, zcr, mfcc = extract_feature(split_audio_file)
      row = [np.mean(feature) for feature in (chroma_stft, rmse, spec_cent, spec_bw, rolloff, zcr)]
      # One mean per MFCC row.
      row.extend(np.mean(coefficient) for coefficient in mfcc)
      per_slice_rows.append(row)
  finally:
    # Always clean up the temporary slices so a failed extraction does not
    # leave stale files behind in the temp directory.
    for slice_path in name_of_files:
      os.remove(slice_path)

  extracted_features = [audio]
  for row in per_slice_rows:
    extracted_features.extend(row)
  # Average every feature across slices; taking columns from the rows avoids
  # hard-coding the number of features per slice.
  extracted_features.extend(np.mean(column) for column in zip(*per_slice_rows))
  extracted_features.append(genre)
  return extracted_features

**Function to Store Feature Data of Files**

In [0]:
def store_data_into_csv():
  """Extract features for every song of every genre and store them in a CSV file.

  Steps:
    1. Make sure the temporary slice directory (``/content/temp``) and the
       output directory exist.
    2. Write the header row with ``create_header`` (overwrites an older file).
    3. For each entry returned by ``genre_list`` and each file inside it,
       compute the row with ``feature_of_each_slice`` and append it to the CSV.
    4. Print the elapsed time in seconds.

  The output file is ``<csv_files>/data_50.csv`` (50 slices per song).
  """
  file_path = "/content/drive/My Drive/Project/Data/Working_Data/csv_files"
  file_name = "data"
  times = 50
  PATH = "/content/drive/My Drive/Project/Data/Working_Data/genres"

  # exist_ok=True keeps the cell re-runnable: plain os.mkdir raises
  # FileExistsError once the directories exist from a previous run.
  os.makedirs("/content/temp", exist_ok=True)
  os.makedirs(file_path, exist_ok=True)

  modified_file_name = file_path + "/" + file_name + "_" + str(times)+".csv"
  create_header(file_path, file_name, times)

  start = datetime.datetime.now()
  for g in genre_list(PATH):
    print("Genre " + g + " started.")
    for sg in os.listdir(PATH + "/" + g):
      song = PATH + '/' + g + '/' + sg
      extracted_features = feature_of_each_slice(song, g, times)
      # Append one row per song so rows already written survive a later failure.
      with open(modified_file_name, 'a', newline='') as file:
        csv.writer(file).writerow(extracted_features)
    print("Genre " + g + " completed.")
  end = datetime.datetime.now()
  time_reqd = (end - start).total_seconds()
  print("Time taken:", time_reqd, "seconds")

**Call Function to Store Data in .csv Format**

In [0]:
# Run the whole extraction pipeline: writes the header and one row per song
# into csv_files/data_50.csv and prints progress per genre plus the time taken.
store_data_into_csv()

**Check Shape**

In [0]:
# Sanity check: load the CSV written by store_data_into_csv() and print the
# table shape as reported by pandas.
data = pd.read_csv("/content/drive/My Drive/Project/Data/Working_Data/csv_files/data_50.csv")
print("The size of the data is:", data.shape)
print("Part Two Successful!")