## Helper Functions

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import librosa.display

def showimage(path):
    """Display the image file at ``path`` with the axes hidden.

    The file is read with ``matplotlib.image.imread`` and drawn on the
    current pyplot figure, which is then shown.
    """
    plt.imshow(mpimg.imread(path))
    plt.axis('off')
    plt.show()

def show_audio(rep,path):
    """Show one representation of an audio clip with the axes hidden.

    Args:
        rep: representation key. ``'a1'`` draws a waveform; any other value
            displays the image stored at ``path``.
        path: image file to display for non-``'a1'`` representations
            (ignored when ``rep == 'a1'``).

    NOTE(review): the ``'a1'`` branch reads the notebook-level variables
    ``x`` and ``sr`` (samples and sampling rate), so a clip must have been
    loaded before calling this — confirm this is intended.
    """
    if rep =='a1':
        plt.figure(figsize=(14, 5))
        # NOTE(review): librosa 0.10 removed waveplot in favour of waveshow — confirm installed version.
        librosa.display.waveplot(x, sr=sr)
    else:
        plt.imshow(mpimg.imread(path))
    # Hide the axes only after the target figure exists; doing it first
    # would act on (and create) a different, empty figure.
    plt.axis('off')
    plt.show()

def get_specfilename(name,folder):
    """Map a ``dir/stem.ext`` path to ``<folder><stem>.png``.

    Everything from the first ``.`` onwards is dropped, then the second
    ``/``-separated component is used as the stem. ``folder`` is prepended
    verbatim, so it must already end with a separator (e.g. ``'a6/'``).
    """
    stem = name.split('.')[0].split('/')[1]
    return folder + stem + '.png'

## Code

In [None]:
# Print the path of the python3 executable that the notebook's shell resolves.
!which python3

In [None]:
# Print the version of the `python` executable on the shell's PATH.
# NOTE(review): this queries `python`, while the cell above inspects `python3` — they may differ.
!python --version

In [None]:
# Quote the URL so the shell does not interpret `?` as a glob character.
# NOTE(review): this is a Google Drive "view" page link, so wget presumably saves the
# HTML viewer page rather than the shared file itself — confirm and switch to a
# direct-download URL if that is the case.
!wget "https://drive.google.com/file/d/1n2iPBxM6FqxIrIrk2EXwgRzMGBDjAAym/view?usp=sharing"

In [None]:
# Show the notebook's current working directory (IPython automagic form of %pwd).
pwd

In [None]:
# Install TensorFlow from conda-forge. Earlier attempts installed the standalone
# `tensorboard` package instead:
#!conda install -c conda-forge tensorboard -y
#!conda install -c anaconda tensorboard -y
# NOTE(review): presumably installed only for the TensorBoard support used below — confirm.
!conda install -c conda-forge tensorflow -y

Run Fashion-MNIST with PyTorch and TensorBoard

In [None]:
# import standard PyTorch modules
import torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter # TensorBoard support

# import torchvision module to handle image manipulation
import torchvision
import torchvision.transforms as transforms

# calculate train time, writing train data to files etc.
import time
import pandas as pd
import json
from IPython.display import clear_output

# Allow up to 120 characters per line when tensors are printed.
torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)     # On by default, leave it here for clarity

In [None]:
# Report the installed PyTorch and torchvision versions, one per line.
print(torch.__version__, torchvision.__version__, sep='\n')

In [None]:
# Standard FashionMNIST splits: `train_set` (train=True) and `val_set`
# (train=False). Both are downloaded into ./data/FashionMNIST if missing and
# convert every image to a tensor.
train_set, val_set = (
    torchvision.datasets.FashionMNIST(
        root = './data/FashionMNIST',
        train = is_train,
        download = True,
        transform = transforms.Compose([transforms.ToTensor()]),
    )
    for is_train in (True, False)
)

In [None]:
# Build the neural network, expand on top of nn.Module
class Network(nn.Module):
  """Small LeNet-style CNN: two conv+pool stages followed by three linear layers.

  The flatten size ``12*4*4`` fixes the expected input to single-channel
  28x28 images (28 -> conv5 -> 24 -> pool -> 12 -> conv5 -> 8 -> pool -> 4).

  Args:
    num_classes: number of output logits. Defaults to 51, the value that was
      previously hard-coded. NOTE(review): FashionMNIST has 10 classes, so
      ``Network(num_classes=10)`` is presumably what the training cell wants —
      confirm before changing the default.
  """
  def __init__(self, num_classes=51):
    super().__init__()

    # convolutional feature extractor
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

    # classifier head
    self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=60)
    self.out = nn.Linear(in_features=60, out_features=num_classes)

  def forward(self, t):
    """Return raw class logits of shape ``(batch, num_classes)`` for images ``t``."""
    # conv 1
    t = self.conv1(t)
    t = F.relu(t)
    t = F.max_pool2d(t, kernel_size=2, stride=2)

    # conv 2
    t = self.conv2(t)
    t = F.relu(t)
    t = F.max_pool2d(t, kernel_size=2, stride=2)

    # fc1: flatten the 12 feature maps of 4x4 into one vector per sample
    t = t.reshape(-1, 12*4*4)
    t = self.fc1(t)
    t = F.relu(t)

    # fc2
    t = self.fc2(t)
    t = F.relu(t)

    # output: logits only. No softmax here because the training loop uses
    # F.cross_entropy as the loss, which applies log-softmax internally.
    t = self.out(t)

    return t

In [None]:
# import modules to build RunBuilder and RunManager helper classes
from collections  import OrderedDict
from collections import namedtuple
from itertools import product

# Read in the hyper-parameters and return a Run namedtuple containing all the 
# combinations of hyper-parameters
class RunBuilder():
  """Expands a mapping of hyper-parameter lists into concrete runs."""

  @staticmethod
  def get_runs(params):
    """Return one ``Run`` namedtuple per combination of the values in ``params``.

    ``params`` maps a hyper-parameter name to the list of values to try; the
    namedtuple fields follow the mapping's key order and the combinations
    follow ``itertools.product`` order.
    """
    Run = namedtuple('Run', params.keys())
    return [Run(*combo) for combo in product(*params.values())]

In [None]:
# Helper class, help track loss, accuracy, epoch time, run time, 
# hyper-parameters etc. Also record to TensorBoard and write into csv, json
class RunManager():
  """Tracks loss, accuracy and timing per epoch and per run.

  Results are logged to TensorBoard, shown as a DataFrame after every epoch
  and can be written to CSV/JSON with ``save``. Expected call order:
  ``begin_run`` -> (``begin_epoch`` -> ``track_*`` per batch -> ``end_epoch``)
  per epoch -> ``end_run``.
  """
  def __init__(self):

    # per-epoch accumulators: count, summed loss, correct predictions, start time
    self.epoch_count = 0
    self.epoch_loss = 0
    self.epoch_num_correct = 0
    self.epoch_start_time = None

    # per-run bookkeeping: hyper-params used, run counter, result rows, start time
    self.run_params = None
    self.run_count = 0
    self.run_data = []
    self.run_start_time = None

    # model, loader and TensorBoard writer of the active run
    self.network = None
    self.loader = None
    self.tb = None

  def begin_run(self, run, network, loader):
    """Start a run: store its hyper-params, model and loader, open TensorBoard.

    The first batch of ``loader`` is logged as an image grid and used to
    trace the network graph.
    """
    self.run_start_time = time.time()

    self.run_params = run
    self.run_count += 1

    self.network = network
    self.loader = loader
    self.tb = SummaryWriter(comment=f'-{run}')

    images, _ = next(iter(self.loader))
    grid = torchvision.utils.make_grid(images)

    self.tb.add_image('images', grid)
    self.tb.add_graph(self.network, images)

  def end_run(self):
    """Close the TensorBoard writer and reset the epoch counter."""
    self.tb.close()
    self.epoch_count = 0

  def begin_epoch(self):
    """Start timing a new epoch and zero the loss/accuracy accumulators."""
    self.epoch_start_time = time.time()

    self.epoch_count += 1
    self.epoch_loss = 0
    self.epoch_num_correct = 0

  def end_epoch(self):
    """Finalize the epoch: compute metrics, log them and display the results table."""
    # epoch duration, and run duration accumulated since begin_run
    epoch_duration = time.time() - self.epoch_start_time
    run_duration = time.time() - self.run_start_time

    # per-sample averages over the whole dataset
    num_samples = len(self.loader.dataset)
    loss = self.epoch_loss / num_samples
    accuracy = self.epoch_num_correct / num_samples

    # Record epoch loss and accuracy to TensorBoard
    self.tb.add_scalar('Loss', loss, self.epoch_count)
    self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

    # Record parameter (and gradient) distributions to TensorBoard
    for name, param in self.network.named_parameters():
      self.tb.add_histogram(name, param, self.epoch_count)
      # grad is None for parameters that never received a backward pass
      if param.grad is not None:
        self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

    # One result row per epoch: metrics first, then the run's hyper-params
    results = OrderedDict()
    results["run"] = self.run_count
    results["epoch"] = self.epoch_count
    results["loss"] = loss
    results["accuracy"] = accuracy
    results["epoch duration"] = epoch_duration
    results["run duration"] = run_duration
    for k, v in self.run_params._asdict().items():
      results[k] = v
    self.run_data.append(results)
    df = pd.DataFrame.from_dict(self.run_data, orient = 'columns')

    # replace the previous table so the cell shows live progress
    clear_output(wait=True)
    display(df)

  def track_loss(self, loss, batch_size=None):
    """Add one batch's loss to the epoch total, weighted by batch size.

    Args:
      loss: scalar loss tensor (mean over the batch).
      batch_size: number of samples in this batch. Defaults to the loader's
        nominal ``batch_size``; pass the real size (e.g. ``len(images)``) so a
        smaller final batch is not over-weighted.
    """
    if batch_size is None:
      batch_size = self.loader.batch_size
    # weighting by batch size keeps runs with different batch sizes comparable
    self.epoch_loss += loss.item() * batch_size

  def track_num_correct(self, preds, labels):
    """Add the number of correct predictions in this batch to the epoch total."""
    self.epoch_num_correct += self._get_num_correct(preds, labels)

  @torch.no_grad()
  def _get_num_correct(self, preds, labels):
    """Count rows of ``preds`` whose arg-max along dim 1 equals ``labels``."""
    return preds.argmax(dim=1).eq(labels).sum().item()

  def save(self, fileName):
    """Write all recorded rows to ``<fileName>.csv`` and ``<fileName>.json``."""
    pd.DataFrame.from_dict(
        self.run_data,
        orient = 'columns',
    ).to_csv(f'{fileName}.csv')

    with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
      json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [None]:
# put all hyper params into a OrderedDict, easily expandable
# Hyper-parameter grid: each key lists the values to sweep. One run is built
# for every combination; add a key here to extend the sweep.
params = OrderedDict([
    ('lr', [.01, .001]),
    ('batch_size', [100, 1000]),
    ('shuffle', [False, True]),
])
# number of epochs trained in every run
epochs = 10

In [None]:

m = RunManager()

# One full training run per hyper-parameter combination from RunBuilder.
for run in RunBuilder.get_runs(params):

    # Every field of `run` must be consumed here; when `params` gains a key,
    # wire it into these lines. `shuffle` was previously swept but never passed
    # to the loader, so the shuffle=True/False runs were identical.
    network = Network()
    loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=run.batch_size,
        shuffle=run.shuffle,
    )
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(epochs):

        m.begin_epoch()
        for batch in loader:

            images = batch[0]
            labels = batch[1]
            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            # standard optimisation step: clear old grads, backprop, update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            m.track_loss(loss)
            m.track_num_correct(preds, labels)

        m.end_epoch()
    m.end_run()

# when all runs are done, save results to results.csv / results.json
m.save('results')

In [None]:
# helper function to calculate all predictions of train set
@torch.no_grad()
def get_all_preds(model, loader):
  """Run ``model`` over every batch of ``loader`` and return the stacked outputs.

  Gradient tracking is disabled: this is inference only, and keeping the
  autograd graph of every batch alive would waste memory.

  Args:
    model: callable mapping a batch of inputs to a tensor of predictions.
    loader: iterable yielding ``(images, labels)`` pairs; labels are ignored.

  Returns:
    Predictions concatenated along dim 0 in loader order, or an empty tensor
    when ``loader`` yields nothing.
  """
  batch_preds = [model(images) for images, _ in loader]
  if not batch_preds:
    return torch.tensor([])
  # a single concatenation avoids re-copying the accumulated result per batch
  return torch.cat(batch_preds, dim=0)

In [None]:
# Inference only needs a forward pass, so a larger batch size is used.
# `network` is the model left over from the last run of the training loop.
prediction_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=1000)
train_preds = get_all_preds(model=network, loader=prediction_loader)

In [None]:
# %pip installs into the environment of the running kernel; `!pip` may
# resolve to a different Python installation on PATH.
%pip install scikit-plot

In [None]:
# use scikitplot to plot the confusion matrix
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
import scikitplot as skplt

# Confusion matrix on the training set: true labels vs. arg-max predictions.
cm = confusion_matrix(
    y_true=train_set.targets,
    y_pred=train_preds.argmax(dim=1),
)
cm

In [None]:
# Plot the row-normalised confusion matrix of the training-set predictions.
skplt.metrics.plot_confusion_matrix(
    y_true=train_set.targets,
    y_pred=train_preds.argmax(dim=1),
    normalize=True,
)

Generate Audio files from UCF101

In [None]:
# Create the ucf101 folder; -p keeps the cell re-runnable when it already exists.
!mkdir -p ucf101

In [None]:
# Download the UCF101 archive into the current directory.
# NOTE(review): --no-check-certificate disables TLS certificate verification — confirm this is still required.
!wget https://crcv.ucf.edu/data/UCF101/UCF101.rar --no-check-certificate

In [None]:
# Extract the archive's files into data/ (`e` extracts without the archived directory paths).
!unrar e UCF101.rar data/

In [None]:
# Show the disk usage of every entry in the working directory.
!du -sh *

In [None]:
# Output folders for spectrogram images and extracted audio; -p keeps the
# cell re-runnable when they already exist.
!mkdir -p spec wav

In [None]:
# Install librosa (audio loading and feature extraction) from conda-forge.
# NOTE(review): the version is unpinned; cells below use APIs such as
# librosa.display.waveplot that newer librosa releases removed — confirm the version.
!conda install -c conda-forge librosa -y

In [None]:
import subprocess
import librosa
import glob as glob
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display


def get_wavfilename(name):
    """Map a video path such as ``data/v_Class_g01_c01.avi`` to its WAV path.

    Everything from the first ``.`` onwards is dropped, the second
    ``/``-separated component is taken, and its first ``_``-separated token
    (the ``v`` prefix in the example) is removed. The result is
    ``wav/<rest>.wav``.
    """
    stem = name.split('.')[0].split('/')[1]
    _, _, remainder = stem.partition('_')
    return 'wav/' + remainder + '.wav'

def gen_avitowav(src,des):
    """Extract the audio track of video ``src`` into WAV file ``des`` with ffmpeg.

    The audio is written as 2-channel, 44.1 kHz with video disabled (``-vn``).
    ffmpeg is invoked with an argument list rather than through the shell, so
    paths containing spaces or shell metacharacters are passed through intact.
    A non-zero ffmpeg exit status (e.g. a video without an audio stream) is not
    raised; callers detect a missing output file themselves.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable is not on PATH.
    """
    import subprocess

    subprocess.run(
        ['ffmpeg', '-nostats', '-loglevel', '0',
         '-i', src,
         '-ab', '160k', '-ac', '2', '-ar', '44100', '-vn',
         des],
        stdin=subprocess.DEVNULL,  # never block on ffmpeg's interactive prompts
        check=False,
    )
        
# Convert every UCF101 video in data/ to a WAV file, in sorted order.
ucf101 = sorted(glob.glob("data/*"))

i = 0
t = 0   # number of files in wav/ after the previous conversion
for i, avi in enumerate(ucf101, start=1):
    # progress report every 50 videos
    if i % 50 == 0:
        print(i)
        print(avi)
    # generate wav file
    wav = get_wavfilename(avi)
    gen_avitowav(avi, wav)
    # If wav/ did not grow, this conversion produced no new file: report it.
    wav_dir = glob.glob("wav/*")
    size = len(wav_dir)
    if size == t:
        print(size, avi, wav)
    t = size
    
  

In [None]:
def gen_wavtospec(src,des):
    """Load audio file ``src`` and save a 14x5 inch figure to ``des`` at 200 dpi.

    The waveform drawing call is currently disabled, so the saved image is an
    empty canvas. The output path is now taken from ``des``; the previous body
    read a local ``name`` before assigning it and always raised
    UnboundLocalError.
    """
    x , sr = librosa.load(src)
    fig = plt.figure(figsize=(14, 5))
    #librosa.display.waveplot(x, sr=sr)
    fig.savefig(des, dpi=200)
    plt.close(fig)  # release the figure so repeated calls do not accumulate memory

def get_specfilename(name, folder='spec/'):
    """Map ``dir/stem.ext`` to ``<folder><stem>.png`` (default folder ``spec/``).

    The full stem is kept. The earlier version dropped the first
    ``_``-separated token, which for WAV names like ``Class_g01_c01`` removed
    the class name and made clips of different classes collide.
    """
    stem = name.split('.')[0].split('/')[1]
    return folder + stem + '.png'

def get_wavfilename(name):
    """Map a video path like ``data/v_Class_g01_c01.avi`` to ``wav/Class_g01_c01.wav``."""
    name = name.split('.')[0]
    name =  name.split('/')[1].split('_')[1:]
    name = 'wav/'+'_'.join(name)+'.wav'
    return name

def gen_avitowav(src,des):
    """Extract the audio of video ``src`` into WAV ``des`` (2 channels, 44.1 kHz) with ffmpeg.

    Uses an argument list instead of the shell so unusual characters in paths
    are safe. Raises FileNotFoundError if ``ffmpeg`` is not on PATH.
    """
    import subprocess

    subprocess.run(
        ['ffmpeg', '-nostats', '-loglevel', '0',
         '-i', src,
         '-ab', '160k', '-ac', '2', '-ar', '44100', '-vn',
         des],
        stdin=subprocess.DEVNULL,
        check=False,
    )


# Smoke test: render the first WAV file (alphabetical order) and count the
# images in spec/. list.sort() returns None, so the sorted copy is indexed.
wav_files = sorted(glob.glob("wav/*"))
wav = wav_files[0]

#gen_avitowav(ucf101[0],wav)
gen_wavtospec(wav, get_specfilename(wav))
spec = (glob.glob("spec/*"))
print(len(spec))

In [None]:
import librosa
import numpy
import skimage.io

def scale_minmax(X, min=0.0, max=1.0):
    """Linearly rescale array ``X`` so its values span ``[min, max]``.

    Args:
        X: numeric array with at least one element.
        min: value the smallest element is mapped to.
        max: value the largest element is mapped to.

    Returns:
        Array of the same shape as ``X``. When every element of ``X`` is equal
        (e.g. a silent clip) there is no range to stretch, so an array filled
        with ``min`` is returned instead of dividing by zero and producing NaN.
    """
    lowest = X.min()
    span = X.max() - lowest
    if span == 0:
        return numpy.full_like(X, min, dtype=float)
    X_std = (X - lowest) / span
    return X_std * (max - min) + min

def spectrogram_image(y, sr, out, hop_length, n_mels):
    """Write the log-mel spectrogram of signal ``y`` to image file ``out``.

    Args:
        y: audio samples.
        sr: sampling rate of ``y``.
        out: destination path handed to ``skimage.io.imsave``.
        hop_length: hop between frames; the FFT size is twice this value.
        n_mels: number of mel bins.
    """
    mel_power = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=n_mels, n_fft=hop_length*2, hop_length=hop_length
    )
    # small offset keeps log() finite where the mel energy is zero
    log_mel = numpy.log(mel_power + 1e-9)

    # stretch to 0..255 and store as 8-bit pixels
    pixels = scale_minmax(log_mel, 0, 255).astype(numpy.uint8)
    # flip so low frequencies sit at the bottom, then invert so that
    # black means more energy
    pixels = 255 - numpy.flip(pixels, axis=0)

    skimage.io.imsave(out, pixels)

def get_specfilename(name, folder='spec/'):
    """Map ``dir/stem.ext`` to ``<folder><stem>.png``.

    Args:
        name: source path; everything from the first ``.`` onwards is dropped
            and the second ``/``-separated component is used as the stem.
        folder: output prefix, prepended verbatim (must end with a separator).
            Defaults to ``'spec/'``, the previously hard-coded value, so
            one-argument calls behave as before. The parameter keeps this
            redefinition compatible with the two-argument calls used by other
            cells, e.g. ``get_specfilename(audio, 'a6/')``.
    """
    stem = name.split('.')[0].split('/')[1]
    return folder + stem + '.png'

def gen_wavtospec(src,des):
    """Load audio file ``src`` and save a 14x5 inch figure to ``des`` at 200 dpi.

    The waveform drawing call is currently disabled, so the saved image is an
    empty canvas. The figure is closed after saving, so calling this in a loop
    does not accumulate open figures.
    """
    x , sr = librosa.load(src)
    fig = plt.figure(figsize=(14, 5))
    #librosa.display.waveplot(x, sr=sr)
    fig.savefig(des, dpi=200)
    plt.close(fig)

# settings
hop_length = 512   # number of samples per time-step in spectrogram
n_mels = 128       # number of bins in spectrogram. Height of image
time_steps = 384   # number of time-steps. Width of image

# Every extracted WAV file, processed in sorted order.
ucf101_wav = sorted(glob.glob("wav/*"))

i = 0
t = 0   # number of files in spec/ after the previous iteration
for i, wav in enumerate(ucf101_wav, start=1):
    # progress report every 50 files
    if i % 50 == 0:
        print(i, wav)

    # load up to 10 s of audio starting 1 s in, resampled to 22050 Hz
    path = wav
    y, sr = librosa.load(path, offset=1.0, duration=10.0, sr=22050)
    spec = get_specfilename(wav)

    # extract a fixed length window, starting at the beginning
    start_sample = 0
    length_samples = time_steps * hop_length
    window = y[start_sample:start_sample + length_samples]

    # convert to PNG
    spectrogram_image(window, sr=sr, out=spec, hop_length=hop_length, n_mels=n_mels)
    print('wrote file', spec)

    # If spec/ did not grow, no new image was produced for this file: report it.
    spec_dir = glob.glob("spec/*")
    size = len(spec_dir)
    if size == t:
        print(size, wav, spec)
    t = size

In [None]:
# Collect the spectrogram PNGs whose file names contain at least two underscores.
spec_data =  glob.glob('spec/*_*_*.png')

In [None]:
import numpy as np
# Pick one WAV file to experiment with.
# NOTE(review): the glob result is not sorted here, so which file comes first may vary — confirm.
name = glob.glob('wav/*')[0]

# FFT size; also the length of the analysis window built below
window_size = 1024

import matplotlib.pyplot as plt
import librosa.display

import numpy as np
import pandas as pd
import librosa

y, sr = librosa.load(name)
y = y[:100000] # shorten audio a bit for speed

# STFT with an explicit Hann window; the magnitude is scaled by 2 / sum(window)
window = np.hanning(window_size)
stft  = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
out = 2 * np.abs(stft) / np.sum(window)

# For plotting headlessly
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

# Build the figure directly on an Agg canvas (no pyplot state), draw the
# dB-scaled spectrogram with a log frequency axis and write it to spec.png.
fig = plt.Figure()
canvas = FigureCanvas(fig)
ax = fig.add_subplot(111)
p = librosa.display.specshow(librosa.amplitude_to_db(out, ref=np.max), ax=ax, y_axis='log', x_axis='time')
fig.savefig('spec.png')

In [None]:
# This code segment converts ucf videos (avi) to audio (wav) to spectograms (png)
import subprocess
import librosa
import glob as glob
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display

# Load the audio of every UCF101 video in data/ (sorted order) and save one
# figure per video to spec/<video stem>.png.
ucf101 = (glob.glob("data/*"))
ucf101.sort()

i =0
t = 0   # number of files in spec/ after the previous iteration
for vid in ucf101:
  name = vid
  i+=1
  # progress report every 50 videos
  if i%50==0:
    print(i)
  x , sr = librosa.load(name)
  fig = plt.figure(figsize=(14, 5))
  # The waveform drawing call is disabled, so the saved image is an empty canvas.
  #librosa.display.waveplot(x, sr=sr)
  name = name.split('.')[0].split('/')[1]
  name = 'spec/'+name+'.png'
  fig.savefig(name, dpi=200)
  # Close the figure: without this one figure per video stays open and
  # memory grows with every iteration.
  plt.close(fig)
  # If spec/ did not grow, no new image was written for this video: report it.
  spec = (glob.glob("spec/*"))
  size = len(spec)
  if size == t:
    print(size,name)
  t = size


In [None]:
# Go to the F: drive mount. `!cd` runs in a throw-away subshell and leaves the
# kernel's working directory untouched; `%cd` changes it for the following cells.
%cd /mnt/f/

In [None]:
# Switch the kernel's working directory to the project workspace (automagic form of %cd).
# NOTE(review): absolute, user-specific path — the notebook only runs unchanged on this machine.
cd /home/muhammadbsheikh/workspace/try

In [None]:
# Example clip used by the feature-visualisation cells below.
path = 'wav/ApplyEyeMakeup_g01_c01.wav'

Here I will generate different variations of the spectrogram, following this [Medium article](https://towardsdatascience.com/music-genre-classification-with-python-c714d032f0d8):
*  chromagram

In [None]:
import librosa
# Load the example clip and report the types and sizes of the samples and sampling rate.
x, sr = librosa.load(path)
print(f"{type(x)} {type(sr)}")
print(f"{x.shape} {sr}")

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display
# Waveform of the loaded clip on a 14x5 inch figure with the axes hidden.
plt.figure(figsize=(14, 5))
plt.axis('off')
# NOTE(review): librosa 0.10 removed waveplot in favour of waveshow — confirm installed version.
librosa.display.waveplot(x, sr=sr)

In [None]:
# Spectrogram (also called sonograph / voiceprint / voicegram),
# normal representation: linear frequency axis in Hz.
# X is the STFT of the clip; Xdb is its magnitude converted to decibels.
X = librosa.stft(x)
Xdb = librosa.amplitude_to_db(abs(X))
plt.figure(figsize=(14, 5))
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
plt.axis('off')
#plt.colorbar()

In [None]:
# Logarithmic representation: the same dB spectrogram (Xdb from the previous
# cell) drawn with a log-scaled frequency axis.
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
plt.axis('off')
#plt.colorbar()

In [None]:
# Spectral centroid: indicates where the "centre of mass" of a sound is located,
# calculated as the weighted mean of the frequencies present in the sound.
# `import sklearn` alone does not load the preprocessing submodule, so import it explicitly.
import sklearn.preprocessing

# The audio is passed by keyword: newer librosa releases reject it positionally.
spectral_centroids = librosa.feature.spectral_centroid(y=x, sr=sr)[0]
# Computing the time variable for visualization (sr passed so the times match the clip's rate)
frames = range(len(spectral_centroids))
t = librosa.frames_to_time(frames, sr=sr)
# Normalising the spectral centroid for visualisation
def normalize(x, axis=0):
    """Min-max scale ``x`` along ``axis`` with sklearn's ``minmax_scale``."""
    return sklearn.preprocessing.minmax_scale(x, axis=axis)
# Plotting the spectral centroid (red) along the waveform
# NOTE(review): librosa 0.10 removed waveplot in favour of waveshow — confirm installed version.
librosa.display.waveplot(x, sr=sr, alpha=0.4)
plt.axis('off')
plt.plot(t, normalize(spectral_centroids), color='r')

In [None]:
# Spectral rolloff: a measure of the shape of the signal. It represents the
# frequency below which a specified percentage of the total spectral energy,
# e.g. 85%, lies.
# Reuses `t` and `normalize` defined by the spectral-centroid cell.
# The audio is passed by keyword: newer librosa releases reject it positionally.
spectral_rolloff = librosa.feature.spectral_rolloff(y=x+0.01, sr=sr)[0]
# NOTE(review): librosa 0.10 removed waveplot in favour of waveshow — confirm installed version.
librosa.display.waveplot(x, sr=sr, alpha=0.4)
plt.axis('off')
plt.plot(t, normalize(spectral_rolloff), color='r')

In [None]:
# MFCCs
# The Mel frequency cepstral coefficients (MFCCs) of a signal are a small set of
# features (usually about 10-20) which concisely describe the overall shape of a
# spectral envelope. They model the characteristics of the human voice.
# The audio is passed by keyword: newer librosa releases reject it positionally.
mfccs = librosa.feature.mfcc(y=x, sr=sr)
print(mfccs.shape)
# Displaying the MFCCs:
librosa.display.specshow(mfccs, sr=sr, x_axis='time')
plt.axis('off')

In [None]:
# Feature scaling
# Scale the MFCCs so that each coefficient dimension has zero mean and unit variance.
# `import sklearn` alone does not load the preprocessing submodule, so import it explicitly.
import sklearn.preprocessing
mfccs = sklearn.preprocessing.scale(mfccs, axis=1)
# sanity check: per-coefficient mean and variance after scaling
print(mfccs.mean(axis=1))
print(mfccs.var(axis=1))
plt.axis('off')
librosa.display.specshow(mfccs, sr=sr, x_axis='time')

In [None]:
# Chroma frequencies / chromagram
hop_length = 512
# The audio is passed by keyword: newer librosa releases reject it positionally.
chromagram = librosa.feature.chroma_stft(y=x, sr=sr, hop_length=hop_length)
plt.figure(figsize=(15, 5))
plt.axis('off')
librosa.display.specshow(chromagram, x_axis='time', y_axis='chroma', hop_length=hop_length, cmap='coolwarm')

In [None]:
import librosa
import matplotlib.pyplot as plt
import librosa.display

audio_files = glob.glob('wav/*')
#audio = audio_files[0]

# The MFCC images are written to a6/; create it so savefig does not fail when
# the folder is missing.
import os
os.makedirs('a6', exist_ok=True)

for audio in audio_files:
    # sr=None keeps the file's own sampling rate; the default would resample
    # to 22 kHz mono (use sr=44100 to resample explicitly).
    x , sr = librosa.load(audio,sr=None)

    # a6: MFCC image with the axes hidden.
    # The audio is passed by keyword: newer librosa releases reject it positionally.
    fig = plt.figure(figsize=(14, 5))
    mfccs = librosa.feature.mfcc(y=x, sr=sr)
    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
    plt.axis('off')
    out = get_specfilename(audio,'a6/')
    plt.savefig(out)
    # Close the figure: otherwise one figure per audio file stays open and
    # memory grows with every iteration.
    plt.close(fig)

    # progress: current file followed by the number of images in a1/ .. a8/
    print(audio, *[len(glob.glob(f'a{k}/*')) for k in range(1, 9)])

In [None]:
import glob
# Number of files in each representation folder a1/ .. a8/, space-separated.
print(*[len(glob.glob(f'a{k}/*')) for k in range(1, 9)])

In [None]:
# List the generated files in every representation folder.
ls a1/* a2/* a3/* a4/* a5/* a6/* a7/* a8/*

In [None]:
import glob as glob
import pandas as pd
# The class label of each clip is the part of its file name before the first
# underscore (second path component).
audio_files = glob.glob('wav/*')
cats = [file.split('/')[1].split('_')[0] for file in audio_files]

# Number of distinct classes, clips per class, then the distinct count again.
df = pd.DataFrame(cats)
print('Unique classes : '+str(df[0].nunique()))
print(df[0].value_counts())
print(len(df[0].unique()))

In [None]:
# WARNING: destructive — deletes every generated file in a2/ and a3/.
rm a2/* a3/*

In [None]:
# WARNING: destructive — deletes every generated file in a1/ .. a8/.
rm a1/* a2/* a3/* a4/* a5/* a6/* a7/* a8/*

In [None]:
    # Render the eight representations (a1..a8) of the currently loaded clip and
    # save each to its own folder. Relies on `x`, `sr` and `audio` being set by
    # the cell that loads a WAV file, and on the two-argument
    # get_specfilename(name, folder). Every figure is closed after saving so
    # repeated execution does not accumulate open figures.
    # `import sklearn` alone does not load the preprocessing submodule.
    import sklearn.preprocessing

    # a1: waveform
    # NOTE(review): librosa 0.10 removed waveplot in favour of waveshow — confirm installed version.
    plt.figure(figsize=(14, 5))
    plt.axis('off')
    librosa.display.waveplot(x, sr=sr)
    out = get_specfilename(audio,'a1/')
    plt.savefig(out)
    plt.close()

    # a2: dB spectrogram, linear frequency axis
    X = librosa.stft(x)
    Xdb = librosa.amplitude_to_db(abs(X))
    plt.figure(figsize=(14, 5))
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
    plt.axis('off')
    out = get_specfilename(audio,'a2/')
    plt.savefig(out)
    plt.close()

    # a3: dB spectrogram, log frequency axis
    plt.figure(figsize=(14, 5))
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
    plt.axis('off')
    out = get_specfilename(audio,'a3/')
    plt.savefig(out)
    plt.close()

    # a4: spectral centroid (red) over the waveform
    plt.figure(figsize=(14, 5))
    # The audio is passed by keyword: newer librosa releases reject it positionally.
    spectral_centroids = librosa.feature.spectral_centroid(y=x, sr=sr)[0]
    # Time of each frame. sr must be passed: the default assumes 22050 Hz and
    # would misalign the curve with a waveform loaded at its native rate.
    frames = range(len(spectral_centroids))
    t = librosa.frames_to_time(frames, sr=sr)
    # Normalising the spectral centroid for visualisation
    def normalize(x, axis=0):
        """Min-max scale ``x`` along ``axis`` with sklearn's ``minmax_scale``."""
        return sklearn.preprocessing.minmax_scale(x, axis=axis)
    librosa.display.waveplot(x, sr=sr, alpha=0.4)
    plt.axis('off')
    plt.plot(t, normalize(spectral_centroids), color='r')
    out = get_specfilename(audio,'a4/')
    plt.savefig(out)
    plt.close()

    # a5: spectral rolloff (red) over the waveform
    # NOTE(review): this figure is 10x5 while all others are 14x5 — confirm intended.
    plt.figure(figsize=(10, 5))
    spectral_rolloff = librosa.feature.spectral_rolloff(y=x+0.01, sr=sr)[0]
    librosa.display.waveplot(x, sr=sr, alpha=0.4)
    plt.axis('off')
    plt.plot(t, normalize(spectral_rolloff), color='r')
    out = get_specfilename(audio,'a5/')
    plt.savefig(out)
    plt.close()

    # a6: MFCCs
    plt.figure(figsize=(14, 5))
    mfccs = librosa.feature.mfcc(y=x, sr=sr)
    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
    plt.axis('off')
    out = get_specfilename(audio,'a6/')
    plt.savefig(out)
    plt.close()

    # a7: MFCCs scaled to zero mean / unit variance per coefficient
    plt.figure(figsize=(14, 5))
    mfccs = sklearn.preprocessing.scale(mfccs, axis=1)
    plt.axis('off')
    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
    out = get_specfilename(audio,'a7/')
    plt.savefig(out)
    plt.close()

    # a8: chromagram
    hop_length = 512
    chromagram = librosa.feature.chroma_stft(y=x, sr=sr, hop_length=hop_length)
    plt.figure(figsize=(14, 5))
    plt.axis('off')
    librosa.display.specshow(chromagram, x_axis='time', y_axis='chroma', hop_length=hop_length, cmap='coolwarm')
    out = get_specfilename(audio,'a8/')
    plt.savefig(out)
    plt.close()