From 896f0d213441ffbabca467f079d35ae2f28be82b Mon Sep 17 00:00:00 2001
From: "E. G. Patrick Bos"
Date: Mon, 17 May 2021 20:46:23 +0200
Subject: [PATCH] fix flake8 layout warnings

---
 platalea/asr.py                              |  2 +
 platalea/attention.py                        |  3 +-
 platalea/audio/features.py                   | 51 ++++++++------
 platalea/audio/filters.py                    | 35 ++++-----
 platalea/audio/melfreq.py                    | 14 ++--
 platalea/audio/preproc.py                    | 74 +++++++++++---------
 platalea/basicvq.py                          | 25 ++++---
 platalea/dataset.py                          |  1 -
 platalea/encoders.py                         | 11 +--
 platalea/experiments/flickr8k/pip_seq.py     |  2 +-
 platalea/experiments/flickr8k/transformer.py |  8 ++-
 platalea/fix_json.py                         | 20 +++---
 platalea/hardware.py                         |  6 +-
 platalea/ipa.py                              |  9 +--
 platalea/loss.py                             |  8 ++-
 platalea/optimizers.py                       |  2 +-
 platalea/rank_eval.py                        |  6 +-
 platalea/schedulers.py                       |  2 +-
 platalea/score.py                            |  8 +--
 platalea/text_image.py                       |  1 +
 platalea/utils/copy_best.py                  |  6 +-
 platalea/utils/evaluate_net.py               |  2 +
 platalea/utils/extract_transcriptions.py     |  2 +
 platalea/utils/flickr8k_filter_metadata.py   |  3 +
 platalea/vq.py                               |  8 +--
 platalea/vq_encode.py                        | 10 +--
 platalea/xer.py                              |  3 -
 setup.py                                     |  4 +-
 tests/test_experiments.py                    |  3 +-
 29 files changed, 182 insertions(+), 147 deletions(-)

diff --git a/platalea/asr.py b/platalea/asr.py
index b25af66..ebef59e 100644
--- a/platalea/asr.py
+++ b/platalea/asr.py
@@ -82,6 +82,7 @@ def cost(self, item):
 
 def experiment(net, data, config, slt=False):
     _device = platalea.hardware.device()
+
     def val_loss():
         with torch.no_grad():
             net.eval()
@@ -154,6 +155,7 @@ def val_loss():
             torch.save(net, 'net.best.pt')
     return results
 
+
 def get_default_config(hidden_size_factor=1024):
     fd = D.Flickr8KData
     hidden_size = hidden_size_factor * 3 // 4
diff --git a/platalea/attention.py b/platalea/attention.py
index 8534940..abfa352 100644
--- a/platalea/attention.py
+++ b/platalea/attention.py
@@ -18,6 +18,7 @@ def forward(self, input):
         # return the resulting embedding
         return x
 
+
 class MeanPool(nn.Module):
     def __init__(self):
         super(MeanPool, self).__init__()
@@ -75,7 +76,7 @@ def __init__(self, in_size_enc, in_size_state, hidden_size):
         self.U_a = nn.Linear(in_size_enc, hidden_size, bias=False)
         self.W_a = nn.Linear(in_size_state, hidden_size, bias=False)
         self.v_a = nn.Linear(hidden_size, 1, bias=True)
-        self.prev_enc_out= None
+        self.prev_enc_out = None
 
     def forward(self, hidden, encoder_outputs):
         # Calculate energies for each encoder output
diff --git a/platalea/audio/features.py b/platalea/audio/features.py
index b7c6a59..d6eb04c 100644
--- a/platalea/audio/features.py
+++ b/platalea/audio/features.py
@@ -5,38 +5,40 @@
 @author: danny
 """
-from platalea.audio.preproc import four,pad,preemph, hamming, notch
-from platalea.audio.filters import apply_filterbanks,filter_centers, create_filterbanks
+from platalea.audio.preproc import four, pad, preemph, hamming, notch
+from platalea.audio.filters import apply_filterbanks, filter_centers, create_filterbanks
 from scipy.fftpack import dct
 import numpy
 import math
 # this file contains the main bulk of the actual feature creation functions
 
-def delta (data, N):
-# calculate delta features, N is the number of frames to look forward and backward
+
+def delta(data, N):
+    # calculate delta features, N is the number of frames to look forward and backward
     # create a delta array of the right shape
     dt = numpy.zeros(data.shape)
     # pad data with first and last frame for size of N
-    for n in range (N):
-        data = numpy.row_stack((data[0,:],data, data[-1,:]))
+    for n in range(N):
+        data = numpy.row_stack((data[0, :], data, data[-1, :]))
     # calc n*c[x+n] + c[x-n] for n in N and sum them
-    for n in range (1, N + 1):
-        dt += numpy.array([n * (data[x+n,:] - data[x-n,:]) for x in range (N, len(data) - N)])
+    for n in range(1, N + 1):
+        dt += numpy.array([n * (data[x+n, :] - data[x-n, :]) for x in range(N, len(data) - N)])
     # normalise the deltas for the size of N
-    normalise = 2* sum([numpy.power(x,2) for x in range (1, N+1)])
+    normalise = 2 * sum([numpy.power(x, 2) for x in range(1, N+1)])
     dt = dt/normalise
     return (dt)
 
+
 def raw_frames(data, frame_shift, window_size):
-# this function cuts the data into frames and calculates each frame's log energy
+    # this function cuts the data into frames and calculates each frame's log energy
 
-    #determine the number of frames to be extracted
+    # determine the number of frames to be extracted
     nframes = math.floor(data.size/frame_shift)
-    #apply notch filter
+    # apply notch filter
     notched_data = notch(data)
     # pad the data
     data = pad(notched_data, window_size, frame_shift)
@@ -46,8 +48,8 @@ def raw_frames(data, frame_shift, window_size):
     frames = []
     energy = []
 
-    for f in range (0, nframes):
-        frame = data[f * frame_shift : f * frame_shift + window_size]
+    for f in range(0, nframes):
+        frame = data[(f * frame_shift):(f * frame_shift + window_size)]
 
         energy.append(numpy.log(numpy.sum(numpy.square(frame), 0)))
         frames.append(frame)
@@ -59,27 +61,29 @@ def raw_frames(data, frame_shift, window_size):
 
     return (frames, energy)
 
+
 def get_freqspectrum(frames, alpha, fs, window_size):
-# this function prepares the raw frames for conversion to frequency spectrum
-# and applies fft
+    # this function prepares the raw frames for conversion to frequency spectrum
+    # and applies fft
 
     # apply preemphasis
     frames = preemph(frames, alpha)
     # apply hamming windowing
     frames = hamming(frames)
     # apply fft
-    freq_spectrum = four(frames,fs,window_size)
+    freq_spectrum = four(frames, fs, window_size)
 
     return freq_spectrum
 
+
 def get_fbanks(freq_spectrum, nfilters, fs):
-# this function calculates the filters and creates filterbank features from
-# the fft features
+    # this function calculates the filters and creates filterbank features from
+    # the fft features
 
     # get the frequencies corresponding to the bins returned by the fft
     xf = numpy.linspace(0.0, fs/2, numpy.shape(freq_spectrum)[1])
     # get the filter frequencies
-    fc = filter_centers (nfilters,fs,xf)
+    fc = filter_centers(nfilters, fs, xf)
     # create filterbanks
     filterbanks = create_filterbanks(nfilters, xf, fc)
     # apply filterbanks
@@ -87,15 +91,16 @@ def get_fbanks(freq_spectrum, nfilters, fs):
 
     return fbanks
 
+
 def get_mfcc(fbanks):
-# this function creates mfccs from the fbank features
+    # this function creates mfccs from the fbank features
 
     # apply discrete cosine transform to get mfccs. According to convention,
     # we discard the first filterbank (which is roughly equal to the method
     # where we only space filters from 1000 Hz onwards)
-    mfcc = dct(fbanks[:,1:])
+    mfcc = dct(fbanks[:, 1:])
     # discard the first coefficient of the mfcc as well and take the next 13
     # coefficients.
-    mfcc = mfcc[:,1:13]
+    mfcc = mfcc[:, 1:13]
 
     return mfcc
diff --git a/platalea/audio/filters.py b/platalea/audio/filters.py
index e95572f..22b5680 100644
--- a/platalea/audio/filters.py
+++ b/platalea/audio/filters.py
@@ -9,16 +9,17 @@
 from platalea.audio.melfreq import freq2mel, mel2freq
 import numpy
 
-def create_filterbanks (nfilters,freqrange,fc):
+
+def create_filterbanks(nfilters, freqrange, fc):
     # function to create filter banks. takes as input
     # the number of filters to be created, the frequency range and the
     # filter centers
     filterbank = []
     # for the desired number of filters do
-    for n in range (0,nfilters):
+    for n in range(0, nfilters):
         # set the begin, center and end frequency of the filters
         begin = fc[n]
-        center= fc[n+1]
+        center = fc[n+1]
         end = fc[n+2]
         f = []
         # create triangular filters
@@ -26,7 +27,7 @@ def create_filterbanks (nfilters,freqrange,fc):
             # 0 for f outside the filter
             if x < begin:
                 f.append(0)
-            #increasing to 1 towards the center
+            # increasing to 1 towards the center
             elif begin <= x and x <= center:
                 f.append((x-begin)/(center-begin))
             # decreasing to 0 upwards from the center
@@ -36,27 +37,29 @@ def create_filterbanks (nfilters,freqrange,fc):
             elif x > end:
                 f.append(0)
         filterbank.append(f)
-    
+
     return filterbank
-    
+
+
 def filter_centers(nfilters, fs, xf):
     # calculates the center frequencies for the mel filters
-    
-    #space the filters equally in mels
+
+    # space the filters equally in mels
     spacing = numpy.linspace(0, freq2mel(fs/2), nfilters+2)
-    #back from mels to frequency
+    # back from mels to frequency
     spacing = mel2freq(spacing)
     # round the filter frequencies to the nearest available fft bin frequencies
-    # and return the centers for the filters. 
-    filters = [xf[numpy.argmin(numpy.abs(xf-x))] for x in spacing]
-    
+    # and return the centers for the filters.
+    filters = [xf[numpy.argmin(numpy.abs(xf-x))] for x in spacing]
+
     return filters
-    
+
+
 def apply_filterbanks(data, filters):
     # function to apply the filterbanks and take the log of the filterbanks
-    filtered_freq = numpy.log(numpy.dot(data, numpy.transpose(filters))) 
+    filtered_freq = numpy.log(numpy.dot(data, numpy.transpose(filters)))
     # same as with energy, taking the log of a filter bank with 0 power results in -inf
     # we approximate 0 power with -50 the log of 2e-22
-    filtered_freq[filtered_freq == numpy.log(0)] = -50 
-    
+    filtered_freq[filtered_freq == numpy.log(0)] = -50
+
     return filtered_freq
diff --git a/platalea/audio/melfreq.py b/platalea/audio/melfreq.py
index bf2179f..4401bde 100644
--- a/platalea/audio/melfreq.py
+++ b/platalea/audio/melfreq.py
@@ -6,14 +6,16 @@
 @author: danny
 """
 import numpy
-#provides simple functions to convert a frequency to mel and vice versa
+# provides simple functions to convert a frequency to mel and vice versa
+
 
 def freq2mel(f):
-    #converts a frequency to mel
-    mel=1125*numpy.log(1+f/700)
+    # converts a frequency to mel
+    mel = 1125*numpy.log(1+f/700)
 
     return (mel)
 
+
 def mel2freq(m):
-    #converts mel to frequency
-    f=700*(numpy.exp(m/1125)-1)
-    return (f)
\ No newline at end of file
+    # converts mel to frequency
+    f = 700*(numpy.exp(m/1125)-1)
+    return f
diff --git a/platalea/audio/preproc.py b/platalea/audio/preproc.py
index aea6876..d60c4cf 100644
--- a/platalea/audio/preproc.py
+++ b/platalea/audio/preproc.py
@@ -10,63 +10,67 @@
 import numpy
 # provides some basic preprocessing functions for audio files, such as
 # padding the frames, Hamming windowing of the frames, data preemphasis and Fourier
-# transform 
+# transform
 
 def four(frames, fs, windowsize):
-    # fft works on frames of size 2^x, first find the appropriate padsize for
-    # our framesize. 
-    exp = 1
-    while True:
-        if numpy.power(2,exp) - windowsize >= 0:
-            padsize= numpy.power(2,exp) - windowsize
-            break
-        else:
-            exp += 1
-    # pad frames to be of size 2^x
-    frames = numpy.pad(frames, [(0,0), (0,padsize)], 'constant', constant_values = 0)
-    # set cutoff at half the frame size (+1 to keep the bin around
-    # which the spectrum is mirrored)
-    cutoff = int((windowsize+padsize)/2)+1
-    # perform fast Fourier transform
-    Y = fft(frames)
-    # take absolute power and collapse spectrum. Normalise the power for the
-    # number of bins but multiply by 2 to make up for the collapse of the spectrum
-    Yamp = 2/(windowsize+padsize)* numpy.abs(Y[:, 0:cutoff])
-    # first amp (dc component) and Nyquist freq bin are not to be doubled (as they
-    # are not mirrored in the fft)
-    Yamp[:,0] = Yamp[:,0]/2
-    Yamp[:,-1] = Yamp[:,-1]/2
-    return (Yamp)
+    # fft works on frames of size 2^x, first find the appropriate padsize for
+    # our framesize.
+    exp = 1
+    while True:
+        if numpy.power(2, exp) - windowsize >= 0:
+            padsize = numpy.power(2, exp) - windowsize
+            break
+        else:
+            exp += 1
+    # pad frames to be of size 2^x
+    frames = numpy.pad(frames, [(0, 0), (0, padsize)], 'constant', constant_values=0)
+    # set cutoff at half the frame size (+1 to keep the bin around
+    # which the spectrum is mirrored)
+    cutoff = int((windowsize+padsize)/2)+1
+    # perform fast Fourier transform
+    Y = fft(frames)
+    # take absolute power and collapse spectrum. Normalise the power for the
+    # number of bins but multiply by 2 to make up for the collapse of the spectrum
+    Yamp = 2/(windowsize+padsize) * numpy.abs(Y[:, 0:cutoff])
+    # first amp (dc component) and Nyquist freq bin are not to be doubled (as they
+    # are not mirrored in the fft)
+    Yamp[:, 0] = Yamp[:, 0]/2
+    Yamp[:, -1] = Yamp[:, -1]/2
+    return (Yamp)
+
 
 def notch(data):
-# apply a notch filter to remove the DC offset
+    # apply a notch filter to remove the DC offset
     b, a = iirnotch(0.001, 3.5)
     notched = lfilter(b, a, data)
 
     return notched
-    
-def pad (data,window_size, frame_shift):
+
+
+def pad(data, window_size, frame_shift):
     # function to pad the audio file to fit the frameshift
     context_size = (window_size-frame_shift)/2
-    pad_size = context_size - numpy.mod(data.size, frame_shift) 
+    pad_size = context_size - numpy.mod(data.size, frame_shift)
     # if needed add padding to the end of the data
    if pad_size > 0:
         data = numpy.append(data, numpy.zeros(int(numpy.ceil(pad_size))))
-    #always add padding to the front of the data
+    # always add padding to the front of the data
     data = numpy.append(numpy.zeros(int(context_size)), data)
 
     return(data)
-    
+
+
 def preemph(data, alpha):
     # preemphasises the data: x_preemph(t) = x(t) - alpha*x(t-1)
     xt = data
     xtminus1 = data*alpha
-    xtminus1 = numpy.insert(xtminus1,0,0,1)[:,:-1]
-    data_preemph = xt-xtminus1 
+    xtminus1 = numpy.insert(xtminus1, 0, 0, 1)[:, :-1]
+    data_preemph = xt-xtminus1
 
     return data_preemph
-    
+
+
 def hamming(data):
     # apply hamming windowing to a frame of data
     L = numpy.shape(data)[1]
     hammingwindow = 0.54-(0.46*numpy.cos(2*numpy.pi*numpy.arange(L)/(L-1)))
-    data = numpy.multiply(data,hammingwindow)
+    data = numpy.multiply(data, hammingwindow)
 
     return data
diff --git a/platalea/basicvq.py b/platalea/basicvq.py
index 259607e..c749455 100644
--- a/platalea/basicvq.py
+++ b/platalea/basicvq.py
@@ -12,6 +12,7 @@
 from platalea.optimizers import create_optimizer
 from platalea.schedulers import create_scheduler
 
+
 class SpeechImage(nn.Module):
     def __init__(self, config):
         super(SpeechImage, self).__init__()
@@ -27,7 +28,7 @@ def 
cost(self, item): speech_enc = self.SpeechEncoder(item['audio'], item['audio_len']) image_enc = self.ImageEncoder(item['image']) scores = platalea.loss.cosine_matrix(speech_enc, image_enc) - loss = platalea.loss.contrastive(scores, margin=self.config['margin_size']) + loss = platalea.loss.contrastive(scores, margin=self.config['margin_size']) return loss def embed_image(self, images): @@ -49,8 +50,8 @@ def embed_audio(self, audios): audio_e.append(self.SpeechEncoder(a.cuda(), l.cuda()).detach().cpu().numpy()) audio_e = np.concatenate(audio_e) return audio_e - - def code_audio(self, audios, one_hot=False): #FIXME messed up sized ETC + + def code_audio(self, audios, one_hot=False): # FIXME messed up sized ETC audio = torch.utils.data.DataLoader(dataset=audios, batch_size=32, shuffle=False, collate_fn=D.batch_audio) @@ -87,14 +88,14 @@ def val_loss(): with open("result.json", "w") as out: for epoch in range(1, config['epochs']+1): cost = Counter() - for j, item in enumerate(data['train'], start=1): # check reshuffling + for j, item in enumerate(data['train'], start=1): # check reshuffling item = {key: value.cuda() for key, value in item.items()} loss = net.cost(item) optimizer.zero_grad() loss.backward() optimizer.step() scheduler.step() - cost += Counter({'cost': loss.item(), 'N':1}) + cost += Counter({'cost': loss.item(), 'N': 1}) average_loss = cost['cost'] / cost['N'] if j % 100 == 0: logging.info("train {} {} {}".format(epoch, j, average_loss)) @@ -109,12 +110,14 @@ def val_loss(): torch.save(net, "net.{}.pt".format(epoch)) return results -DEFAULT_CONFIG = dict(SpeechEncoder=dict(SpeechEncoderBottom=dict(conv=dict(in_channels=39, out_channels=64, kernel_size=6, stride=2, padding=0, bias=False), - rnn= dict(input_size=64, hidden_size=1024, num_layers=2, - bidirectional=True, dropout=0)), + +DEFAULT_CONFIG = dict(SpeechEncoder=dict(SpeechEncoderBottom=dict(conv=dict(in_channels=39, out_channels=64, kernel_size=6, + stride=2, padding=0, bias=False), + rnn=dict(input_size=64, hidden_size=1024, num_layers=2, + bidirectional=True, dropout=0)), VQEmbedding=dict(num_codebook_embeddings=256, embedding_dim=1024, jitter=0.12), - SpeechEncoderTop=dict(rnn= dict(input_size=64, hidden_size=1024, num_layers=2, - bidirectional=True, dropout=0), - att= dict(in_size=2048, hidden_size=128))), + SpeechEncoderTop=dict(rnn=dict(input_size=64, hidden_size=1024, num_layers=2, + bidirectional=True, dropout=0), + att=dict(in_size=2048, hidden_size=128))), ImageEncoder=dict(linear=dict(in_size=2048, out_size=2*1024), norm=True), margin_size=0.2) diff --git a/platalea/dataset.py b/platalea/dataset.py index 8a9646b..47c2ae8 100644 --- a/platalea/dataset.py +++ b/platalea/dataset.py @@ -1,5 +1,4 @@ import json -import logging import numpy as np import pathlib import pickle diff --git a/platalea/encoders.py b/platalea/encoders.py index cf614ce..e66ecc0 100644 --- a/platalea/encoders.py +++ b/platalea/encoders.py @@ -431,7 +431,6 @@ def forward(self, x): # Expecting packed sequence if self.RNN is not None: x, _ = self.RNN(x) - #x, _ = nn.utils.rnn.pad_packed_sequence(x, batch_first=True) return x def introspect(self, input, length): @@ -526,7 +525,8 @@ class SpeechEncoderVQ(nn.Module): def __init__(self, config): super(SpeechEncoderVQ, self).__init__() self.Bottom = SpeechEncoderBottom(config['SpeechEncoderBottom']) - self.Codebook = VQEmbeddingEMA(config['VQEmbedding']['num_codebook_embeddings'], config['VQEmbedding']['embedding_dim'], jitter=config['VQEmbedding']['jitter']) + self.Codebook = 
VQEmbeddingEMA(config['VQEmbedding']['num_codebook_embeddings'], + config['VQEmbedding']['embedding_dim'], jitter=config['VQEmbedding']['jitter']) self.Top = SpeechEncoderTop(config['SpeechEncoderTop']) def forward(self, input, length): @@ -549,13 +549,14 @@ class SpeechEncoderVQ2(nn.Module): def __init__(self, config): super(SpeechEncoderVQ2, self).__init__() self.Bottom = SpeechEncoderBottom(config['SpeechEncoderBottom']) - self.Codebook1 = VQEmbeddingEMA(config['VQEmbedding1']['num_codebook_embeddings'], config['VQEmbedding1']['embedding_dim'], jitter=config['VQEmbedding1']['jitter']) + self.Codebook1 = VQEmbeddingEMA(config['VQEmbedding1']['num_codebook_embeddings'], + config['VQEmbedding1']['embedding_dim'], jitter=config['VQEmbedding1']['jitter']) self.Middle = SpeechEncoderMiddle(config['SpeechEncoderMiddle']) - self.Codebook2 = VQEmbeddingEMA(config['VQEmbedding2']['num_codebook_embeddings'], config['VQEmbedding2']['embedding_dim'], jitter=config['VQEmbedding2']['jitter']) + self.Codebook2 = VQEmbeddingEMA(config['VQEmbedding2']['num_codebook_embeddings'], + config['VQEmbedding2']['embedding_dim'], jitter=config['VQEmbedding2']['jitter']) self.Top = SpeechEncoderTop(config['SpeechEncoderTop']) def forward(self, input, length): - #return self.Top(self.Codebook(self.Bottom(input, length))['quantized']) return self.Top(self.Codebook2(self.Middle(self.Codebook1(self.Bottom(input, length))['quantized']))['quantized']) def introspect(self, input, length): diff --git a/platalea/experiments/flickr8k/pip_seq.py b/platalea/experiments/flickr8k/pip_seq.py index 0553c0f..635d65a 100644 --- a/platalea/experiments/flickr8k/pip_seq.py +++ b/platalea/experiments/flickr8k/pip_seq.py @@ -12,7 +12,7 @@ from platalea.experiments.config import get_argument_parser -args = get_argument_parser()# import cProfile +args = get_argument_parser() # Parsing arguments args.add_argument( diff --git a/platalea/experiments/flickr8k/transformer.py b/platalea/experiments/flickr8k/transformer.py index 2e4f14e..9872f40 100644 --- a/platalea/experiments/flickr8k/transformer.py +++ b/platalea/experiments/flickr8k/transformer.py @@ -9,7 +9,7 @@ from platalea.experiments.config import get_argument_parser -args = get_argument_parser()# Parsing arguments +args = get_argument_parser() # Parsing arguments args.add_argument('--batch_size', default=32, type=int, help='How many samples per batch to load.') args.add_argument('--conv_stride', default=2, type=int, @@ -23,6 +23,7 @@ args.add_argument('--trafo_feedforward_dim', default=1024, type=int, help='TRANSFORMER: Dimensionality of feedforward layer at the end of the transformer layer stack.') + class unit_float(float): def __new__(cls, value): value = float(value) @@ -31,6 +32,7 @@ def __new__(cls, value): else: raise ValueError(f"{value} is not a proper unit_float, because it is not between 0 and 1") + args.add_argument('--trafo_dropout', default=0, type=unit_float, help='TRANSFORMER: Dropout factor, used for regularization.') @@ -50,11 +52,11 @@ def __new__(cls, value): data = dict( train=D.flickr8k_loader( args.flickr8k_root, args.flickr8k_meta, args.flickr8k_language, - args.audio_features_fn, split='train', batch_size=args.batch_size, shuffle=True, + args.audio_features_fn, split='train', batch_size=args.batch_size, shuffle=True, downsampling_factor=args.downsampling_factor), val=D.flickr8k_loader( args.flickr8k_root, args.flickr8k_meta, args.flickr8k_language, - args.audio_features_fn, split='val', batch_size=args.batch_size, shuffle=False) + args.audio_features_fn, 
split='val', batch_size=args.batch_size, shuffle=False) ) diff --git a/platalea/fix_json.py b/platalea/fix_json.py index 2cc9dc9..4a03c84 100644 --- a/platalea/fix_json.py +++ b/platalea/fix_json.py @@ -7,25 +7,27 @@ logging.basicConfig(level=logging.INFO) + def fix(): paths = glob.glob("experiments/*/result.json") for path in paths: logging.info("Fixing {}".format(path)) copyfile(path, path + ".orig") with open(path, 'w') as out: - data = [ eval(line) for line in open(path + ".orig") ] + data = [eval(line) for line in open(path + ".orig")] for datum in data: print(json.dumps(datum), file=out) - + def load_results(): - tables = [] - for file in glob.glob("experiments/vq*/result.json"): - data = [ flat(json.loads(line)) for line in open(file) ] - table = pd.read_json(io.StringIO(json.dumps(data)), orient='records') - table['path']=file - tables.append(table) - return tables + tables = [] + for file in glob.glob("experiments/vq*/result.json"): + data = [flat(json.loads(line)) for line in open(file)] + table = pd.read_json(io.StringIO(json.dumps(data)), orient='records') + table['path'] = file + tables.append(table) + return tables + def flat(rec): return dict(epoch=rec['epoch'], diff --git a/platalea/hardware.py b/platalea/hardware.py index d0e5f57..303a78f 100644 --- a/platalea/hardware.py +++ b/platalea/hardware.py @@ -4,20 +4,22 @@ _device = None + def set_device(device: Optional[str]): global _device _device = device + def device(ordinal: Optional[int] = None): """Return a device. - + By default, if available, it returns a GPU id string. Optionally, the user can specify the ordinal identifying a specific GPU. If GPUs are not available, it will return a CPU string. It is also possible to use set_device to set a custom device string. If set, i.e. if not None (the default value), this value is used. - + This function can only be used by models that run on a single device. """ global _device diff --git a/platalea/ipa.py b/platalea/ipa.py index d2a8955..3975f1f 100644 --- a/platalea/ipa.py +++ b/platalea/ipa.py @@ -1,5 +1,5 @@ import logging -PHONEMES="""arpabet ipa class +PHONEMES = """arpabet ipa class aa ɑ vowel ae æ vowel ah ə vowel @@ -40,21 +40,22 @@ z z fricative zh ʒ fricative""" + def parseipa(): - mapping = {} + mapping = {} lines = PHONEMES.split("\n") for line in lines[1:]: arpa, ipa, _ = line.split() mapping[arpa] = ipa return mapping + _arpa2ipa = parseipa() + def arpa2ipa(arpa, default=None): try: return _arpa2ipa[arpa] except KeyError: logging.warning("Key not found: {}".format(arpa)) return default - - diff --git a/platalea/loss.py b/platalea/loss.py index 8a4890e..4af8912 100644 --- a/platalea/loss.py +++ b/platalea/loss.py @@ -1,14 +1,16 @@ import torch -def contrastive(M, margin=0.2): - "Returns contrastive margin loss over similarity matrix M." + +def contrastive(M, margin=0.2): + "Returns contrastive margin loss over similarity matrix M." E = - M D = torch.diag(E) C_c = torch.clamp(margin - E + D, min=0) - C_r = torch.clamp(margin - E + D.view(-1,1), min=0) + C_r = torch.clamp(margin - E + D.view(-1, 1), min=0) C = C_c + C_r return (C.sum() - torch.diag(C).sum())/C.size(0)**2 + def cosine_matrix(U, V): "Returns the matrix of cosine similarity between each row of U and each row of V." 
U_norm = U / U.norm(2, dim=1, keepdim=True) diff --git a/platalea/optimizers.py b/platalea/optimizers.py index 6cc5050..7adac53 100644 --- a/platalea/optimizers.py +++ b/platalea/optimizers.py @@ -13,4 +13,4 @@ def create_optimizer(config, net_parameters): else: optimizer = optim.Adam(net_parameters, lr=1, weight_decay=config['l2_regularization']) optimizer.zero_grad() - return optimizer \ No newline at end of file + return optimizer diff --git a/platalea/rank_eval.py b/platalea/rank_eval.py index c763d35..4cc02dc 100644 --- a/platalea/rank_eval.py +++ b/platalea/rank_eval.py @@ -3,18 +3,20 @@ import numpy from scipy.spatial.distance import cdist + def cosine(x, y): return cdist(x, y, metric='cosine') + def ranking(candidates, references, correct, metric=cosine, ns=(1, 5, 10)): """Rank `candidates` in order of similarity for each vector and return evaluation metrics. `correct[i][j]` indicates whether for reference item i the candidate j is correct. """ distances = cdist(references, candidates) - result = {'ranks' : [] , 'recall' : {} } + result = {'ranks': [], 'recall': {}} for n in ns: - result['recall'][n] = [] + result['recall'][n] = [] for j, row in enumerate(distances): ranked = numpy.argsort(row) id_correct = numpy.where(correct[j][ranked])[0] diff --git a/platalea/schedulers.py b/platalea/schedulers.py index d551d53..40d8a4b 100644 --- a/platalea/schedulers.py +++ b/platalea/schedulers.py @@ -66,7 +66,7 @@ def create_scheduler(config, optimizer, data): if configured_scheduler is None or configured_scheduler == 'cyclic': scheduler = cyclic(optimizer, len(data['train']), max_lr=config['max_lr'], - min_lr=config['min_lr']) + min_lr=config['min_lr']) elif configured_scheduler == 'noam': scheduler = noam(optimizer, config['d_model']) elif configured_scheduler == 'constant': diff --git a/platalea/score.py b/platalea/score.py index fd6594f..3900f24 100644 --- a/platalea/score.py +++ b/platalea/score.py @@ -1,5 +1,4 @@ import numpy as np -import platalea.dataset as D import platalea.rank_eval as E import platalea.xer as xer import torch @@ -14,7 +13,7 @@ def score(net, dataset): return dict(medr=np.median(result['ranks']), recall={1: np.mean(result['recall'][1]), 5: np.mean(result['recall'][5]), - 10: np.mean(result['recall'][10])}) + 10: np.mean(result['recall'][10])}) def score_text_image(net, dataset): @@ -26,7 +25,7 @@ def score_text_image(net, dataset): return dict(medr=np.median(result['ranks']), recall={1: np.mean(result['recall'][1]), 5: np.mean(result['recall'][5]), - 10: np.mean(result['recall'][10])}) + 10: np.mean(result['recall'][10])}) def score_speech_text(net, dataset): @@ -38,7 +37,7 @@ def score_speech_text(net, dataset): return dict(medr=np.median(result['ranks']), recall={1: np.mean(result['recall'][1]), 5: np.mean(result['recall'][5]), - 10: np.mean(result['recall'][10])}) + 10: np.mean(result['recall'][10])}) def score_asr(net, dataset, beam_size=None): @@ -57,6 +56,7 @@ def bleu_score(references, hypotheses): bleu[i] = sentence_bleu([references[i]], hypotheses[i]) return bleu.mean() + def score_slt(net, dataset, beam_size=None): data = dataset.evaluation() trn = net.transcribe(data['audio'], beam_size=beam_size) diff --git a/platalea/text_image.py b/platalea/text_image.py index 443bf67..936430c 100644 --- a/platalea/text_image.py +++ b/platalea/text_image.py @@ -64,6 +64,7 @@ def embed_text(self, texts): def experiment(net, data, config): _device = platalea.hardware.device() + def val_loss(): net.eval() result = [] diff --git a/platalea/utils/copy_best.py 
b/platalea/utils/copy_best.py index 11a276f..f8e423c 100755 --- a/platalea/utils/copy_best.py +++ b/platalea/utils/copy_best.py @@ -40,17 +40,17 @@ def copy_best(exp_path=['.'], result_fname='result.json', save_fname='net.best.p 'exp_path', help='Path to the experiment', default=['.'], nargs='*') parser.add_argument( '--result', - help='Path to the JSON file containing the results'\ + help='Path to the JSON file containing the results' ' (default=result.json).', type=str, default='result.json') parser.add_argument( '--save', - help='Path where the corresponding net should be saved'\ + help='Path where the corresponding net should be saved' ' (default=net.best.pt).', type=str, default='net.best.pt') parser.add_argument( '--experiment_type', dest='experiment_type', - help='Type of experiment. Determines which metric is used'\ + help='Type of experiment. Determines which metric is used' ' (default=retrieval).', type=str, choices=['retrieval', 'asr', 'mtl', 'slt'], default='retrieval') diff --git a/platalea/utils/evaluate_net.py b/platalea/utils/evaluate_net.py index 9cfb649..3074cc8 100755 --- a/platalea/utils/evaluate_net.py +++ b/platalea/utils/evaluate_net.py @@ -16,6 +16,8 @@ args = get_argument_parser() + + def get_score_fn_speech_transcriber(is_slt, use_beam_decoding): if is_slt: score_fn = platalea.score.score_slt diff --git a/platalea/utils/extract_transcriptions.py b/platalea/utils/extract_transcriptions.py index e9f38a9..0c5d7db 100755 --- a/platalea/utils/extract_transcriptions.py +++ b/platalea/utils/extract_transcriptions.py @@ -8,6 +8,8 @@ args = get_argument_parser() + + def extract_trn(net, dataset, use_beam_decoding=False): d = dataset.evaluation() ref = d['text'] diff --git a/platalea/utils/flickr8k_filter_metadata.py b/platalea/utils/flickr8k_filter_metadata.py index f810ca2..4895e4a 100644 --- a/platalea/utils/flickr8k_filter_metadata.py +++ b/platalea/utils/flickr8k_filter_metadata.py @@ -2,6 +2,7 @@ M = json.load(open('dataset_multilingual_human.json')) + def filter(M): I = [] for i, m in enumerate(M['images']): @@ -16,6 +17,7 @@ def filter(M): I.append(m) M['images'] = I + def count_sent(M): cntr = 0 for m in M['images']: @@ -23,6 +25,7 @@ def count_sent(M): cntr += 1 return cntr + filter(M) count_sent(M) json.dump(M, open('dataset_multilingual_human_only.json', 'w')) diff --git a/platalea/vq.py b/platalea/vq.py index 9d6ca4c..5585a06 100644 --- a/platalea/vq.py +++ b/platalea/vq.py @@ -5,6 +5,7 @@ import torch.nn.functional as F from torch.distributions import Categorical + class Jitter(nn.Module): def __init__(self, p): super().__init__() @@ -26,6 +27,7 @@ def forward(self, x): x = torch.gather(x, 1, index.unsqueeze(-1).expand(-1, -1, channels)) return x + class VQEmbeddingEMA(nn.Module): def __init__(self, num_embeddings, embedding_dim, commitment_cost=0.25, decay=0.999, epsilon=1e-5, jitter=0.12): super(VQEmbeddingEMA, self).__init__() @@ -40,25 +42,23 @@ def __init__(self, num_embeddings, embedding_dim, commitment_cost=0.25, decay=0. 
self.register_buffer("ema_count", torch.zeros(num_embeddings)) self.register_buffer("ema_weight", self.embedding.clone()) self.jitter = Jitter(jitter) if jitter > 0 else None - + def forward(self, x): M, D = self.embedding.size() # unpack packed_sequence x, l = nn.utils.rnn.pad_packed_sequence(x, batch_first=True) x_flat = x.detach().reshape(-1, D) - + distances = torch.addmm(torch.sum(self.embedding ** 2, dim=1) + torch.sum(x_flat ** 2, dim=1, keepdim=True), x_flat, self.embedding.t(), alpha=-2.0, beta=1.0) - #distances = ((self.embedding - x_flat.unsqueeze(dim=1))**2).sum(dim=2) indices = torch.argmin(distances.float(), dim=-1) encodings = F.one_hot(indices, M).float() quantized = F.embedding(indices, self.embedding) quantized = quantized.view_as(x) - if self.training: self.ema_count = self.decay * self.ema_count + (1 - self.decay) * torch.sum(encodings, dim=0) diff --git a/platalea/vq_encode.py b/platalea/vq_encode.py index 0764262..18c8367 100644 --- a/platalea/vq_encode.py +++ b/platalea/vq_encode.py @@ -6,14 +6,12 @@ import os.path import numpy as np import torch -from pathlib import Path config = dict(type='mfcc', delta=True, alpha=0.97, n_filters=40, window_size=0.025, frame_shift=0.010) - def encode(net, datadir, outdir): - paths = glob.glob(datadir + "/*.wav") + paths = glob.glob(datadir + "/*.wav") assert len(paths) > 0 try: feat = torch.load(datadir + "_audiofeat.pt") @@ -29,9 +27,11 @@ def encode(net, datadir, outdir): out = outdir + '/' + filename + ".txt" assert code.shape[0] > 0 np.savetxt(out, code.astype(int), fmt='%d') - + + def encode_zerospeech(net, outdir='.'): - encode(net, "/roaming/gchrupal/verdigris/platalea.vq/data/2020/2019/english/test/") + encode(net, "/roaming/gchrupal/verdigris/platalea.vq/data/2020/2019/english/test/") + def evaluate_zerospeech(net, outdir='.'): encode_zerospeech(net, outdir=outdir) diff --git a/platalea/xer.py b/platalea/xer.py index 12e5ea5..3e9c979 100644 --- a/platalea/xer.py +++ b/platalea/xer.py @@ -1,6 +1,3 @@ - - - def nbeditops(s1, s2): import Levenshtein as L d = 0 diff --git a/setup.py b/setup.py index e336551..3f9eceb 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ # encoding: utf-8 -from setuptools import setup, find_packages +from setuptools import setup setup(name='platalea', description='Understanding visually grounded spoken language via multi-tasking', @@ -24,4 +24,4 @@ 'python-Levenshtein>=0.12.0'], use_scm_version=True, setup_requires=['setuptools_scm'], -) + ) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 6afd3b2..338fd06 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -17,7 +17,7 @@ def test_config(): assert args.epochs == 2 assert args.flickr8k_meta == 'thisandthat.json' - assert args.verbose == True + assert args.verbose assert args.lr_scheduler == 'noam' @@ -179,7 +179,6 @@ def test_pip_ind_experiment(): _assert_nested_almost_equal(result, expected) - def test_pip_seq_experiment(): expected = [{'medr': 1.5, 'recall': {1: 0.5, 5: 1.0, 10: 1.0}, 'average_loss': 0.3918714001774788,