In [1]:
import tensorflow as tf
import numpy as np
import wave

from pathlib import Path

__signal_framerate = 16000

def get_labels(label_path):
  """Parses the data arff files to extract the labels 

  Args:
      label_path: A path glob which contains the arff files with the labels.
  Returns:
      A dictionary for the labels of each fold.
  """
  labels = {}
  class_names = None
  label_path = Path(label_path)
  print('Extracting labels from {}'.format(label_path))

  for path in label_path.parent.glob(label_path.name):
      portion = path.suffixes[-2][1:]
      print('Processing {}'.format(path))
  
      with open(str(path)) as f:
          gts = [np.array(l.strip().split(','))[[0, -1]] for l in f.readlines() if l[0] != '@' and 'wav' in l]
  
      if class_names is None:
          class_names = np.unique([g for _, g in gts])
  
      for name, class_name in gts:
  
          # No labels exist for this dataset.
          if '?' in class_name:
              print('No labels exist for the {} portion'.format(portion))
              break
  
          class_id = np.where(class_name == class_names)[0][0]
          labels.setdefault(portion, []).append((name.replace("'", ""), int(class_id)))

  return labels

def read_wave(path):
  """Reads a wav file and splits it in chunks of 40ms. 
  Pads with zeros if duration does not fit exactly the 40ms chunks.
  Assumptions: 
      A. Wave file has one channel.
      B. Frame rate of wav file is 16KHz.
  
  Args:
      wav_file: The name of the wav file.
  Returns:
      A data array, where each row corresponds to 40ms.
  """

  fp = wave.open(str(path))
  num_of_channels = fp.getnchannels()
  fps = fp.getframerate()
    
  if num_of_channels > 1:
    raise ValueError('The wav file should have 1 channel. [{}] found'.format(num_of_channels))

  if fps != __signal_framerate:
    raise ValueError('The wav file should have 16000 fps. [{}] found'.format(fps))

  chunk_size = 640 # 40ms if fps = 16k.

  num_frames = fp.getnframes()
  dstr = fp.readframes(num_frames * num_of_channels)
  data = np.fromstring(dstr, np.int16)
  audio = np.reshape(data, (-1))
  audio = audio / 2.**15 # Normalise audio data (int16).

  audio = np.pad(audio, (0, chunk_size - audio.shape[0] % chunk_size), 'constant')
  audio = audio.reshape(-1, chunk_size)

  return audio.astype(np.float32)


In [2]:
# X_tr=np.array()
# y_tr=np.array()
l=[]
l=get_labels('E:576/Project/ComParE2017_Addressee.ComParE.train.arff')

myarray = np.asarray(l['train'])
# q=myarray[:,1]

xs=[]
ys=[]
ks=[]

for j in range(3742):
    
    if j<9:
        p=read_wave('E:576/Project/wav/train_000'+str(j+1)+'.wav')
    elif 9<=j<99:
        p=read_wave('E:576/Project/wav/train_00'+str(j+1)+'.wav')
    elif 99<=j<999:
        p=read_wave('E:576/Project/wav/train_0'+str(j+1)+'.wav')
    else:
        p=read_wave('E:576/Project/wav/train_'+str(j+1)+'.wav')

    
    xs.append(p)
    X_tr = np.concatenate(xs)

    
    y=myarray[j,1]
    a=p.shape[0]
    q2=np.zeros((a,1))
    for i in range(a):
        
        q2[i,0]=y
        
    ys.append(q2)
    y_tr = np.concatenate(ys)
    
    q3=np.zeros((a,1))
    for i in range(a):
        q3[i,0]=j+1
    
    ks.append(q3)
    k_tr = np.concatenate(ks)
    
    print(y_tr.shape,j)

  
     

Extracting labels from E:576\Project\ComParE2017_Addressee.ComParE.train.arff
Processing E:576\Project\compare2017_addressee.compare.train.arff
(48, 1) 0
(74, 1) 1
(100, 1) 2
(142, 1) 3
(197, 1) 4
(262, 1) 5
(288, 1) 6
(323, 1) 7
(362, 1) 8
(388, 1) 9
(434, 1) 10
(593, 1) 11
(738, 1) 12
(762, 1) 13
(788, 1) 14
(814, 1) 15
(871, 1) 16
(913, 1) 17
(949, 1) 18
(969, 1) 19
(992, 1) 20
(1087, 1) 21
(1183, 1) 22
(1241, 1) 23
(1305, 1) 24
(1331, 1) 25
(1357, 1) 26
(1383, 1) 27
(1415, 1) 28
(1447, 1) 29
(1494, 1) 30
(1540, 1) 31
(1593, 1) 32
(1609, 1) 33
(1670, 1) 34
(1701, 1) 35
(1786, 1) 36
(1813, 1) 37
(1853, 1) 38
(1890, 1) 39
(1916, 1) 40
(1959, 1) 41
(2011, 1) 42
(2037, 1) 43
(2063, 1) 44
(2089, 1) 45
(2105, 1) 46
(2195, 1) 47
(2279, 1) 48
(2346, 1) 49
(2372, 1) 50
(2398, 1) 51
(2424, 1) 52
(2450, 1) 53
(2476, 1) 54
(2611, 1) 55
(2668, 1) 56
(2700, 1) 57
(2759, 1) 58
(2785, 1) 59
(2815, 1) 60
(2846, 1) 61
(2887, 1) 62
(2919, 1) 63
(2962, 1) 64
(3003, 1) 65
(3029, 1) 66
(3089, 1) 67
(3182

In [112]:
from scipy import stats
m = stats.mode(y_tr)
print(m)


ModeResult(mode=array([[ 1.]]), count=array([[88784]]))


In [11]:
# X_tr=np.array()
# y_tr=np.array()
l=[]
l=get_labels('E:576/Project/ComParE2017_Addressee.ComParE.devel.arff')

myarray = np.asarray(l['devel'])

q=myarray[:,1]

xs=[]
ys=[]
ks=[]

for j in range(3550):
    
    if j<9:
        p=read_wave('E:576/Project/wav/devel_000'+str(j+1)+'.wav')
    elif 9<=j<99:
        p=read_wave('E:576/Project/wav/devel_00'+str(j+1)+'.wav')
    elif 99<=j<999:
        p=read_wave('E:576/Project/wav/devel_0'+str(j+1)+'.wav')
    else:
        p=read_wave('E:576/Project/wav/devel_'+str(j+1)+'.wav')

    
    xs.append(p)
    X_dev = np.concatenate(xs)

    
    y=myarray[j,1]
    a=p.shape[0]
    q2=np.zeros((a,1))
    for i in range(a):
        
        q2[i,0]=y
    b
    ys.append(q2)
    y_dev = np.concatenate(ys)
    
    q3=np.zeros((a,1))
    for i in range(a):
        q3[i,0]=j+1
    
    ks.append(q3)
    k_dev = np.concatenate(ks)
    print(y_dev.shape,j,k_dev.shape)
  
     

Extracting labels from E:576\Project\ComParE2017_Addressee.ComParE.devel.arff
Processing E:576\Project\compare2017_addressee.compare.devel.arff
(27, 1) 0 (27, 1)
(61, 1) 1 (61, 1)
(149, 1) 2 (149, 1)
(165, 1) 3 (165, 1)
(220, 1) 4 (220, 1)
(246, 1) 5 (246, 1)
(290, 1) 6 (290, 1)
(324, 1) 7 (324, 1)
(373, 1) 8 (373, 1)
(456, 1) 9 (456, 1)
(550, 1) 10 (550, 1)
(576, 1) 11 (576, 1)
(606, 1) 12 (606, 1)
(643, 1) 13 (643, 1)
(672, 1) 14 (672, 1)
(727, 1) 15 (727, 1)
(816, 1) 16 (816, 1)
(842, 1) 17 (842, 1)
(868, 1) 18 (868, 1)
(922, 1) 19 (922, 1)
(1024, 1) 20 (1024, 1)
(1065, 1) 21 (1065, 1)
(1087, 1) 22 (1087, 1)
(1161, 1) 23 (1161, 1)
(1187, 1) 24 (1187, 1)
(1219, 1) 25 (1219, 1)
(1245, 1) 26 (1245, 1)
(1280, 1) 27 (1280, 1)
(1306, 1) 28 (1306, 1)
(1332, 1) 29 (1332, 1)
(1371, 1) 30 (1371, 1)
(1411, 1) 31 (1411, 1)
(1483, 1) 32 (1483, 1)
(1528, 1) 33 (1528, 1)
(1554, 1) 34 (1554, 1)
(1580, 1) 35 (1580, 1)
(1597, 1) 36 (1597, 1)
(1672, 1) 37 (1672, 1)
(1742, 1) 38 (1742, 1)
(1758, 1) 39 

In [33]:
most_freq=[]
ma=[]
new=np.zeros((141187,2))
from statistics import mode
exc_count=0
for j in range(3550):
    for i in range(141187):
        if k_dev[i]==j+1:
            try:
                new[i,0]=j+1
                ma.append(pred[i])
                max= mode(ma)
                new[i,1]=max
            except:
                new[i,0]=j+1
                new[i,1]=1
        else:
            ma=[]

In [40]:
#most_freq=[]
most_freq=np.zeros((3550,1)) #change 3 to 3550
ma=[]
from statistics import mode
exc_count=0
#to check the code have used first 3 audios and their 40 ms chunks of length 27,61 and 149 cumulative 
for j in range(3550): #3550
    for i in range(141187): #141187
        if k_dev[i]==j+1:
            try:
#                 print('This is i,j ',i,j)
                ma.append(pred[i]) 
#                 print('This is ma ',ma)
                max= mode(ma)
#                 print('This is mode ',max)
                most_freq[j,0]=max
            except:
                exc_count+=1
                max= 1
#                 print('This is mode ',max)
                most_freq[j,0]=max
        else:
            ma=[]
            pass


In [41]:
from scipy import stats
stats.mode(most_freq)

ModeResult(mode=array([[ 1.]]), count=array([[3550]]))

In [42]:
most_freq

array([[ 1.],
       [ 1.],
       [ 1.],
       ..., 
       [ 1.],
       [ 1.],
       [ 1.]])

In [37]:
from scipy import stats
stats.mode(new[:,1])

ModeResult(mode=array([ 1.]), count=array([141095]))

In [15]:
from sklearn import preprocessing, cross_validation, svm, tree
from sklearn import linear_model 
from sklearn.metrics import recall_score

logist=linear_model.LogisticRegression(C=1e10)
logist.fit(X_tr,y_tr)
accuracy=logist.score(X_tr,y_tr)
rc_tr=recall_score(y_tr, logist.predict(X_tr), labels=None, pos_label=0, average='binary', sample_weight=None)
rc_dv=recall_score(y_dev, logist.predict(X_dev), labels=None, pos_label=0, average='binary', sample_weight=None)

print(accuracy,rc_tr,rc_dv)

  y = column_or_1d(y, warn=True)


0.582055068754 0.0162495477498 0.0132263988812


In [18]:
pred=logist.predict(X_dev)

In [24]:
pred.shape

(141187,)

In [29]:
from statistics import mode
max=[]
max40=[]
for i in range(141188):
    for j in range(3550):
        ma=[]
        if k_dev[i]==j:

            ma.append(pred[i])
#             print(ma)
        max[j]=mode(ma)
        max40[i]=mode(ma)
            

IndexError: list assignment index out of range

In [None]:
ma

In [115]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=10,activation='relu', solver='sgd',learning_rate_init=0.01,max_iter=500)

mlp.fit(X_tr, y_tr)
rc_tr=recall_score(y_tr, mlp.predict(X_tr), labels=None, pos_label=0, average='binary', sample_weight=None)
rc_dv=recall_score(y_dev, mlp.predict(X_dev), labels=None, pos_label=0, average='binary', sample_weight=None)

print (mlp.score(X_dev,y_dev),mlp.score(X_tr,y_tr),rc_tr,rc_dv)

0.5796284360458116

In [116]:
from sklearn.metrics import recall_score
recall_score(y_dev, logist.predict(X_dev), labels=None, pos_label=0, average='binary', sample_weight=None)


0.0

In [130]:
from sklearn.metrics import recall_score


0.013226398881231992

In [13]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_tr, logist.predict(X_tr), labels=None)

array([[    0, 63571],
       [    0, 88784]])

In [16]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_dev, logist.predict(X_dev), labels=None)

array([[  785, 58566],
       [ 1321, 80515]])

In [5]:
p

array([[-0.05847168, -0.06530762, -0.13421631, ..., -0.37460327,
        -0.3868103 , -0.42007446],
       [-0.49069214, -0.49987793, -0.47485352, ..., -0.04022217,
         0.12069702,  0.27218628],
       [ 0.37051392,  0.3883667 ,  0.33963013, ..., -0.5223999 ,
        -0.50454712, -0.45581055],
       ..., 
       [ 0.10220337,  0.12600708,  0.23419189, ..., -0.61553955,
        -0.44717407, -0.49819946],
       [-0.35906982,  0.10476685,  0.16650391, ..., -0.28457642,
        -0.05215454,  0.10620117],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)

In [8]:
def _int_feauture(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feauture(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def serialize_sample(writer, sample_data, root_dir, upsample=False):
  classes = [label for _, label in sample_data]
  class_ids = set(classes)
  num_samples_per_class = {class_name: sum(x == class_name for x in classes) for class_name in class_ids}
  print(num_samples_per_class)

  if upsample:
    max_samples = np.max(list(num_samples_per_class.values()))
    augmented_data = []

    for class_name, n_samples in num_samples_per_class.items():
        n_samples_to_add = max_samples - n_samples

        while n_samples_to_add > 0:
            for sample, label in sample_data:
                if n_samples_to_add <= 0:
                    break

                if label == class_name:
                    augmented_data.append((sample, label))
                    n_samples_to_add -= 1

    print('Augmented the dataset with {} samples'.format(len(augmented_data)))
    sample_data += augmented_data

    import random
    random.shuffle(sample_data)

  for i, (wav_file, label) in enumerate(sample_data):

    audio = read_wave(root_dir / wav_file)
    example = tf.train.Example(features=tf.train.Features(feature={
                'label': _int_feauture(label),
                'raw_audio': _bytes_feauture(audio.astype(np.float32).tobytes()),
            }))

    writer.write(example.SerializeToString())
    del audio, label

In [9]:
def main(data_folder, labels_file, tfrecords_folder):

  root_dir = Path(data_folder)
  labels = get_labels(labels_file)
  for portion in ['train', 'devel']:
    print('Creating tfrecords for [{}].'.format(portion))
    if not Path(tfrecords_folder).exists():
        Path(tfrecords_folder).mkdir()

    writer = tf.python_io.TFRecordWriter(
        (Path(tfrecords_folder) / '{}.tfrecords'.format(portion)
    ).as_posix())
    
    serialize_sample(writer, labels[portion], root_dir, upsample='train' in portion)
    writer.close()


In [12]:
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('wave_folder', 'wav/', 'The folder that contains the wav files.')
tf.app.flags.DEFINE_string('arff_path', 'ComPaRe*arff', 'The glob for all the arff files of the datset.')
tf.app.flags.DEFINE_string('tf_folder', 'tf_records', 'The folder to write the tf records.')

if __name__ == '__main__':
  main(FLAGS.wave_folder, FLAGS.arff_path, FLAGS.tf_folder)


ArgumentError: argument --wave_folder: conflicting option string: --wave_folder