In [50]:
import tensorflow as tf
import numpy as np
import wave

from pathlib import Path

__signal_framerate = 16000

def get_labels(label_path):
  """Parses the data arff files to extract the labels 

  Args:
      label_path: A path glob which contains the arff files with the labels.
  Returns:
      A dictionary for the labels of each fold.
  """
  labels = {}
  class_names = None
  label_path = Path(label_path)
  print('Extracting labels from {}'.format(label_path))

  for path in label_path.parent.glob(label_path.name):
      portion = path.suffixes[-2][1:]
      print('Processing {}'.format(path))
  
      with open(str(path)) as f:
          gts = [np.array(l.strip().split(','))[[0, -1]] for l in f.readlines() if l[0] != '@' and 'wav' in l]
  
      if class_names is None:
          class_names = np.unique([g for _, g in gts])
  
      for name, class_name in gts:
  
          # No labels exist for this dataset.
          if '?' in class_name:
              print('No labels exist for the {} portion'.format(portion))
              break
  
          class_id = np.where(class_name == class_names)[0][0]
          labels.setdefault(portion, []).append((name.replace("'", ""), int(class_id)))

  return labels

def read_wave(path):
  """Reads a wav file and splits it in chunks of 40ms. 
  Pads with zeros if duration does not fit exactly the 40ms chunks.
  Assumptions: 
      A. Wave file has one channel.
      B. Frame rate of wav file is 16KHz.
  
  Args:
      wav_file: The name of the wav file.
  Returns:
      A data array, where each row corresponds to 40ms.
  """

  fp = wave.open(str(path))
  num_of_channels = fp.getnchannels()
  fps = fp.getframerate()
    
  if num_of_channels > 1:
    raise ValueError('The wav file should have 1 channel. [{}] found'.format(num_of_channels))

  if fps != __signal_framerate:
    raise ValueError('The wav file should have 16000 fps. [{}] found'.format(fps))

  chunk_size = 640 # 40ms if fps = 16k.

  num_frames = fp.getnframes()
  dstr = fp.readframes(num_frames * num_of_channels)
  data = np.fromstring(dstr, np.int16)
  audio = np.reshape(data, (-1))
  audio = audio / 2.**15 # Normalise audio data (int16).

  audio = np.pad(audio, (0, chunk_size - audio.shape[0] % chunk_size), 'constant')
  audio = audio.reshape(-1, chunk_size)

  return audio.astype(np.float32)


In [129]:
from scipy import stats
import scipy.stats


l=[]
l=get_labels('E:576/Project/ComParE2017_Addressee.ComParE.train.arff')

myarray = np.asarray(l['train'])
y_tr=myarray[:,1]
X_tr=np.zeros((3742,2560))
for j in range(3742):
    
    if j<9:
        p=read_wave('E:576/Project/wav/train_000'+str(j+1)+'.wav')
    elif 9<=j<99:
        p=read_wave('E:576/Project/wav/train_00'+str(j+1)+'.wav')
    elif 99<=j<999:
        p=read_wave('E:576/Project/wav/train_0'+str(j+1)+'.wav')
    else:
        p=read_wave('E:576/Project/wav/train_'+str(j+1)+'.wav')

    mean=np.mean(p,axis=0)
    min=np.min(p,axis=0)
    max=np.max(p,axis=0)
    std=np.std(p,axis=0)
#     median=np.median(p,axis=0)
#     mode = scipy.stats.mode(p,axis=0)
#     skew = scipy.stats.skew(p,axis=0)
#     kurt = scipy.stats.kurtosis(p,axis=0)
    
    X_tr[j,:]=np.hstack([mean,min,max,std])
    


Extracting labels from E:576\Project\ComParE2017_Addressee.ComParE.train.arff
Processing E:576\Project\compare2017_addressee.compare.train.arff


In [56]:
print(X_tr)

[[-0.01558876 -0.01755778 -0.01601855 ...,  0.00505066 -0.00247192
  -0.00349426]
 [-0.08086924 -0.05783785 -0.04368591 ..., -0.02711487 -0.0088501
   0.00828552]
 [-0.00583942  0.00012442  0.00823505 ...,  0.00350952  0.0057373
   0.00205994]
 ..., 
 [ 0.00826372  0.01928493  0.01197379 ...,  0.00033569  0.00053406
   0.00657654]
 [-0.00964745 -0.01740995 -0.02486988 ...,  0.01223755  0.01306152
   0.00973511]
 [-0.02059819 -0.02457252 -0.0243589  ..., -0.01261902  0.00175476
   0.00041199]]


In [130]:
# X_tr=np.array()
# y_tr=np.array()
l=[]
l=get_labels('E:576/Project/ComParE2017_Addressee.ComParE.devel.arff')

myarray = np.asarray(l['devel'])

y_dev=myarray[:,1]
X_dev=np.zeros((3550,2560))

for j in range(3550):
    
    if j<9:
        p=read_wave('E:576/Project/wav/devel_000'+str(j+1)+'.wav')
    elif 9<=j<99:
        p=read_wave('E:576/Project/wav/devel_00'+str(j+1)+'.wav')
    elif 99<=j<999:
        p=read_wave('E:576/Project/wav/devel_0'+str(j+1)+'.wav')
    else:
        p=read_wave('E:576/Project/wav/devel_'+str(j+1)+'.wav')

    mean=np.mean(p,axis=0)
    min=np.min(p,axis=0)
    max=np.max(p,axis=0)
    std=np.std(p,axis=0)
#     median=np.median(p,axis=0)
#     skew = scipy.stats.skew(p,axis=0)
#     kurt = scipy.stats.kurtosis(p,axis=0)
    
    X_dev[j,:]=np.hstack([mean,min,max,std])
     

Extracting labels from E:576\Project\ComParE2017_Addressee.ComParE.devel.arff
Processing E:576\Project\compare2017_addressee.compare.devel.arff


In [136]:
from sklearn.metrics import confusion_matrix, recall_score
from sklearn import preprocessing, cross_validation, svm, tree
from sklearn import linear_model 

logist=linear_model.LogisticRegression(C=5000,penalty='l2')
logist.fit(X_tr,y_tr)

tr_acc=logist.score(X_tr,y_tr)
tr_conf=confusion_matrix(y_tr, logist.predict(X_tr), labels=None)
tr_rc=recall_score(y_tr, logist.predict(X_tr), labels=None, pos_label='0', average='binary', sample_weight=None)
dv_acc=logist.score(X_dev,y_dev)
dv_conf=confusion_matrix(y_dev, logist.predict(X_dev), labels=None)
dv_rc=recall_score(y_dev, logist.predict(X_dev), labels=None, pos_label='0', average='binary', sample_weight=None)

print(tr_acc,dv_acc,tr_rc,dv_rc,dv_conf)

0.96098343132 0.51661971831 0.934027777778 0.432748538012 [[ 592  776]
 [ 940 1242]]


In [150]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=5,activation='relu', solver='sgd',learning_rate_init=0.00001,max_iter=100)
mlp.fit(X_tr, y_tr)
tr_acc=mlp.score(X_tr,y_tr)
tr_conf=confusion_matrix(y_tr, mlp.predict(X_tr), labels=None)
tr_rc=recall_score(y_tr, mlp.predict(X_tr), labels=None, pos_label='0', average='binary', sample_weight=None)
dv_acc=mlp.score(X_dev,y_dev)
dv_conf=confusion_matrix(y_dev, mlp.predict(X_dev), labels=None)
dv_rc=recall_score(y_dev, mlp.predict(X_dev), labels=None, pos_label='0', average='binary', sample_weight=None)

print(tr_acc,dv_acc,tr_rc,dv_rc,dv_conf)

0.615179048637 0.614929577465 0.0 0.000730994152047 [[   1 1367]
 [   0 2182]]


In [169]:
from sklearn.decomposition import PCA as sklearnPCA

sklearn_pca = sklearnPCA(n_components=250)
PCA_TR_40 = sklearn_pca.fit_transform(X_tr)

print (sklearn_pca.explained_variance_ratio_.cumsum())

# PCA_DV_40 = sklearn_pca.transform(X_dev)



[ 0.37606223  0.39514557  0.40356222  0.41010311  0.41624475  0.42220798
  0.42809812  0.43384675  0.43949778  0.44509353  0.45058416  0.45602329
  0.46132475  0.46660445  0.47183492  0.47698783  0.48204645  0.48706348
  0.49199145  0.49685072  0.5016504   0.50637213  0.51101113  0.51560378
  0.52013917  0.5246381   0.52911458  0.5335504   0.53790803  0.5422055
  0.54648712  0.55072264  0.5549352   0.55911666  0.56320464  0.56723204
  0.57125438  0.57520392  0.57902015  0.58280187  0.58650448  0.59017664
  0.59379396  0.5973343   0.60080827  0.60424549  0.60761549  0.61089754
  0.61409802  0.61726363  0.62038059  0.62335038  0.6262805   0.62916209
  0.6319645   0.63468972  0.63736906  0.64003838  0.64265355  0.64523044
  0.6477483   0.65021719  0.65263092  0.65502787  0.65736146  0.65968639
  0.66197338  0.66425488  0.66652314  0.66873949  0.67093679  0.6731009
  0.67523565  0.67735871  0.67946976  0.68154556  0.68360397  0.68561727
  0.68760762  0.68958621  0.69156211  0.69351871  0.6

In [None]:
PCA_DV_40 = sklearn_pca.transform(X_dev)

from sklearn.metrics import confusion_matrix, recall_score
from sklearn import preprocessing, cross_validation, svm, tree
from sklearn import linear_model 

logist=linear_model.LogisticRegression(C=0.0000001,penalty='l2')
logist.fit(PCA_TR_40,y_tr)

tr_acc=logist.score(PCA_TR_40,y_tr)
tr_conf=confusion_matrix(y_tr, logist.predict(PCA_TR_40), labels=None)
tr_rc=recall_score(y_tr, logist.predict(PCA_TR_40), labels=None, pos_label='0', average='binary', sample_weight=None)
dv_acc=logist.score(PCA_DV_40,y_dev)
dv_conf=confusion_matrix(y_dev, logist.predict(PCA_DV_40), labels=None)
dv_rc=recall_score(y_dev, logist.predict(PCA_DV_40), labels=None, pos_label='0', average='binary', sample_weight=None)

print(tr_acc,dv_acc,tr_rc,dv_rc,dv_conf)

In [170]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#from matplotlib import style
from sklearn import preprocessing, cross_validation, svm, tree
from sklearn import linear_model 

devel=pd.read_csv('ComParE2017_devel.csv')
train=pd.read_csv('ComParE2017_train.csv')

# train.head() 

X_tr1 = np.array(train.drop(['y'], 1))
X_dev1 = np.array(devel.drop(['y'], 1))
X_tr1 = preprocessing.scale(X_tr1)
X_dev1 = preprocessing.scale(X_dev1)
#df.fillna(value=-99999, inplace=True)
#df.dropna(inplace=True)
y_tr = np.array(train['y'])
y_dev = np.array(devel['y'])

X_tr=np.concatenate((X_tr,X_tr1),axis=1)
X_dev=np.concatenate((X_dev,X_dev1),axis=1)



In [115]:
X_tr.shape

(3742, 8933)

In [171]:
from sklearn.metrics import confusion_matrix, recall_score
from sklearn import preprocessing, cross_validation, svm, tree
from sklearn import linear_model 

logist=linear_model.LogisticRegression(C=0.01,penalty='l2')
logist.fit(X_tr,y_tr)

tr_acc=logist.score(X_tr,y_tr)
tr_conf=confusion_matrix(y_tr, logist.predict(X_tr), labels=None)
tr_rc=recall_score(y_tr, logist.predict(X_tr), labels=None, pos_label=0, average='binary', sample_weight=None)
dv_acc=logist.score(X_dev,y_dev)
dv_conf=confusion_matrix(y_dev, logist.predict(X_dev), labels=None)
dv_rc=recall_score(y_dev, logist.predict(X_dev), labels=None, pos_label=0, average='binary', sample_weight=None)

print(tr_acc,dv_acc,tr_rc,dv_rc,dv_conf)

0.99251737039 0.612676056338 0.985416666667 0.470760233918 [[ 644  724]
 [ 651 1531]]


In [37]:
# import numpy as np
# from sklearn.decomposition import PCA

# np.random.seed(0)
# my_matrix = np.random.randn(20, 5)
# my_model = PCA(n_components=5)
# my_model.fit_transform(my_matrix)

# print (my_model.explained_variance_)
# print (my_model.explained_variance_ratio_)
# print (my_model.explained_variance_ratio_.cumsum())

[ 1.50756565  1.29374452  0.97042041  0.61712667  0.31529082]
[ 0.32047581  0.27502207  0.20629036  0.13118776  0.067024  ]
[ 0.32047581  0.59549787  0.80178824  0.932976    1.        ]


In [173]:
from sklearn.decomposition import PCA as sklearnPCA

sklearn_pca = sklearnPCA(n_components=1000)
PCA_TR = sklearn_pca.fit_transform(X_tr)

print (sklearn_pca.explained_variance_ratio_.cumsum())

PCA_DV = sklearn_pca.transform(X_dev)

[ 0.13268263  0.18736127  0.22517592  0.25737057  0.28007517  0.2972238
  0.31426131  0.3298185   0.34238481  0.35327043  0.3633248   0.37278586
  0.38103061  0.38899946  0.39681809  0.40392772  0.41037881  0.4165963
  0.42269369  0.42830324  0.43369018  0.43889588  0.4437743   0.44842054
  0.45292674  0.45702845  0.46094024  0.46472355  0.4683925   0.47194674
  0.47539649  0.47877467  0.48198653  0.48508039  0.48809872  0.49104356
  0.4939167   0.49664287  0.49934629  0.50203778  0.504699    0.50726948
  0.50981583  0.5122699   0.51469925  0.51706148  0.51937823  0.52164562
  0.52385304  0.52604175  0.5281638   0.53024857  0.53232778  0.53436054
  0.53636627  0.53834396  0.54028061  0.54217197  0.54405261  0.54589285
  0.54770806  0.54951344  0.55127836  0.55301599  0.55472556  0.55643072
  0.55811855  0.55978384  0.56143264  0.56306642  0.56468209  0.56628781
  0.5678704   0.56941984  0.57095877  0.57248077  0.57399143  0.57549656
  0.57697253  0.57844375  0.57989773  0.58133893  0.5

In [204]:
from sklearn.metrics import confusion_matrix, recall_score
from sklearn import preprocessing, cross_validation, svm, tree
from sklearn import linear_model 

logist=linear_model.LogisticRegression(C=0.001,penalty='l2')
logist.fit(PCA_TR,y_tr)

tr_acc=logist.score(PCA_TR,y_tr)
tr_conf=confusion_matrix(y_tr, logist.predict(PCA_TR), labels=None)
tr_rc=recall_score(y_tr, logist.predict(PCA_TR), labels=None, pos_label=0, average='binary', sample_weight=None)
dv_acc=logist.score(PCA_DV,y_dev)
dv_conf=confusion_matrix(y_dev, logist.predict(PCA_DV), labels=None)
dv_rc=recall_score(y_dev, logist.predict(PCA_DV), labels=None, pos_label=0, average='binary', sample_weight=None)

print(tr_acc,dv_acc,tr_rc,dv_rc,dv_conf)

0.854623196152 0.608169014085 0.834722222222 0.554093567251 [[ 758  610]
 [ 781 1401]]


In [211]:
#MLP

from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=10,activation='relu', solver='sgd',learning_rate_init=0.01,max_iter=100)
mlp.fit(PCA_TR,y_tr)
tr_acc=mlp.score(PCA_TR,y_tr)
tr_conf=confusion_matrix(y_tr, mlp.predict(PCA_TR), labels=None)
tr_rc=recall_score(y_tr, mlp.predict(PCA_TR), labels=None, pos_label=0, average='binary', sample_weight=None)
dv_acc=mlp.score(PCA_DV,y_dev)
dv_conf=confusion_matrix(y_dev, mlp.predict(PCA_DV), labels=None)
dv_rc=recall_score(y_dev, mlp.predict(PCA_DV), labels=None, pos_label=0, average='binary', sample_weight=None)

print(tr_acc,dv_acc,tr_rc,dv_rc,dv_conf)


0.999732763228 0.62338028169 1.0 0.46783625731 [[ 640  728]
 [ 609 1573]]


In [212]:

from sklearn.ensemble import VotingClassifier
from sklearn.cross_validation import cross_val_score

eclf1 = VotingClassifier(estimators=[('lg', logist), ('ml', mlp)],  voting='hard')
# eclf2 = VotingClassifier(estimators=[('lg', logist), ('ml', mlp),('sv',svm)],  voting='hard')
# eclf3 = VotingClassifier(estimators=[('lg', logist), ('ml', mlp),('gb',gb)],  voting='hard')
# eclf4 = VotingClassifier(estimators=[('lg', logist), ('ml', mlp),('sv',svm),('gb',gb)],  voting='hard')

eclf1.fit(PCA_TR, y_tr)
# eclf2.fit(X_tr, y_tr)
# eclf3.fit(X_tr, y_tr)
# eclf4.fit(X_tr, y_tr)

VotingClassifier(estimators=[('lg', LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)), ('ml', MLPCl...e=True, solver='sgd', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False))],
         n_jobs=1, voting='hard', weights=None)

In [213]:
lg=recall_score(y_dev, logist.predict(PCA_DV), labels=None, pos_label=0, average='binary', sample_weight=None)
ml=recall_score(y_dev, mlp.predict(PCA_DV), labels=None, pos_label=0, average='binary', sample_weight=None)
# gb=recall_score(y_dev, gb.predict(X_dev), labels=None, pos_label=0, average='binary', sample_weight=None)
# sv=recall_score(y_dev, svm.predict(X_dev), labels=None, pos_label=0, average='binary', sample_weight=None)

e1=recall_score(y_dev, eclf1.predict(PCA_DV), labels=None, pos_label=0, average='binary', sample_weight=None)
dv_acc1=logist.score(PCA_DV,y_dev)
dv_acc2=mlp.score(PCA_DV,y_dev)
dv_acc3=eclf1.score(PCA_DV,y_dev)



print(lg,ml,e1,dv_acc1,dv_acc2,dv_acc3)

0.554093567251 0.46783625731 0.619883040936 0.608169014085 0.62338028169 0.593521126761


In [197]:
confusion_matrix(y_dev, eclf1.predict(PCA_DV), labels=None)


array([[ 969,  399],
       [1266,  916]])

In [198]:
confusion_matrix(y_dev, logist.predict(PCA_DV), labels=None)


array([[ 827,  541],
       [ 874, 1308]])

In [88]:
# X_tr=np.array()
# y_tr=np.array()
l=[]
l=get_labels('E:576/Project/ComParE2017_Addressee.ComParE.devel.arff')

myarray = np.asarray(l['devel'])

q=myarray[:,1]

xs=[]
ys=[]

for j in range(3550):
    
    if j<9:
        p=read_wave('E:576/Project/wav/devel_000'+str(j+1)+'.wav')
    elif 9<=j<99:
        p=read_wave('E:576/Project/wav/devel_00'+str(j+1)+'.wav')
    elif 99<=j<999:
        p=read_wave('E:576/Project/wav/devel_0'+str(j+1)+'.wav')
    else:
        p=read_wave('E:576/Project/wav/devel_'+str(j+1)+'.wav')

    
    xs.append(p)
    X_dev = np.concatenate(xs)

    
    y=myarray[j,1]
    a=p.shape[0]
    q2=np.zeros((a,1))
    for i in range(a):
        
        q2[i,0]=y
    
    ys.append(q2)
    y_dev = np.concatenate(ys)
    print(y_dev.shape,j)
  
     

Extracting labels from E:576\Project\ComParE2017_Addressee.ComParE.devel.arff
Processing E:576\Project\compare2017_addressee.compare.devel.arff
(27, 1) 0
(61, 1) 1
(149, 1) 2
(165, 1) 3
(220, 1) 4
(246, 1) 5
(290, 1) 6
(324, 1) 7
(373, 1) 8
(456, 1) 9
(550, 1) 10
(576, 1) 11
(606, 1) 12
(643, 1) 13
(672, 1) 14
(727, 1) 15
(816, 1) 16
(842, 1) 17
(868, 1) 18
(922, 1) 19
(1024, 1) 20
(1065, 1) 21
(1087, 1) 22
(1161, 1) 23
(1187, 1) 24
(1219, 1) 25
(1245, 1) 26
(1280, 1) 27
(1306, 1) 28
(1332, 1) 29
(1371, 1) 30
(1411, 1) 31
(1483, 1) 32
(1528, 1) 33
(1554, 1) 34
(1580, 1) 35
(1597, 1) 36
(1672, 1) 37
(1742, 1) 38
(1758, 1) 39
(1784, 1) 40
(1805, 1) 41
(1836, 1) 42
(1862, 1) 43
(1888, 1) 44
(1921, 1) 45
(1959, 1) 46
(2000, 1) 47
(2026, 1) 48
(2051, 1) 49
(2083, 1) 50
(2129, 1) 51
(2159, 1) 52
(2201, 1) 53
(2226, 1) 54
(2253, 1) 55
(2279, 1) 56
(2305, 1) 57
(2342, 1) 58
(2388, 1) 59
(2452, 1) 60
(2469, 1) 61
(2495, 1) 62
(2673, 1) 63
(2710, 1) 64
(2736, 1) 65
(2752, 1) 66
(2778, 1) 67
(280

In [101]:
from sklearn import preprocessing, cross_validation, svm, tree
from sklearn import linear_model 
logist=linear_model.LogisticRegression(C=0.001)
logist.fit(X_tr,y_tr)
accuracy=logist.score(X_tr,y_tr)
print(accuracy)

  y = column_or_1d(y, warn=True)


0.984890551672


In [102]:
logist.score(X_dev,y_dev)

0.98454531932826672

In [93]:
from sklearn.metrics import recall_score
recall_score(y_tr, logist.predict(X_tr), labels=None, pos_label=0, average='binary', sample_weight=None)

1.0

In [92]:
from sklearn.metrics import recall_score
recall_score(y_dev, logist.predict(X_dev), labels=None, pos_label=0, average='binary', sample_weight=None)

1.0

In [103]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_tr, logist.predict(X_tr), labels=None)

array([[150053,      0],
       [  2302,      0]])

In [104]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_dev, logist.predict(X_dev), labels=None)

array([[139005,      0],
       [  2182,      0]])

In [5]:
p

array([[-0.05847168, -0.06530762, -0.13421631, ..., -0.37460327,
        -0.3868103 , -0.42007446],
       [-0.49069214, -0.49987793, -0.47485352, ..., -0.04022217,
         0.12069702,  0.27218628],
       [ 0.37051392,  0.3883667 ,  0.33963013, ..., -0.5223999 ,
        -0.50454712, -0.45581055],
       ..., 
       [ 0.10220337,  0.12600708,  0.23419189, ..., -0.61553955,
        -0.44717407, -0.49819946],
       [-0.35906982,  0.10476685,  0.16650391, ..., -0.28457642,
        -0.05215454,  0.10620117],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)

In [8]:
def _int_feauture(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feauture(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def serialize_sample(writer, sample_data, root_dir, upsample=False):
  classes = [label for _, label in sample_data]
  class_ids = set(classes)
  num_samples_per_class = {class_name: sum(x == class_name for x in classes) for class_name in class_ids}
  print(num_samples_per_class)

  if upsample:
    max_samples = np.max(list(num_samples_per_class.values()))
    augmented_data = []

    for class_name, n_samples in num_samples_per_class.items():
        n_samples_to_add = max_samples - n_samples

        while n_samples_to_add > 0:
            for sample, label in sample_data:
                if n_samples_to_add <= 0:
                    break

                if label == class_name:
                    augmented_data.append((sample, label))
                    n_samples_to_add -= 1

    print('Augmented the dataset with {} samples'.format(len(augmented_data)))
    sample_data += augmented_data

    import random
    random.shuffle(sample_data)

  for i, (wav_file, label) in enumerate(sample_data):

    audio = read_wave(root_dir / wav_file)
    example = tf.train.Example(features=tf.train.Features(feature={
                'label': _int_feauture(label),
                'raw_audio': _bytes_feauture(audio.astype(np.float32).tobytes()),
            }))

    writer.write(example.SerializeToString())
    del audio, label

In [9]:
def main(data_folder, labels_file, tfrecords_folder):

  root_dir = Path(data_folder)
  labels = get_labels(labels_file)
  for portion in ['train', 'devel']:
    print('Creating tfrecords for [{}].'.format(portion))
    if not Path(tfrecords_folder).exists():
        Path(tfrecords_folder).mkdir()

    writer = tf.python_io.TFRecordWriter(
        (Path(tfrecords_folder) / '{}.tfrecords'.format(portion)
    ).as_posix())
    
    serialize_sample(writer, labels[portion], root_dir, upsample='train' in portion)
    writer.close()


In [12]:
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('wave_folder', 'wav/', 'The folder that contains the wav files.')
tf.app.flags.DEFINE_string('arff_path', 'ComPaRe*arff', 'The glob for all the arff files of the datset.')
tf.app.flags.DEFINE_string('tf_folder', 'tf_records', 'The folder to write the tf records.')

if __name__ == '__main__':
  main(FLAGS.wave_folder, FLAGS.arff_path, FLAGS.tf_folder)


ArgumentError: argument --wave_folder: conflicting option string: --wave_folder