In [95]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import librosa
import os
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import np_utils

import tensorflow as tf
import keras
from tensorflow_core.keras import Model
from tensorflow_core.python.keras.layers import Input, Dense, GRU, LSTM, Dropout, Bidirectional
from tensorflow_core.keras import optimizers
from tensorflow_core.keras.models import load_model

In [96]:
data = pd.read_csv('NEWts5lang.csv')
data.head()

Unnamed: 0,filename,seq1,seq2,seq3,seq4,seq5,seq6,seq7,seq8,seq9,...,seq42,seq43,seq44,seq45,seq46,seq47,seq48,seq49,seq50,label
0,common_voice_tr_17341269.mp3,-28.671259,-28.671259,-28.671259,-28.594355,-23.302088,-21.683058,-22.529316,-20.512342,-19.944502,...,-11.691144,-10.529799,-11.387162,-11.123571,-10.124403,-9.600918,-10.687082,-11.519309,-12.568885,Turkish
1,common_voice_tr_17341269.mp3,-13.647232,-11.415384,-10.403269,-10.631279,-11.740904,-13.062352,-10.704817,-3.052637,-0.423507,...,-14.3075,-13.495781,-10.97455,-8.11975,-8.459702,-8.305902,-7.290689,-6.522213,-6.830919,Turkish
2,common_voice_tr_17341269.mp3,-6.729401,-6.814654,-6.663826,-6.449201,-6.960073,-7.248553,-7.998569,-7.281999,-6.771838,...,-6.760509,-6.277062,-5.68222,-3.713829,-4.381212,-5.394255,-4.812826,-5.150116,-4.575793,Turkish
3,common_voice_tr_17341269.mp3,-3.98576,-4.64868,-6.543986,-7.014935,-5.833005,-5.390105,-4.40402,-4.477667,-4.18349,...,-10.161282,-11.417397,-10.101839,-9.42199,-9.869721,-9.822425,-10.087319,-12.33375,-12.394396,Turkish
4,common_voice_tr_17341270.mp3,-18.675406,-12.77542,-10.191453,-9.995229,-9.822378,-10.787068,-11.464541,-11.791995,-12.880933,...,-8.885355,-6.351941,-5.791424,-4.474946,-3.936759,-4.92229,-4.93982,-3.950949,-3.556871,Turkish


In [97]:
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)

In [98]:
language_list = data.iloc[:, -1]
language_list

0        Turkish
1        Turkish
2        Turkish
3        Turkish
4        Turkish
          ...   
15001    Persian
15002    Persian
15003    Persian
15004    Persian
15005    Persian
Name: label, Length: 15006, dtype: object

In [99]:
encoder = LabelEncoder()
y = encoder.fit_transform(language_list)
print(y)

[4 4 4 ... 2 2 2]


In [100]:
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

In [101]:
print(dummy_y[1])

[0. 0. 0. 0. 1.]


In [102]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [103]:
X_train, X_test, y_train, y_test = train_test_split(X, dummy_y, test_size=0.2)

In [104]:
X_test.shape

(3002, 50)

In [105]:
X_train = X_train.reshape(12004,50,1)
X_test = X_test.reshape(3002,50,1)
y_train = y_train.reshape(12004,5)
y_test = y_test.reshape(3002,5)

In [107]:
DROPOUT = 0.3
RECURRENT_DROP_OUT = 0.2
optimizer = optimizers.Adam(decay=1e-4)
main_input = Input(shape=(50,1), name='main_input')
layer1 = Bidirectional(LSTM(100, return_sequences=True, name='layer1'))(main_input)
layer2 = Dropout(0.2)(layer1)
layer3 = Bidirectional(LSTM(50, return_sequences=False, name='layer2'))(layer2)
layer4 = Dropout(0.2)(layer3)
layer5 = Dense(100, activation='tanh', name='layer4')(layer4)
layer6 = Dropout(0.2)(layer5)
rnn_output = Dense(5, activation='softmax', name='rnn_output')(layer6)

model = Model(inputs=main_input, outputs=rnn_output)
print('\nCompiling model...')
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
model.summary()
history = model.fit(X_train, y_train, batch_size=32, epochs=45, validation_data=(X_test, y_test), shuffle=True, verbose=1)



Compiling model...
Model: "model_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
main_input (InputLayer)      [(None, 50, 1)]           0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 50, 200)           81600     
_________________________________________________________________
dropout_32 (Dropout)         (None, 50, 200)           0         
_________________________________________________________________
bidirectional_3 (Bidirection (None, 100)               100400    
_________________________________________________________________
dropout_33 (Dropout)         (None, 100)               0         
_________________________________________________________________
layer4 (Dense)               (None, 100)               10100     
_________________________________________________________________
dropout_34 (Dropout)         (None, 10

Epoch 43/45
Epoch 44/45
Epoch 45/45


In [42]:
from keras import models
from keras import layers
model2 = models.Sequential()
model2.add(layers.LSTM(50, return_sequences=True,input_shape=(50,1)))
model2.add(layers.Dropout(0.2))
model2.add(layers.LSTM(25, return_sequences=False))
model2.add(layers.Dropout(0.2))
model2.add(layers.Dense(100, activation='relu'))
model2.add(layers.Dropout(0.2))
model2.add(layers.Dense(5, activation='softmax'))

model2.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['acc'])
model2.summary()
history = model2.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_test, y_test), shuffle=True, verbose=1)

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_10 (LSTM)               (None, 50, 50)            10400     
_________________________________________________________________
dropout_10 (Dropout)         (None, 50, 50)            0         
_________________________________________________________________
lstm_11 (LSTM)               (None, 25)                7600      
_________________________________________________________________
dropout_11 (Dropout)         (None, 25)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 100)               2600      
_________________________________________________________________
dropout_12 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 5)                

ValueError: Error when checking input: expected lstm_10_input to have 3 dimensions, but got array with shape (12000, 50)

In [28]:
DROPOUT = 0.3
RECURRENT_DROP_OUT = 0.2
optimizer = optimizers.Adam(decay=1e-4)
main_input = Input(shape=(64,1), name='main_input')
layer1 = LSTM(64, return_sequences=True, name='layer1')(main_input)
layer2 = Dropout(0.2)(layer1)
layer3 = LSTM(32, return_sequences=, name='layer2')(layer2)
layer4 = Dropout(0.2)(layer3)
layer5 = Dense(100, activation='tanh', name='layer4')(layer4)
layer6 = Dropout(0.2)(layer5)
rnn_output = Dense(5, activation='softmax', name='rnn_output')(layer6)

model = Model(inputs=main_input, outputs=rnn_output)
print('\nCompiling model...')
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
model.summary()
history = model.fit(X_train, y_train, batch_size=32, epochs=45, validation_data=(X_test, y_test), shuffle=True, verbose=1)

        

TypeError: ('Keyword argument not understood:', 'shape')

In [52]:
model_json = model.to_json()
with open("model.json", "w") as json_file :
	json_file.write(model_json)

model.save_weights("model.h5")
print("Saved model to disk")

model.save('LSTM.model')

Saved model to disk


INFO:tensorflow:Assets written to: LSTM.model\assets


In [121]:
data = pd.read_csv('5langtest.csv')
data.head()
data = data.drop(['filename'],axis=1)
data.head()

Unnamed: 0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,-326.365692,118.729942,0.666729,37.966267,8.479445,4.692266,-6.881396,-5.850728,-0.029433,-6.596133,...,0.346014,-5.936338,1.468385,-3.361923,-4.326117,-5.977447,0.464422,-7.558537,-2.305692,Turkish
1,-383.620148,112.471321,-3.116855,22.167076,-2.245688,2.442477,-3.634593,-5.498386,-3.254369,-7.458304,...,1.662149,-2.193036,-0.745241,-1.492682,-1.252945,-4.289709,-0.683882,-2.956189,-2.925009,Turkish
2,-362.079498,90.656273,-3.940087,24.174574,0.522142,5.686224,-4.541938,-3.048762,-1.666469,-5.033555,...,2.743518,-4.978319,0.326358,-0.071845,-3.650645,-4.583994,1.003159,-5.715622,-1.828743,Turkish
3,-397.155029,90.325745,3.057629,19.175505,1.570266,3.305055,-1.215333,-0.14866,-0.315445,-6.866125,...,5.052822,-2.335471,2.034746,-3.216006,-3.529693,-1.087466,0.672866,-4.361683,-1.362758,Turkish
4,-395.014313,121.667747,9.815867,23.712048,10.952191,-0.237414,-1.66051,-3.591281,-4.365257,-8.810498,...,0.54962,-5.497268,-1.179936,-1.567438,-7.719094,-3.438961,-2.345177,-7.053899,-1.770096,Turkish


In [152]:
language_list = data.iloc[:, -1]
language_list
encoder = LabelEncoder()
y_new = encoder.fit_transform(language_list)
encoder = LabelEncoder()
encoder.fit(y_new)
encoded_Y = encoder.transform(y_new)
print(y_new[380])
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y_new = np_utils.to_categorical(encoded_Y)
print(dummy_y_new)
dummy_y_new.shape

0
[[0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 ...
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]]


(1875, 5)

In [155]:
scaler = StandardScaler()
X_new = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [156]:
testX = X.reshape(15000,1,20)
testY = dummy_y.reshape(15000,1,5)

In [157]:
X_new = X_new.reshape(1875,1,20)
dummy_y_new = dummy_y_new.reshape(1875,1,5)

In [158]:
loss, acc = model2.evaluate(X_new, dummy_y_new)  # returns loss and metrics
print("loss: %.2f" % loss)
print("acc: %.2f" % acc)

loss: 2.14
acc: 0.37


In [169]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import Bidirectional

model3 = Sequential()
model3.add(Bidirectional(LSTM(20, return_sequences=True), input_shape=(20, 1)))
model3.add(TimeDistributed(Dense(5, activation='sigmoid')))
model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model3.summary()

Model: "sequential_32"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_4 (Bidirection (None, 20, 40)            3520      
_________________________________________________________________
time_distributed_4 (TimeDist (None, 20, 5)             205       
Total params: 3,725
Trainable params: 3,725
Non-trainable params: 0
_________________________________________________________________


In [1]:
print(history.history.keys())
plt.figure(1)
plt.plot(history.history['loss'])
plt.plot(history.history['acc'])
plt.plot(history.history['val_loss'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['loss', 'accuracy', 'val_loss', 'val_accuracy'], loc='upper left')

NameError: name 'history' is not defined