In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the training split.  Only the column dtypes are displayed: a notebook
# cell renders just its final expression, so the former mid-cell `df.head()`
# call computed a preview that was immediately discarded.
df = pd.read_csv('train_data.csv')
df.dtypes

path             object
transcription    object
action           object
object           object
location         object
dtype: object

In [3]:
# Keep only the utterance text; this Series is what the tokenizer is fit on.
train_data_text = df.loc[:, 'transcription']
train_data_text.head()

0                    Turn on the kitchen lights
1                       Turn up the temperature
2    OK now switch the main language to Chinese
3            Turn down the bathroom temperature
4                           Change the language
Name: transcription, dtype: object

In [4]:
# Target columns, selected by name rather than by position so the cell keeps
# working if the CSV gains or reorders columns.
train_data_labels = df[['action', 'object', 'location']]
train_data_labels.head()

Unnamed: 0,action,object,location
0,activate,lights,kitchen
1,increase,heat,none
2,change language,Chinese,none
3,decrease,heat,washroom
4,change language,none,none


In [5]:
# Raw label strings as a 2-D object array (one row per utterance, one column
# per label field).
train_labels_list = train_data_labels.values
train_labels_list

array([['activate', 'lights', 'kitchen'],
       ['increase', 'heat', 'none'],
       ['change language', 'Chinese', 'none'],
       ...,
       ['bring', 'shoes', 'none'],
       ['decrease', 'volume', 'none'],
       ['deactivate', 'lights', 'kitchen']], dtype=object)

In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [7]:
# Tokenizer / padding configuration shared by the cells below.
num_words = 10000                # vocabulary cap handed to the Tokenizer
oov_token = '<OOV>'              # placeholder for out-of-vocabulary words
pad_type = trunc_type = 'post'   # intended padding / truncation side

In [8]:
# Build the vocabulary from the training utterances only.
tokenizer = Tokenizer(oov_token=oov_token, num_words=num_words)
tokenizer.fit_on_texts(train_data_text)

# word -> integer id mapping learned above (id 1 is the OOV token).
word_index = tokenizer.word_index
print(word_index)

{'<OOV>': 1, 'the': 2, 'turn': 3, 'in': 4, 'lights': 5, 'on': 6, 'up': 7, 'down': 8, 'heat': 9, 'temperature': 10, 'switch': 11, 'kitchen': 12, 'off': 13, 'bedroom': 14, 'language': 15, 'volume': 16, 'washroom': 17, 'bathroom': 18, 'my': 19, 'to': 20, 'increase': 21, 'decrease': 22, 'heating': 23, 'music': 24, 'bring': 25, 'me': 26, 'set': 27, 'too': 28, 'lamp': 29, 'i': 30, 'get': 31, 'it': 32, 'sound': 33, 'need': 34, 'make': 35, 'newspaper': 36, 'shoes': 37, 'socks': 38, 'loud': 39, 'louder': 40, 'juice': 41, 'chinese': 42, 'some': 43, 'english': 44, 'go': 45, 'practice': 46, 'could': 47, 'you': 48, "phone's": 49, 'ok': 50, 'now': 51, 'main': 52, 'korean': 53, 'german': 54, 'change': 55, 'stop': 56, 'fetch': 57, 'play': 58, 'please': 59, 'quiet': 60, 'hear': 61, 'pause': 62, 'it’s': 63, 'audio': 64, 'quieter': 65, 'resume': 66, 'far': 67, 'lower': 68, 'a': 69, 'different': 70, 'this': 71, 'settings': 72, 'start': 73, 'reduce': 74, 'more': 75, 'put': 76, 'max': 77, 'phone': 78, 'less

In [9]:
# Encode each utterance as a list of word ids, then pad the lists into a
# rectangular integer array.  With no `maxlen`, pad_sequences pads to the
# longest training utterance.  The padding / truncation side comes from the
# configuration cell rather than a second hard-coded literal.
train_sequences = tokenizer.texts_to_sequences(train_data_text)
train_data = pad_sequences(train_sequences, padding=pad_type, truncating=trunc_type)
train_data.shape

(11566, 10)

In [10]:
def lower_(s):
    """Return the first three entries of ``s`` as a list of lower-cased strings."""
    return [s[position].lower() for position in range(3)]
# Lower-case every label triple before encoding it.
result = [lower_(label_row) for label_row in train_labels_list]

# NOTE(review): each triple is a list, so the tokenizer treats every entry as
# one token and looks it up in the vocabulary fit on the transcriptions.
# Entries missing from that vocabulary (e.g. the multi-word 'change language')
# come back as the '<OOV>' id -- confirm this label encoding is intended.
train_label = tokenizer.texts_to_sequences(result)
np.shape(train_label)

(11566, 3)

In [11]:
#train_action = df['action']
#train_object = df['object']
#train_location = df['location']
#tokenizer.fit_on_texts(train_action)
#tokenizer.fit_on_texts(train_object)
#tokenizer.fit_on_texts(train_location)
#train_action = tokenizer.texts_to_sequences(train_action)
#train_object = tokenizer.texts_to_sequences(train_object)
#train_location = tokenizer.texts_to_sequences(train_location)

#train_action = pad_sequences(train_action, padding='post')
#train_object = pad_sequences(train_object, padding='post')
#train_location = pad_sequences(train_location, padding='post')
#train_location.shape

In [12]:
#train_labels = concatenate(train_action , train_object , train_location)
#train_labels

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [35]:
# Fully connected network over the padded word-id vectors.  The input width is
# read from the padded training matrix instead of being hard-coded, so the
# model stays in sync with whatever length pad_sequences produced.
input_width = train_data.shape[1]

model = Sequential([
    Dense(100, input_shape=(input_width,)),
    Activation('relu'),
    Dropout(0.5),

    Dense(200),
    Activation('relu'),
    Dropout(0.5),

    Dense(100),
    Activation('relu'),
    Dropout(0.5),

    # Three raw (un-activated) outputs, matching the three label columns.
    Dense(3),
])

In [36]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 100)               1100      
_________________________________________________________________
activation_11 (Activation)   (None, 100)               0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 200)               20200     
_________________________________________________________________
activation_12 (Activation)   (None, 200)               0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 200)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 100)              

In [37]:
# `metrics` is passed as a list, the form documented by Keras; some newer
# Keras releases reject a bare string here.
# NOTE(review): categorical cross-entropy expects one-hot / probability targets
# over the output units, but the model ends in an un-activated Dense(3) and the
# targets are integer token ids -- confirm the loss matches the intended task.
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [38]:
# Load the validation split and encode it with the tokenizer fit on the
# training utterances.
df_val = pd.read_csv('valid_data.csv')

# Pad / truncate to the training width: the model's input size is fixed, so
# the validation matrix must not depend on the longest validation utterance.
valid_sequences = tokenizer.texts_to_sequences(df_val['transcription'])
valid_data = pad_sequences(
    valid_sequences,
    maxlen=train_data.shape[1],
    padding=pad_type,
    truncating=trunc_type,
)
print(np.shape(valid_data))

# Label columns are taken by position (3rd to 5th column).
# NOTE(review): assumes valid_data.csv has the same column layout as
# train_data.csv -- confirm.
valid_labels = df_val[df_val.columns[[2, 3, 4]]].values

# Reuse the `lower_` helper defined earlier in the notebook instead of
# redefining an identical copy in this cell.
val_label = [lower_(label_row) for label_row in valid_labels]
val_label = tokenizer.texts_to_sequences(val_label)
print(np.shape(val_label))

(3118, 10)
(3118, 3)


In [39]:
# Features are the padded word-id matrices; targets are the encoded label
# triples converted to arrays.
X_train, X_val = train_data, valid_data
y_train, y_val = np.array(train_label), np.array(val_label)

# Training schedule.
num_batch_size, num_epochs = 32, 50

model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=num_epochs,
    batch_size=num_batch_size,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f77643fb760>

In [40]:
# Sanity check: feature and label arrays must have matching row counts per
# split.  One shape is printed per line, in train/validation order.
print(X_train.shape, y_train.shape, X_val.shape, y_val.shape, sep='\n')

(11566, 10)
(11566, 3)
(3118, 10)
(3118, 3)


In [41]:
# Raw network outputs for every validation utterance (three values per row,
# no activation applied by the final layer).
prediction = model.predict(x=X_val)
print(prediction)

[[ 51.213375   38.010662    3.4935455]
 [118.88107    91.396       6.738011 ]
 [134.32956   379.22208    15.33612  ]
 ...
 [ 26.20904    41.51264     2.7789607]
 [119.26259    93.092964    6.8237934]
 [ 35.01943    45.75925     3.2574496]]
