# Tensoflow NLP Model for Text Classification

## Imports

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.model_selection import train_test_split

## Pre-Processing

In [2]:
#load dataset
requests = pd.read_csv('../data/2021_closed_requests.csv', low_memory=False)

In [3]:
#split train and test data
(requests_train, requests_test) = train_test_split(requests, test_size = 0.2, random_state = 42)

In [4]:
#drop rows which are duplicate requests - so drop all rows with a value in the service_request_parent_id columns, only keep nulls
requests_train['service_request_parent_id'].fillna(0, inplace = True)
requests_train = requests_train[requests_train['service_request_parent_id'] == 0]
requests_train.shape

(256344, 23)

In [5]:
# drop service_request_parent_id
requests_train.drop(labels='service_request_parent_id', axis=1, inplace=True)

In [6]:
#drop SAP notification column
requests_train.drop(labels = 'sap_notification_number', axis=1, inplace=True)

In [7]:
#drop date columns
requests_train.drop(labels = ['date_requested', 'case_age_days', 'date_closed'], axis=1, inplace=True)

In [8]:
#drop case_record_type
requests_train.drop(labels = 'case_record_type', axis=1, inplace=True)

In [9]:
# change the strings that say 'nan' in service_name_detail to null values
requests_train['service_name_detail'] = np.where(requests_train['service_name_detail'] == 'nan', None, requests_train['service_name_detail'])

In [10]:
#drop status column
requests_train.drop(labels = 'status', axis=1, inplace=True)

In [11]:
#drop those with no council district listed
requests_train = requests_train.dropna(subset=['council_district'])
requests_train.shape

(253970, 16)

In [12]:
#drop all location columns
requests_train.drop(labels = ['council_district', 'lat', 'lng', 'street_address', 'zipcode'], axis=1, inplace=True)

In [13]:
# delete comm_plan_code and comm_plan_name columns
requests_train.drop(labels=['comm_plan_code', 'comm_plan_name'], axis=1, inplace=True)

In [14]:
# drop park_name column
requests_train.drop(labels=['park_name'], axis=1, inplace=True)

In [15]:
# only keep rows with the following case_origin: Mobile, Web, Phone
requests_train = requests_train[(requests_train['case_origin'] == 'Mobile') | (requests_train['case_origin'] == 'Web') | (requests_train['case_origin'] == 'Phone')]
requests_train.shape

(219488, 8)

In [16]:
# drop case origin column
requests_train.drop(labels=['case_origin'], axis=1, inplace=True)

In [17]:
# drop referred column
requests_train.drop(labels=['referred'], axis=1, inplace=True)

In [18]:
# drop floc and iamfloc columns
requests_train.drop(labels=['floc', 'iamfloc'], axis=1, inplace=True)

In [19]:
requests_train.head(2)

Unnamed: 0,service_request_id,service_name,service_name_detail,public_description
164116,3359023,Missed Collection,Missed Collection,They just didn't pick it up. The bin was plac...
215343,3420859,Missed Collection,Missed Collection,Missed recycling


In [20]:
# drop all values with no public description
requests_train = requests_train.dropna(subset=['public_description'])
requests_train.shape

(202027, 4)

In [21]:
requests_train
#Find number of unique values in service_name
len(np.unique(requests_train['service_name']))

32

In [22]:
# service_name_details_to_keep = ['Missed Collection', 'Graffiti Removal', '72 Hour Violation', 
#                                 'Illegal Dumping', 'Parking Zone Violation', 
#                                 'Graffiti Removal - Commercial', 'STREET LIGHT OUT', 
#                                 'Potential Missed Collection', 'CONCRETE SIDEWALK REPAIR ISSUE', 
#                                 'LIGHTS OUT', 'SWPP - Storm Water Polution Prevention', 
#                                 'EVALUATE TREE FOR REMOVAL', 'LIGHTS ON FLASH', 'POLE KNOCK OVER/DAMAGE', 
#                                 'LIMB FALLEN OR HANGING', 'Container Left Out', 'Vegetation Encroachment', 
#                                 'Encroachment', 'DRAIN INLET', 'Litter', 'Out Late', 'CONTRACTUAL TRIMMING', 
#                                 'EVALUATE FOR RESURFACING/SLURRY', 'Quality of Life Issues', 
#                                 'TRIM TREE FOR PEDS/VEHICLE CLEARANCE', 'TRAFFIC MAINTENANCE - OTHER', 
#                                 'PAINT CURB - MAINTENANCE', 'DAMAGED CURB', 'MINOR ASPHALT REPAIR',  
#                                 'TRAFFIC OPERATIONS CURB DESIGNATION EVALUATIONS', 'TREE FALLEN', 
#                                 'GUARDRAILS/BARRIER METAL/WOODEN', 'Street Litter Container', 'PALM FRONDS DOWN', 
#                                 'PAINT STRIPING - MAINTENANCE',  'TRAFFIC OPERATIONS TRAFFIC CALMING/SAFETY FEATURES', 
#                                 'SIGNAL FACING WRONG DIRECTION', 'SIDEWALK MINOR REHAB CONTRACT', 'INVESTIGATE', 
#                                 'TRIM TREE - BLOCKING TRAFFIC DEVICE', 'ASPHALT PAVEMENT REPAIR ISSUE',  
#                                 'TRAFFIC OPERATIONS SIGNAGE EVALUATIONS', 'LIGHT ON DURING THE DAY', 'Citizen Contact', 
#                                 'CHAIN LINK FENCE REPAIR', 'DRAINAGE DITCH', 'FREE TREE PLANTING REQUEST', 'Container Overflow', 
#                                 'TRAFFIC SIGNAL STRIPING/SIGNAGE REVIEW', 'SIDEWALK LIFTED']

In [23]:
all_details = list(np.unique(requests_train['service_name_detail'].dropna()))

In [24]:
len(all_details)

142

In [25]:
requests_train['detailed_type'] = np.where(requests_train['service_name_detail'].isin(all_details) == True, 
                                        requests_train['service_name_detail'], 
                                        requests_train['service_name'])

In [26]:
len(np.unique(requests_train['detailed_type']))

162

In [27]:
# #Find number of unique values
# len(np.unique(requests_train['detailed_type']))

In [28]:
# requests_train['detailed_type'].value_counts().tail(20)

In [29]:
requests_train['detailed_type'].value_counts()

Missed Collection                         29884
72 Hour Violation                         21682
Encampment                                21028
Graffiti Removal                          19995
Shared Mobility Device                    15804
                                          ...  
 BICYCLE FACILITIES DESIGN                    1
MILL & PAVE TRENCH REPAIRS                    1
 CIP EVALUATE FOR STREET WIDENING             1
RESTORE CONCRETE UTILITY TRENCH               1
DEFINE SCOPE FOR SW MJR REHAB CONTRACT        1
Name: detailed_type, Length: 162, dtype: int64

In [30]:
# drop service_name and service_name_detail
requests_train.drop(labels=['service_name', 'service_name_detail'], axis=1, inplace=True)

In [31]:
requests_train.sample(10)

Unnamed: 0,service_request_id,public_description,detailed_type
91433,3271508,This car hasn't been moved for the better part...,72 Hour Violation
234638,3444668,Illegal encampment area is known for the use o...,Encampment
93938,3274554,Trash was not collected from my residence,Missed Collection
321911,3559602,Translated: Graffiti... graciela,Graffiti Removal
339350,3585038,Broken stop sign,Traffic Sign Maintenance
239010,3450192,Homeless camp on side of road,Encampment
69261,3244382,Missed garbage pickup. Others missing as well.,Potential Missed Collection
103606,3286117,Car has been parked on street for over a month...,72 Hour Violation
177185,3374761,Trash at north park entrance by homeless,Encampment
357247,3614855,Trash,Shared Mobility Device


In [32]:
vectorized_len = np.vectorize(len)

In [33]:
requests_train['length_description'] = vectorized_len(requests_train['public_description'])

In [34]:
# len(requests_train[requests_train['length_description'] < 20])

In [35]:
# requests_train = requests_train[requests_train['length_description'] >= 7]

## Tensorflow Model

### Imports

In [36]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import os
import datetime
import tensorflow_hub as hub

In [37]:
pd.set_option('display.max_colwidth', None)

In [38]:
requests_train.sample(10)

Unnamed: 0,service_request_id,public_description,detailed_type,length_description
222007,3428768,Dead palm tree from weevils. Poses a hazard to pedestrians.,EVALUATE TREE FOR REMOVAL,59
197197,3399178,homeless camp,Dead Animal,13
7139,3001358,"Sidewalk on north side of Palm, maybe 30 feet from light to turn right onto Saturn- someone pulled up a big chunk of the broken sidewalk and left laying there.",CONCRETE SIDEWALK REPAIR ISSUE,160
170365,3366531,ADA laying down all night BIRD,Shared Mobility Device,30
201305,3403983,Shady Oak Rd between Brookhaven Rd. & Windward St. Potholes need to be filled.,Pothole,79
323941,3562493,Parking issue,Parking Zone Violation,13
29558,3195885,Graffiti all down wall,Graffiti Removal,22
41558,3210784,Outside the old at&t building across from the old yummy buffet,Illegal Dumping,62
181674,3380195,Graffiti on light pole on Howard Avenue near entrance to sprouts parking lot,Graffiti Removal,76
295399,3523512,Graffiti on bus stop bench,Graffiti Removal,26


In [39]:
X_train, X_test = train_test_split(requests_train, test_size=0.2, random_state=42)

In [40]:
from sklearn.utils import class_weight

In [41]:
v1 = list(np.unique(requests_train['detailed_type']))

In [89]:
v1

[' BICYCLE FACILITIES DESIGN',
 ' BICYCLE FACILITIES EVALUATE FOR MARKINGS',
 ' BICYCLE FACILITIES EVALUATE FOR RACKS (OR OTHER)',
 ' BICYCLE FACILITIES EVALUATE FOR SIGNAGE',
 ' BICYCLE FACILITIES EVALUATE FOR STRIPING',
 ' BICYCLE FACILITIES GRANTS',
 ' BICYCLE FACILITIES PLAN CHECK',
 ' CIP EVALUATE FOR STREET WIDENING',
 ' CIP FINANCE PLAN UPDATE',
 ' STREET LIGHT EVALUATE FOR SHIELD',
 ' TEO WO GUARDRAILS/BARRIER METAL/WOODEN',
 ' TEO WO PAINT CURB - NEW',
 ' TEO WO PAINT STRIPING - NEW',
 ' TEO WO TRAFFIC SIGN - NEW',
 ' TRAFFIC OPERATIONS CURB DESIGNATION EVALUATIONS',
 ' TRAFFIC OPERATIONS MARKINGS EVALUATIONS',
 ' TRAFFIC OPERATIONS PARKING EVALUATIONS',
 ' TRAFFIC OPERATIONS SIGNAGE EVALUATIONS',
 ' TRAFFIC OPERATIONS STRIPING EVALUATIONS',
 ' TRAFFIC OPERATIONS TRAFFIC CALMING/SAFETY FEATURES',
 ' TRAFFIC OPERATIONS VISIBILITY EVALUATIONS',
 ' TRAFFIC SAFETY GUARD RAIL REVIEW',
 ' TRAFFIC SAFETY LEGALITY CHECK',
 ' TRAFFIC SAFETY PARKING REVIEW',
 ' TRAFFIC SAFETY RADAR SPEE

In [90]:
v2 = requests_train['detailed_type']

In [91]:
v2

164116         Missed Collection
215343         Missed Collection
245970           Illegal Dumping
241707    Shared Mobility Device
358042    Parking Zone Violation
                   ...          
119879    Shared Mobility Device
259178    Shared Mobility Device
131932                Encampment
146867           Illegal Dumping
121958                     Other
Name: detailed_type, Length: 202027, dtype: object

In [43]:
class_weights = list(class_weight.compute_class_weight(class_weight = 'balanced',
                                                       classes= np.unique(requests_train['detailed_type']),
                                                      y=requests_train['detailed_type']
                                                      ))

In [44]:
len(requests_train['detailed_type'].value_counts())

162

In [45]:
class_weights.sort()

In [86]:
class_weights

[0.04173070027150248,
 0.05751684562833596,
 0.059305699396689185,
 0.06236960474686573,
 0.07890915255084664,
 0.09971855484675997,
 0.10365557700220931,
 0.16877523980424688,
 0.17062255396272819,
 0.3583563927912587,
 0.4351291859433288,
 0.4477846488020037,
 0.4665470433646017,
 0.5085971643203835,
 0.5431534176452876,
 0.5648008364644838,
 0.5899149701577958,
 0.7535228078027675,
 0.7907928008329614,
 0.8193694132152302,
 0.8308329426472887,
 0.8336097907176339,
 0.847777190287954,
 0.9251337143275817,
 0.9607706062508322,
 1.00570987654321,
 1.0155376603530784,
 1.2686472501664092,
 1.2843256919810302,
 1.4317798472027328,
 1.4433799154092364,
 1.4863888521020028,
 1.5453286826686248,
 1.6112147892940314,
 1.7153786064836043,
 1.9304647785039941,
 1.9639059006513075,
 2.0244809203142538,
 2.389042618608391,
 2.6647013822939747,
 2.7051632254090676,
 2.728840802874355,
 2.7961440513757405,
 2.906946962502518,
 3.0122711278105805,
 3.034258508305548,
 3.094491927825261,
 3.21412434

In [47]:
len(class_weights)

162

In [48]:
weights = {}

In [None]:
c

In [49]:
for index, weight in enumerate(class_weights) :
    weights[index]=weight

In [87]:
weights

{0: 0.04173070027150248,
 1: 0.05751684562833596,
 2: 0.059305699396689185,
 3: 0.06236960474686573,
 4: 0.07890915255084664,
 5: 0.09971855484675997,
 6: 0.10365557700220931,
 7: 0.16877523980424688,
 8: 0.17062255396272819,
 9: 0.3583563927912587,
 10: 0.4351291859433288,
 11: 0.4477846488020037,
 12: 0.4665470433646017,
 13: 0.5085971643203835,
 14: 0.5431534176452876,
 15: 0.5648008364644838,
 16: 0.5899149701577958,
 17: 0.7535228078027675,
 18: 0.7907928008329614,
 19: 0.8193694132152302,
 20: 0.8308329426472887,
 21: 0.8336097907176339,
 22: 0.847777190287954,
 23: 0.9251337143275817,
 24: 0.9607706062508322,
 25: 1.00570987654321,
 26: 1.0155376603530784,
 27: 1.2686472501664092,
 28: 1.2843256919810302,
 29: 1.4317798472027328,
 30: 1.4433799154092364,
 31: 1.4863888521020028,
 32: 1.5453286826686248,
 33: 1.6112147892940314,
 34: 1.7153786064836043,
 35: 1.9304647785039941,
 36: 1.9639059006513075,
 37: 2.0244809203142538,
 38: 2.389042618608391,
 39: 2.6647013822939747,
 40:

In [50]:
#Instantiate
dataset_train = tf.data.Dataset.from_tensor_slices((X_train['public_description'].values, X_train['detailed_type'].values))
dataset_test = tf.data.Dataset.from_tensor_slices((X_test['public_description'].values, X_test['detailed_type'].values))

In [51]:
for text, target in dataset_train.take(5):
    print('description: {}, Target: {}'.format(text, target))

description: b'Parking for over a month', Target: b'Encampment'
description: b'Homeless', Target: b'Encampment'
description: b'Motor home, attached boat trailer and van all have no license plates/', Target: b'Encampment'
description: b'Request pick up', Target: b'Missed Collection'
description: b'Busted irrigation line Park Village Rd westbound side; I let landscape KTR know but this is not their footprint', Target: b'SWPP - Storm Water Polution Prevention'


In [52]:
for text, target in dataset_test.take(5):
    print('description: {}, Target: {}'.format(text, target))

description: b'New graffiti all over building', Target: b'Graffiti Removal'
description: b'Bus bench is on the sidewalk not allowing handicap to go by', Target: b'CONCRETE SIDEWALK REPAIR ISSUE'
description: b'Graffiti on AT&T box', Target: b'Graffiti Removal - Commercial'
description: b'Did not get emptied', Target: b'Missed Collection'
description: b'Trash cans on east side of street on Classique Way not picked up', Target: b'Missed Collection'


In [53]:
detailed_types = list(np.unique(requests_train['detailed_type']))

In [54]:
vals = list(range(162))

In [55]:
print(len(detailed_types))
print(len(vals))

162
162


In [56]:
table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(detailed_types),
        values=tf.constant(vals),
    ),
        default_value=tf.constant(-1),
        name="target_encoding"
)


In [57]:
@tf.function
def target(x):
    return table.lookup(x)

In [58]:
def show_batch(dataset, size=5):
    for batch, label in dataset.take(size):
        print(batch.numpy())
        print(target(label).numpy())

In [59]:
show_batch(dataset_test, 6)

b'New graffiti all over building'
85
b'Bus bench is on the sidewalk not allowing handicap to go by'
44
b'Graffiti on AT&T box'
86
b'Did not get emptied'
104
b'Trash cans on east side of street on Classique Way not picked up'
104
b'Missed greens'
104


In [60]:
 def fetch(text, labels):
        return text, tf.one_hot(target(labels),162)

In [61]:
train_data_f = dataset_train.map(fetch)
test_data_f=dataset_test.map(fetch)

In [62]:
next(iter(train_data_f))

(<tf.Tensor: shape=(), dtype=string, numpy=b'Parking for over a month'>,
 <tf.Tensor: shape=(162,), dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>)

In [63]:
train_data, train_labels = next(iter(train_data_f.batch(2)))
train_data, train_labels

(<tf.Tensor: shape=(2,), dtype=string, numpy=array([b'Parking for over a month', b'Homeless'], dtype=object)>,
 <tf.Tensor: shape=(2, 162), dtype=float32, numpy=
 array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 

In [64]:
embedding = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1"
hub_layer = hub.KerasLayer(embedding, output_shape=[128], input_shape=[],
                           dtype=tf.string, trainable=True)
hub_layer(train_data[:1])



<tf.Tensor: shape=(1, 128), dtype=float32, numpy=
array([[ 0.2947122 ,  0.06149567, -0.12671496,  0.03034275,  0.09676808,
         0.16921037, -0.09496925,  0.10810017,  0.05087587,  0.07639003,
         0.06503929, -0.05157888, -0.09376147,  0.10455734,  0.02672946,
        -0.05654573, -0.03370756, -0.01311209, -0.00636674,  0.24171862,
        -0.10896403,  0.18235618, -0.01823283, -0.00650783, -0.03342678,
        -0.07151679, -0.0044638 , -0.11139045, -0.11815578, -0.16122422,
        -0.04115741, -0.01341171, -0.14442578,  0.15274805, -0.01077449,
         0.08248467,  0.06425278, -0.04439811,  0.07978082,  0.01977678,
         0.10914295,  0.0390858 , -0.01445776,  0.19658498, -0.02042773,
         0.08300801,  0.09490787,  0.12943365,  0.09240463, -0.11280449,
         0.03183676, -0.04501188, -0.04222551,  0.0524281 ,  0.09684607,
        -0.06504567,  0.08754937,  0.0907004 ,  0.18327884,  0.01068473,
        -0.07351647, -0.00059976, -0.10533527, -0.10557347,  0.02202377,
 

In [65]:
model = tf.keras.Sequential()
model.add(hub_layer)
for units in [128, 128, 64, 32]:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(162, activation='softmax'))

model.summary()

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 128)               124642688 
                                                                 
 dense (Dense)               (None, 128)               16512     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               16512     
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dropout_2 (Dropout)         (None, 64)                0

In [66]:
model.compile(optimizer='adam',
             loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
             metrics=['accuracy'])

In [67]:
train_data_f=train_data_f.shuffle(70000).batch(512)
test_data_f=test_data_f.batch(512)

In [77]:
history = model.fit(train_data_f,
                   epochs=50,
                   validation_data=test_data_f,
                   verbose=1,
                   class_weight=weights)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [78]:
len(list(dataset_test))

40406

In [79]:
results = model.evaluate(dataset_test.map(fetch).batch(40406), verbose=2)

1/1 - 7s - loss: 2.8479 - accuracy: 0.5880 - 7s/epoch - 7s/step


In [80]:
print(results)

[2.8478591442108154, 0.5880067348480225]


In [81]:
test_data, test_labels = next(iter(dataset_test.map(fetch).batch(45963)))

In [82]:
y_pred=model.predict(test_data)



In [83]:
from sklearn.metrics import classification_report

In [84]:
print(classification_report(test_labels.numpy().argmax(axis=1), y_pred.argmax(axis=1)))

              precision    recall  f1-score   support

           2       0.00      0.00      0.00         1
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         3
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         7
          11       0.00      0.00      0.00         1
          14       0.00      0.00      0.00        84
          15       0.00      0.00      0.00        22
          16       0.00      0.00      0.00         6
          17       0.00      0.00      0.00        64
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00        99
          20       0.00      0.00      0.00         6
          21       0.00      0.00      0.00         2
          22       0.00      0.00      0.00         9
          23       0.00      0.00      0.00         3
          26       0.00      0.00      0.00        14
          28       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [85]:
from sklearn.metrics import confusion_matrix
confusion_matrix(test_labels.numpy().argmax(axis=1), y_pred.argmax(axis=1))

array([[  0,   0,   0, ...,   0,   0,   0],
       [  0,   0,   0, ...,   1,   0,   0],
       [  0,   0,   0, ...,   0,   0,   0],
       ...,
       [  0,   0,   0, ..., 102,   0,  41],
       [  0,   0,   0, ...,   1, 176,   2],
       [  0,   0,   0, ...,  29,   0,  60]], dtype=int64)