# Tensoflow NLP Model for Text Classification

## Imports

In [153]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.model_selection import train_test_split
import pickle
from sklearn.metrics import classification_report
from tensorflow import keras

In [154]:
pd.set_option('display.max_colwidth', None)

## Tensorflow Imports

In [155]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import os
import datetime
import tensorflow_hub as hub

## Processing Text Data

In [156]:
with open('../data/processed_21_data.pkl', 'rb') as file:
    processed_21_data = pickle.load(file)

In [157]:
#only keep relevant columns
processed_21_data = processed_21_data[['detailed_type', 'public_description']]

In [158]:
processed_21_data.sample(3)

Unnamed: 0,detailed_type,public_description
9177,Damaged/Knocked Over Pole,Street light was knocked down 5 to 6 months ago. The street light needs to be replaced and the barricades need to be picked up. Contact is Alvaro at 858-222-9892.
2820,Traffic Sign Maintenance,Street sign fading
163694,72 Hour Violation,Parking violation


In [159]:
X_train, X_test = train_test_split(processed_21_data, test_size=0.2, random_state=42)

In [160]:
from sklearn.utils import class_weight

In [161]:
class_weights = list(class_weight.compute_class_weight(class_weight = 'balanced',
                                                       classes= np.unique(processed_21_data['detailed_type']),
                                                      y=processed_21_data['detailed_type']
                                                      ))

In [162]:
class_weights[0:5]

[0.1823593766156387,
 8.660313862848117,
 3.020793656497419,
 5.369206531689766,
 2.7260414639946386]

In [163]:
num_options = len(class_weights)
num_options

51

In [164]:
weights = {}

In [165]:
for index, weight in enumerate(class_weights) :
    weights[index]=weight

In [166]:
weights

{0: 0.1823593766156387,
 1: 8.660313862848117,
 2: 3.020793656497419,
 3: 5.369206531689766,
 4: 2.7260414639946386,
 5: 1.385553156538603,
 6: 176.60854341736695,
 7: 3.915312126434105,
 8: 0.18867714203847055,
 9: 3.2320517749583493,
 10: 6.26747682596486,
 11: 2.9157070847206805,
 12: 2.677335796256781,
 13: 0.1989795274298356,
 14: 1.1417776993041502,
 15: 0.3181317045603625,
 16: 4.495490196078431,
 17: 0.1319768132505878,
 18: 0.5327557870810467,
 19: 25.890257673750128,
 20: 6.945279797312184,
 21: 0.32755111715481716,
 22: 2.7230392156862746,
 23: 1.7351014791881665,
 24: 0.540028307926862,
 25: 4.940099116569705,
 26: 1.660523578135082,
 27: 4.759421766781785,
 28: 494.50392156862745,
 29: 0.25155352608028664,
 30: 1.4686781157369393,
 31: 10.611672136665826,
 32: 34.580693815987935,
 33: 2.5450536364829,
 34: 4.131193998067063,
 35: 11.77390289449113,
 36: 1.476571876884525,
 37: 2.597184462020102,
 38: 2.319436780340654,
 39: 1.435008478144595,
 40: 11.187871528701978,
 41: 

In [167]:
#Instantiate
dataset_train = tf.data.Dataset.from_tensor_slices((X_train['public_description'].values, X_train['detailed_type'].values))
# dataset_train = dataset_train
dataset_test = tf.data.Dataset.from_tensor_slices((X_test['public_description'].values, X_test['detailed_type'].values))
# dataset_test = dataset_test

In [168]:
for text, target in dataset_train.take(5):
    print('description: {}, Target: {}'.format(text, target))

description: b'all lights out', Target: b'Street Light Out of Service'
description: b'Graffiti tag on streetlight post southwest corner of nutmeg and 4th Avenue', Target: b'Graffiti Removal'
description: b'Graffiti -Jacky', Target: b'Graffiti Removal'
description: b'There are many campers, motorhomes, trailers parked for weeks and putting trash all over. the have broke the lock on the hose bib on the lawnmower shop and created a $900 dollar water bill.  they have pooped on the sidewalk as well as urinating causing possible disease.  one man died on the street.  this is not a safe environment. please do something about it', Target: b'Encampment'
description: b'Homeless Encampment', Target: b'Encampment'


In [169]:
for text, target in dataset_test.take(5):
    print('description: {}, Target: {}'.format(text, target))

description: b"Once again our stop sign has been knocked over and this time I got the license plate number of the truck that knocked it down.....34093A1                                      This has been an ongoing issue for too long with constructions trucks that are driving down our easement.  This tiny street/easement is meant for the people who live on it, it is not a parking lot for lots of construction trucks.  They have been blocking driveways for the residents that live down there with no regard.  I have tried talking with them about it isn't parking for them and some of the trucks are way too large and that is why the sign is continually knocked down.       Please have someone put the stop sign back up and could we please also get a sign saying no construction trucks, they have the alley to park it but they want to do what is easiest despite the constant inconvenience and noise that is disrupting the residents who pay a fortune to live here.    Truck blocking driveway", Target

In [170]:
detailed_types = list(np.unique(X_train['detailed_type']))
detailed_types

['72 Hour Violation',
 'COVID-19',
 'Container Left Out',
 'Container Out Late',
 'Damaged/Knocked Over Pole',
 'Dead Animal',
 'Development Services - Code Enforcement',
 'Drain Inlet',
 'Encampment',
 'Encroachment',
 'Environmental Services Code Compliance',
 'Fallen/Hanging Tree Limb',
 'Flashing Traffic Signal Lights',
 'Graffiti Removal',
 'Graffiti Removal - Commercial',
 'Illegal Dumping',
 'Litter',
 'Missed Collection',
 'Other',
 'Oversized Vehicle',
 'Parking',
 'Parking Zone Violation',
 'Pavement Maintenance',
 'Potential Missed Collection',
 'Pothole',
 'Quality of Life Issues',
 'ROW Maintenance',
 'Resurfacing Evaluation',
 'Right-of-Way Code Enforcement',
 'Shared Mobility Device',
 'Sidewalk Repair Issue',
 'Stormwater',
 'Stormwater Code Enforcement',
 'Stormwater Pollution Prevention',
 'Street Flooded',
 'Street Light Maintenance',
 'Street Light Out of Service',
 'Street Sweeping',
 'Traffic Engineering',
 'Traffic Sign Maintenance',
 'Traffic Signal Issue',
 'Tr

In [171]:
vals = list(range(num_options))

In [172]:
num_options == len(vals)

True

In [173]:
table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(detailed_types),
        values=tf.constant(vals),
    ),
        default_value=tf.constant(-1),
        name="target_encoding"
)


In [174]:
pickle.dump(detailed_types, open('../data/detailed_types.pkl', 'wb'))
pickle.dump(vals, open('../data/vals.pkl', 'wb'))

In [175]:
@tf.function
def target(x):
    return table.lookup(x)

In [176]:
def show_batch(dataset, size=5):
    for batch, label in dataset.take(size):
        print(batch.numpy())
        print(target(label).numpy())

In [177]:
show_batch(dataset_test, 6)

b"Once again our stop sign has been knocked over and this time I got the license plate number of the truck that knocked it down.....34093A1                                      This has been an ongoing issue for too long with constructions trucks that are driving down our easement.  This tiny street/easement is meant for the people who live on it, it is not a parking lot for lots of construction trucks.  They have been blocking driveways for the residents that live down there with no regard.  I have tried talking with them about it isn't parking for them and some of the trucks are way too large and that is why the sign is continually knocked down.       Please have someone put the stop sign back up and could we please also get a sign saying no construction trucks, they have the alley to park it but they want to do what is easiest despite the constant inconvenience and noise that is disrupting the residents who pay a fortune to live here.    Truck blocking driveway"
39
b'MISSION BAY DR 

In [178]:
def fetch(text, labels):
        return text, tf.one_hot(target(labels),num_options)

In [179]:
train_data_f = dataset_train.map(fetch)
# train_data_f_x = train_data_f
test_data_f=dataset_test.map(fetch)
# test_data_f_x = test_data_x

In [180]:
next(iter(train_data_f))

(<tf.Tensor: shape=(), dtype=string, numpy=b'all lights out'>,
 <tf.Tensor: shape=(51,), dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       dtype=float32)>)

In [181]:
train_data, train_labels = next(iter(train_data_f.batch(2)))
# train_data_x = train_data
# train_labels_x = train_labels

In [182]:
# embedding = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1"
# hub_layer = hub.KerasLayer(embedding, output_shape=[128], input_shape=[],
#                            dtype=tf.string, trainable=True)
# hub_layer(train_data[:1])

In [183]:
model = tf.keras.Sequential()
model.add(hub_layer)
for units in [128, 128, 64, 32]:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(num_options, activation='softmax'))

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 128)               124642688 
                                                                 
 dense_10 (Dense)            (None, 128)               16512     
                                                                 
 dropout_8 (Dropout)         (None, 128)               0         
                                                                 
 dense_11 (Dense)            (None, 128)               16512     
                                                                 
 dropout_9 (Dropout)         (None, 128)               0         
                                                                 
 dense_12 (Dense)            (None, 64)                8256      
                                                                 
 dropout_10 (Dropout)        (None, 64)               

In [184]:
train_data_f=train_data_f.shuffle(70000).batch(512)
# train_data_f_x = train_data_f
test_data_f=test_data_f.batch(512)
# test_data_f_x = test_data_f

## Model Fitting

In [185]:
# model.compile(optimizer='adam',
#              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
#              metrics=['accuracy'])

In [186]:
model = keras.models.load_model('../data/tf_model.pkl')

In [None]:
history = model.fit(train_data_f,
                   epochs=3,
                   validation_data=test_data_f,
                   verbose=1,
                   class_weight=weights)

Epoch 1/3

In [188]:
# model.save('../data/tf_model_X.pkl')

In [189]:
# pickle.dump(model, open('../data/tf_model.pkl', 'wb'))

## Model Results

In [190]:
for_results = len(list(dataset_test))
for_results_x = for_results

In [191]:
results = model.evaluate(dataset_test.map(fetch).batch(for_results), verbose=2)

1/1 - 8s - loss: 1.5874 - accuracy: 0.6179 - 8s/epoch - 8s/step


In [192]:
print(results)

[1.5873810052871704, 0.6179420948028564]


In [193]:
next(iter(dataset_test.map(fetch).batch(1)))

(<tf.Tensor: shape=(1,), dtype=string, numpy=
 array([b"Once again our stop sign has been knocked over and this time I got the license plate number of the truck that knocked it down.....34093A1                                      This has been an ongoing issue for too long with constructions trucks that are driving down our easement.  This tiny street/easement is meant for the people who live on it, it is not a parking lot for lots of construction trucks.  They have been blocking driveways for the residents that live down there with no regard.  I have tried talking with them about it isn't parking for them and some of the trucks are way too large and that is why the sign is continually knocked down.       Please have someone put the stop sign back up and could we please also get a sign saying no construction trucks, they have the alley to park it but they want to do what is easiest despite the constant inconvenience and noise that is disrupting the residents who pay a fortune to live 

In [194]:
type(dataset_test)

tensorflow.python.data.ops.from_tensor_slices_op.TensorSliceDataset

In [195]:
test_data, test_labels = next(iter(dataset_test.map(fetch).batch(for_results)))
test_data_x = test_data
test_labels_x = test_labels

In [196]:
y_pred=model.predict(test_data)



In [197]:
type(test_data)

tensorflow.python.framework.ops.EagerTensor

In [198]:
y_pred

array([[1.0767374e-02, 1.8732173e-05, 1.4638258e-07, ..., 1.8017289e-03,
        2.1123686e-05, 3.4416781e-04],
       [5.7529771e-28, 0.0000000e+00, 6.7365081e-38, ..., 3.5909276e-30,
        8.7956607e-29, 8.4588869e-24],
       [2.0094419e-09, 1.0781780e-11, 1.2984411e-06, ..., 1.7754719e-09,
        3.9535040e-01, 6.1823286e-08],
       ...,
       [8.8851970e-01, 1.4122645e-10, 1.4437809e-12, ..., 3.1019761e-20,
        1.9984509e-13, 2.8626611e-27],
       [1.6730356e-08, 1.3871118e-06, 1.5023913e-11, ..., 1.7816414e-05,
        1.7741436e-07, 3.0663768e-06],
       [6.5508844e-05, 5.1820029e-05, 4.6227101e-02, ..., 1.5358141e-14,
        3.3081014e-05, 3.7872236e-14]], dtype=float32)

In [199]:
results_df = pd.DataFrame(y_pred)
results_df.columns = ['72 Hour Violation',
 'COVID-19',
 'Container Left Out',
 'Container Out Late',
 'Damaged/Knocked Over Pole',
 'Dead Animal',
 'Development Services - Code Enforcement',
 'Drain Inlet',
 'Encampment',
 'Encroachment',
 'Environmental Services Code Compliance',
 'Fallen/Hanging Tree Limb',
 'Flashing Traffic Signal Lights',
 'Graffiti Removal',
 'Graffiti Removal - Commercial',
 'Illegal Dumping',
 'Litter',
 'Missed Collection',
 'Other',
 'Oversized Vehicle',
 'Parking',
 'Parking Zone Violation',
 'Pavement Maintenance',
 'Potential Missed Collection',
 'Pothole',
 'Quality of Life Issues',
 'ROW Maintenance',
 'Resurfacing Evaluation',
 'Right-of-Way Code Enforcement',
 'Shared Mobility Device',
 'Sidewalk Repair Issue',
 'Stormwater',
 'Stormwater Code Enforcement',
 'Stormwater Pollution Prevention',
 'Street Flooded',
 'Street Light Maintenance',
 'Street Light Out of Service',
 'Street Sweeping',
 'Traffic Engineering',
 'Traffic Sign Maintenance',
 'Traffic Signal Issue',
 'Traffic Signal Out of Service',
 'Traffic Signal Timing',
 'Trash/Recycling Collection',
 'Tree Maintenance',
 'Tree Removal',
 'Tree Trimming for Pedestrian/Vehicle Clearance',
 'Trimming Request',
 'Vegetation Encroachment',
 'Waste on Private Property',
 'Weed Cleanup']
results_df

Unnamed: 0,72 Hour Violation,COVID-19,Container Left Out,Container Out Late,Damaged/Knocked Over Pole,Dead Animal,Development Services - Code Enforcement,Drain Inlet,Encampment,Encroachment,...,Traffic Signal Out of Service,Traffic Signal Timing,Trash/Recycling Collection,Tree Maintenance,Tree Removal,Tree Trimming for Pedestrian/Vehicle Clearance,Trimming Request,Vegetation Encroachment,Waste on Private Property,Weed Cleanup
0,1.076737e-02,1.873217e-05,1.463826e-07,4.031726e-06,1.191184e-04,1.989930e-03,2.315349e-04,2.757364e-05,6.075676e-03,3.775804e-04,...,3.291605e-03,7.100753e-03,2.258399e-06,7.035618e-03,1.550452e-04,2.136120e-03,1.118503e-04,1.801729e-03,2.112369e-05,3.441678e-04
1,5.752977e-28,0.000000e+00,6.736508e-38,3.757159e-18,2.066923e-09,2.319255e-16,0.000000e+00,8.263026e-18,1.143858e-21,2.340354e-23,...,3.892734e-05,2.102886e-09,6.664391e-27,3.536095e-21,1.648852e-22,2.367728e-35,3.706641e-30,3.590928e-30,8.795661e-29,8.458887e-24
2,2.009442e-09,1.078178e-11,1.298441e-06,6.746213e-13,4.875332e-10,5.818962e-09,5.874600e-12,2.266695e-06,2.479990e-04,4.835981e-02,...,2.629686e-15,1.435686e-17,1.529576e-04,1.710128e-12,4.324635e-15,8.982122e-13,8.195557e-20,1.775472e-09,3.953504e-01,6.182329e-08
3,1.195709e-03,2.651525e-06,7.058992e-11,1.338177e-10,8.643900e-06,7.137408e-03,1.808413e-05,1.364962e-05,3.179254e-03,2.193728e-05,...,2.555840e-04,8.088582e-06,3.552545e-08,1.621454e-04,9.353169e-06,1.904492e-05,3.242889e-07,2.917681e-04,2.951177e-06,1.092388e-04
4,2.948388e-20,7.930307e-28,3.422967e-37,7.279107e-16,2.695871e-06,8.093353e-13,0.000000e+00,3.748853e-15,1.515109e-20,1.522497e-26,...,9.536613e-01,9.074862e-03,9.214313e-28,1.615531e-16,4.788878e-15,2.529453e-28,5.970606e-19,4.054131e-27,1.258499e-30,2.734306e-25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50435,4.289572e-05,8.213203e-13,2.168576e-12,4.507166e-12,3.740212e-18,6.639628e-10,4.282862e-10,1.323381e-12,2.370025e-05,1.121448e-05,...,7.442284e-15,2.062154e-12,6.660862e-10,4.182941e-13,6.144514e-19,2.475241e-12,8.322772e-20,1.941624e-12,1.336526e-08,6.656686e-13
50436,4.778258e-01,1.843475e-06,1.071107e-07,3.142330e-09,1.490388e-17,3.996383e-09,2.388219e-05,5.980951e-14,2.247255e-02,7.920344e-07,...,5.435305e-12,2.262491e-11,4.887905e-09,9.245323e-16,1.570737e-19,8.178684e-11,3.082084e-18,5.775598e-12,1.784995e-07,3.871855e-16
50437,8.885197e-01,1.412264e-10,1.443781e-12,3.199661e-13,1.078489e-31,1.968314e-15,3.562927e-07,1.208362e-23,2.040220e-03,2.682286e-11,...,1.762054e-19,1.471199e-15,4.474377e-15,3.841277e-25,7.817531e-32,1.615888e-16,3.929110e-28,3.101976e-20,1.998451e-13,2.862661e-27
50438,1.673036e-08,1.387112e-06,1.502391e-11,4.009413e-10,2.667859e-01,1.346809e-06,6.020863e-10,4.965522e-03,2.386889e-05,2.304506e-08,...,1.407953e-04,1.542969e-04,4.965913e-08,3.097315e-01,1.012659e-01,4.654663e-04,1.690370e-02,1.781641e-05,1.774144e-07,3.066377e-06


In [200]:
#return column where value is that above
j = 0
max_col = []

for j in range(100):

    i=0


    while i < results_df.shape[1]:
        if results_df.iloc[j,i] == results_df.iloc[j,:].max():
            max_col.append(list(results_df.columns)[i])
        i += 1

In [201]:
max_col

['Traffic Engineering',
 'Flashing Traffic Signal Lights',
 'Illegal Dumping',
 'ROW Maintenance',
 'Traffic Signal Out of Service',
 'Damaged/Knocked Over Pole',
 'Traffic Signal Out of Service',
 'Quality of Life Issues',
 'Missed Collection',
 'Pothole',
 'Oversized Vehicle',
 'Missed Collection',
 'Shared Mobility Device',
 'Graffiti Removal',
 '72 Hour Violation',
 'Pothole',
 'Waste on Private Property',
 'Encampment',
 'ROW Maintenance',
 'Traffic Signal Timing',
 'Illegal Dumping',
 'Missed Collection',
 '72 Hour Violation',
 '72 Hour Violation',
 'Parking Zone Violation',
 'Encampment',
 'Missed Collection',
 'Graffiti Removal',
 'Vegetation Encroachment',
 'Graffiti Removal - Commercial',
 'Quality of Life Issues',
 'Parking Zone Violation',
 'Parking',
 '72 Hour Violation',
 'Graffiti Removal - Commercial',
 'Street Flooded',
 'Traffic Signal Timing',
 'Traffic Sign Maintenance',
 'Street Light Out of Service',
 'Graffiti Removal - Commercial',
 'Graffiti Removal',
 'Shared 

In [202]:
test_df = pd.DataFrame(test_data).head(100)

In [203]:
test_df['prediction'] = max_col

In [204]:
test_df.sample(10)

Unnamed: 0,0,prediction
63,b'Unknown vehicle parked on residential street for more than 72 hours.',72 Hour Violation
4,b'Traffic signal out at Jackson and Navajo. The turn signal light did not show green last night for the cars going east on Jackson. Also would be good if that dedicated turn signal was an arrow instead of a solid',Traffic Signal Out of Service
22,b'Abandoned vehicle parked on street',72 Hour Violation
41,b'Out of corral since Monday??',Shared Mobility Device
93,b'Saw recycling truck drive by our blue bin and did not pick up due to a car parking close to it. Resident then moved trash can away from vehicle.',Missed Collection
32,b'Please ticket vehicles on 3 minute curb asked to leave',Parking
91,b'Recycling has not been picked up',Missed Collection
6,"b'PER SDPD, LIGHTS OUT IN ALL DIRECTIONS'",Traffic Signal Out of Service
19,b'Traffic light sensor is not being triggered for the middle turn lane. Conference way and Carmel Mountain Rd.',Traffic Signal Timing
57,b'Homeless sleeping in Balboa Park',Encampment


In [205]:
print(classification_report(test_labels.numpy().argmax(axis=1), y_pred.argmax(axis=1), zero_division = 1))

              precision    recall  f1-score   support

           0       0.84      0.76      0.80      5315
           1       0.41      0.68      0.51       109
           2       0.43      0.64      0.51       353
           3       0.38      0.83      0.52       174
           4       0.47      0.64      0.54       339
           5       0.89      0.91      0.90       685
           6       0.00      0.00      0.00         7
           7       0.47      0.49      0.47       235
           8       0.92      0.65      0.76      5255
           9       0.29      0.58      0.39       310
          10       0.15      0.34      0.21       146
          11       0.33      0.46      0.39       347
          12       0.85      0.81      0.83       365
          13       0.83      0.57      0.68      5084
          14       0.22      0.57      0.31       830
          15       0.57      0.06      0.10      3112
          16       0.09      0.35      0.14       218
          17       0.96    

## Text Processing Pipeline

In [114]:
def process_text(df):
    
    #only keep relevant columns
    df_text_target = df[['detailed_type', 'public_description']]
    
    #split into train and test
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=42)
    
    #Instantiate
    dataset_train = tf.data.Dataset.from_tensor_slices((X_train['public_description'].values, X_train['detailed_type'].values))
    dataset_test = tf.data.Dataset.from_tensor_slices((X_test['public_description'].values, X_test['detailed_type'].values))
    
    train_data_f = dataset_train.map(fetch)
    test_data_f=dataset_test.map(fetch)
    
    next(iter(train_data_f))
    
    train_data, train_labels = next(iter(train_data_f.batch(2)))
    
    for_results = len(list(dataset_test))
    test_data, test_labels = next(iter(dataset_test.map(fetch).batch(len(list(dataset_test)))))
        
    return train_data_f, test_data_f, dataset_train, dataset_test, for_results, train_data, train_labels, test_data, test_labels 

# 2020 Data

In [115]:
with open('../data/processed_20_data.pkl', 'rb') as file:
    df_20 = pickle.load(file)

In [116]:
train_data_f, test_data_f, dataset_train, dataset_test, for_results, train_data, train_labels, test_data, test_labels  = process_text(df_20)

## Model Loading

In [117]:
# model = keras.models.load_model('../data/tf_model.pkl')

## Model Training Pipeline

In [118]:
history = model.fit(train_data_f,
                   epochs=3,
                   validation_data=test_data_f,
                   verbose=1,
                   class_weight=weights)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node 'tokenize/StringSplit/StringSplit' defined at (most recent call last):
Node: 'tokenize/StringSplit/StringSplit'
input must be a vector, got shape: []
	 [[{{node tokenize/StringSplit/StringSplit}}]] [Op:__inference_train_function_937988]

In [None]:
results = model.evaluate(dataset_test.map(fetch).batch(len(list(dataset_test))), verbose=2)

In [None]:
print(results)

In [None]:
next(iter(dataset_test.map(fetch).batch(1)))

In [None]:
type(dataset_test)

In [None]:
y_pred=model.predict(test_data)

In [None]:
type(test_data)

In [None]:
y_pred

In [None]:
results_df = pd.DataFrame(y_pred)
results_df.columns = ['72 Hour Violation',
 'COVID-19',
 'Container Left Out',
 'Container Out Late',
 'Damaged/Knocked Over Pole',
 'Dead Animal',
 'Development Services - Code Enforcement',
 'Drain Inlet',
 'Encampment',
 'Encroachment',
 'Environmental Services Code Compliance',
 'Fallen/Hanging Tree Limb',
 'Flashing Traffic Signal Lights',
 'Graffiti Removal',
 'Graffiti Removal - Commercial',
 'Illegal Dumping',
 'Litter',
 'Missed Collection',
 'Other',
 'Oversized Vehicle',
 'Parking',
 'Parking Zone Violation',
 'Pavement Maintenance',
 'Potential Missed Collection',
 'Pothole',
 'Quality of Life Issues',
 'ROW Maintenance',
 'Resurfacing Evaluation',
 'Right-of-Way Code Enforcement',
 'Shared Mobility Device',
 'Sidewalk Repair Issue',
 'Stormwater',
 'Stormwater Code Enforcement',
 'Stormwater Pollution Prevention',
 'Street Flooded',
 'Street Light Maintenance',
 'Street Light Out of Service',
 'Street Sweeping',
 'Traffic Engineering',
 'Traffic Sign Maintenance',
 'Traffic Signal Issue',
 'Traffic Signal Out of Service',
 'Traffic Signal Timing',
 'Trash/Recycling Collection',
 'Tree Maintenance',
 'Tree Removal',
 'Tree Trimming for Pedestrian/Vehicle Clearance',
 'Trimming Request',
 'Vegetation Encroachment',
 'Waste on Private Property',
 'Weed Cleanup']
results_df

In [None]:
#return column where value is that above
j = 0
max_col = []

for j in range(100):

    i=0


    while i < results_df.shape[1]:
        if results_df.iloc[j,i] == results_df.iloc[j,:].max():
            max_col.append(list(results_df.columns)[i])
        i += 1

In [None]:
max_col

In [None]:
test_df = pd.DataFrame(test_data).head(100)

In [None]:
test_df['prediction'] = max_col

In [None]:
test_df.sample(10)

In [None]:
print(classification_report(test_labels.numpy().argmax(axis=1), y_pred.argmax(axis=1), zero_division = 1))

# 2022 Data

## Text Processing Pipeline

In [None]:
with open('../data/processed_22_data.pkl', 'rb') as file:
    df_22 = pickle.load(file)

In [None]:
train_data_f, test_data_f, dataset_train, dataset_test, for_results, train_data, train_labels, test_data, test_labels = process_text(df_22)

## Model Loading

In [None]:
# model = keras.models.load_model('../data/tf_model.pkl')

## Model Training Pipeline

In [None]:
history = model.fit(train_data_f,
                   epochs=10,
                   validation_data=test_data_f,
                   verbose=1,
                   class_weight=weights)

In [None]:
results = model.evaluate(dataset_test.map(fetch).batch(for_results), verbose=2)

In [None]:
print(results)

In [None]:
next(iter(dataset_test.map(fetch).batch(1)))

In [None]:
type(dataset_test)

In [None]:
y_pred=model.predict(test_data)

In [None]:
type(test_data)

In [None]:
y_pred

In [None]:
results_df = pd.DataFrame(y_pred)
results_df.columns = ['72 Hour Violation',
 'COVID-19',
 'Container Left Out',
 'Container Out Late',
 'Damaged/Knocked Over Pole',
 'Dead Animal',
 'Development Services - Code Enforcement',
 'Drain Inlet',
 'Encampment',
 'Encroachment',
 'Environmental Services Code Compliance',
 'Fallen/Hanging Tree Limb',
 'Flashing Traffic Signal Lights',
 'Graffiti Removal',
 'Graffiti Removal - Commercial',
 'Illegal Dumping',
 'Litter',
 'Missed Collection',
 'Other',
 'Oversized Vehicle',
 'Parking',
 'Parking Zone Violation',
 'Pavement Maintenance',
 'Potential Missed Collection',
 'Pothole',
 'Quality of Life Issues',
 'ROW Maintenance',
 'Resurfacing Evaluation',
 'Right-of-Way Code Enforcement',
 'Shared Mobility Device',
 'Sidewalk Repair Issue',
 'Stormwater',
 'Stormwater Code Enforcement',
 'Stormwater Pollution Prevention',
 'Street Flooded',
 'Street Light Maintenance',
 'Street Light Out of Service',
 'Street Sweeping',
 'Traffic Engineering',
 'Traffic Sign Maintenance',
 'Traffic Signal Issue',
 'Traffic Signal Out of Service',
 'Traffic Signal Timing',
 'Trash/Recycling Collection',
 'Tree Maintenance',
 'Tree Removal',
 'Tree Trimming for Pedestrian/Vehicle Clearance',
 'Trimming Request',
 'Vegetation Encroachment',
 'Waste on Private Property',
 'Weed Cleanup']
results_df

In [None]:
#return column where value is that above
j = 0
max_col = []

for j in range(100):

    i=0


    while i < results_df.shape[1]:
        if results_df.iloc[j,i] == results_df.iloc[j,:].max():
            max_col.append(list(results_df.columns)[i])
        i += 1

In [None]:
max_col

In [None]:
test_df = pd.DataFrame(test_data).head(100)

In [None]:
test_df['prediction'] = max_col

In [None]:
test_df.sample(10)

In [None]:
print(classification_report(test_labels.numpy().argmax(axis=1), y_pred.argmax(axis=1), zero_division = 1))

# More 2021 Data

## Model Training Pipeline

In [None]:
history = model.fit(train_data_f_x,
                   epochs=1,
                   validation_data=test_data_f_x,
                   verbose=1,
                   class_weight=weights)

In [None]:
results = model.evaluate(dataset_test_x.map(fetch).batch(for_results_x), verbose=2)

In [None]:
print(results)

In [None]:
next(iter(dataset_test_x.map(fetch).batch(1)))

In [None]:
type(dataset_test_x)

In [None]:
y_pred=model.predict(test_data_x)

In [None]:
type(test_data_x)

In [None]:
y_pred

In [None]:
results_df = pd.DataFrame(y_pred)
results_df.columns = ['72 Hour Violation',
 'COVID-19',
 'Container Left Out',
 'Container Out Late',
 'Damaged/Knocked Over Pole',
 'Dead Animal',
 'Development Services - Code Enforcement',
 'Drain Inlet',
 'Encampment',
 'Encroachment',
 'Environmental Services Code Compliance',
 'Fallen/Hanging Tree Limb',
 'Flashing Traffic Signal Lights',
 'Graffiti Removal',
 'Graffiti Removal - Commercial',
 'Illegal Dumping',
 'Litter',
 'Missed Collection',
 'Other',
 'Oversized Vehicle',
 'Parking',
 'Parking Zone Violation',
 'Pavement Maintenance',
 'Potential Missed Collection',
 'Pothole',
 'Quality of Life Issues',
 'ROW Maintenance',
 'Resurfacing Evaluation',
 'Right-of-Way Code Enforcement',
 'Shared Mobility Device',
 'Sidewalk Repair Issue',
 'Stormwater',
 'Stormwater Code Enforcement',
 'Stormwater Pollution Prevention',
 'Street Flooded',
 'Street Light Maintenance',
 'Street Light Out of Service',
 'Street Sweeping',
 'Traffic Engineering',
 'Traffic Sign Maintenance',
 'Traffic Signal Issue',
 'Traffic Signal Out of Service',
 'Traffic Signal Timing',
 'Trash/Recycling Collection',
 'Tree Maintenance',
 'Tree Removal',
 'Tree Trimming for Pedestrian/Vehicle Clearance',
 'Trimming Request',
 'Vegetation Encroachment',
 'Waste on Private Property',
 'Weed Cleanup']
results_df

In [None]:
#return column where value is that above
j = 0
max_col = []

for j in range(100):

    i=0


    while i < results_df.shape[1]:
        if results_df.iloc[j,i] == results_df.iloc[j,:].max():
            max_col.append(list(results_df.columns)[i])
        i += 1

In [None]:
max_col

In [None]:
test_df = pd.DataFrame(test_data).head(100)

In [None]:
test_df['prediction'] = max_col

In [None]:
test_df.sample(10)

In [None]:
print(classification_report(test_labels.numpy().argmax(axis=1), y_pred.argmax(axis=1), zero_division = 1))