# Tensoflow NLP Model for Text Classification

## Imports

In [404]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.model_selection import train_test_split
import pickle

## Pre-Processing

In [405]:
with open('../data/processed_21_data.pkl', 'rb') as file:
    processed_21_data = pickle.load(file)

In [406]:
#only keep relevant columns
processed_21_data = processed_21_data[['detailed_type', 'public_description']]

In [407]:
processed_21_data[processed_21_data['public_description'] == 'Encampment out front of Kristys.']

Unnamed: 0,detailed_type,public_description
133467,Encampment,Encampment out front of Kristys.


In [408]:
# #split train and test data
# (requests_train, requests_test) = train_test_split(processed_21_data, test_size = 0.2, random_state = 42)

## Tensorflow Model

### Imports

In [409]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import os
import datetime
import tensorflow_hub as hub

In [410]:
pd.set_option('display.max_colwidth', None)

In [411]:
processed_21_data.sample(10)

Unnamed: 0,detailed_type,public_description
259788,Quality of Life Issues,Still no one has came and investigate illegal mechanic work done in the alley and streets oil spills every where this guy just washes everything off in the streets please send a different city worker who not on this case because nothing is getting done I got phone call few days back and he hung up on me and still doesn't get nothing done I see different cars going and coming and yet oil spills just gets.washed into the street please review or if I have to i will call news and get lawyer tou have many complaints amd I will let my lawyer know please dont call me asking questions come review or you will get later from my attorney and sue the city for not doing there job
260548,72 Hour Violation,"The car appears to be abandoned.. ( ( interior and exterior damage), has not been moved in at least 2 weeks...and .has 2020 license plates)."
97777,Other,Illegal dumping of rocks on bird nests
318752,Dead Animal,A dead possum in the middle of the rad
112045,Encampment,Fed ex box being used as a shelter under MTS trolley bridge.
125692,Shared Mobility Device,Link
228856,72 Hour Violation,Inoperable and abandoned
65459,Missed Collection,Missed greens collection
205859,Traffic Signal Out of Service,Broken traffic light
51767,Damaged/Knocked Over Pole,VANDALIZED STREET LIGHT


In [412]:
X_train, X_test = train_test_split(processed_21_data, test_size=0.2, random_state=42)

In [413]:
from sklearn.utils import class_weight

In [414]:
class_weights = list(class_weight.compute_class_weight(class_weight = 'balanced',
                                                       classes= np.unique(processed_21_data['detailed_type']),
                                                      y=processed_21_data['detailed_type']
                                                      ))

In [415]:
len(X_train['detailed_type'].value_counts())

51

In [416]:
class_weights

[0.1823593766156387,
 8.660313862848117,
 3.020793656497419,
 5.369206531689766,
 2.7260414639946386,
 1.385553156538603,
 176.60854341736695,
 3.915312126434105,
 0.18867714203847055,
 3.2320517749583493,
 6.26747682596486,
 2.9157070847206805,
 2.677335796256781,
 0.1989795274298356,
 1.1417776993041502,
 0.3181317045603625,
 4.495490196078431,
 0.1319768132505878,
 0.5327557870810467,
 25.890257673750128,
 6.945279797312184,
 0.32755111715481716,
 2.7230392156862746,
 1.7351014791881665,
 0.540028307926862,
 4.940099116569705,
 1.660523578135082,
 4.759421766781785,
 494.50392156862745,
 0.25155352608028664,
 1.4686781157369393,
 10.611672136665826,
 34.580693815987935,
 2.5450536364829,
 4.131193998067063,
 11.77390289449113,
 1.476571876884525,
 2.597184462020102,
 2.319436780340654,
 1.435008478144595,
 11.187871528701978,
 1.883107088989442,
 1.624520110278014,
 4.368409201136285,
 2.2559485473021326,
 2.6904457103842625,
 4.974888546968083,
 4.532574899804101,
 3.19240749882909

In [417]:
num_options = len(class_weights)
num_options

51

In [418]:
weights = {}

In [419]:
for index, weight in enumerate(class_weights) :
    weights[index]=weight

In [420]:
weights

{0: 0.1823593766156387,
 1: 8.660313862848117,
 2: 3.020793656497419,
 3: 5.369206531689766,
 4: 2.7260414639946386,
 5: 1.385553156538603,
 6: 176.60854341736695,
 7: 3.915312126434105,
 8: 0.18867714203847055,
 9: 3.2320517749583493,
 10: 6.26747682596486,
 11: 2.9157070847206805,
 12: 2.677335796256781,
 13: 0.1989795274298356,
 14: 1.1417776993041502,
 15: 0.3181317045603625,
 16: 4.495490196078431,
 17: 0.1319768132505878,
 18: 0.5327557870810467,
 19: 25.890257673750128,
 20: 6.945279797312184,
 21: 0.32755111715481716,
 22: 2.7230392156862746,
 23: 1.7351014791881665,
 24: 0.540028307926862,
 25: 4.940099116569705,
 26: 1.660523578135082,
 27: 4.759421766781785,
 28: 494.50392156862745,
 29: 0.25155352608028664,
 30: 1.4686781157369393,
 31: 10.611672136665826,
 32: 34.580693815987935,
 33: 2.5450536364829,
 34: 4.131193998067063,
 35: 11.77390289449113,
 36: 1.476571876884525,
 37: 2.597184462020102,
 38: 2.319436780340654,
 39: 1.435008478144595,
 40: 11.187871528701978,
 41: 

In [421]:
list1 = [3,1]

In [422]:
np.array(list1).shape

(2,)

In [497]:
array1 = np.array([1])
array2 = np.array([5])

In [526]:
string1 = 'string1'
string2 = 'string2'
string3 = 'string3'

string_array = np.array([string1])
print('shape', string_array.shape)
print('type', type(string_array))

shape (1,)
type <class 'numpy.ndarray'>


In [527]:
test_array = X_train['public_description'].values[0:1]
print('shape', test_array.shape)
print('type', type(test_array))

shape (1,)
type <class 'numpy.ndarray'>


In [528]:
type(X_train['public_description'].values[0])

str

In [529]:
dataset_train = tf.data.Dataset.from_tensor_slices((string_array, test_array))

In [512]:
#Instantiate
dataset_train = tf.data.Dataset.from_tensor_slices((X_train['public_description'].values, X_train['detailed_type'].values))
dataset_test = tf.data.Dataset.from_tensor_slices((X_test['public_description'].values, X_test['detailed_type'].values))

In [513]:
dataset_train = tf.data.Dataset.from_tensor_slices((testing, X_train['detailed_type'].values))

In [426]:
for text, target in dataset_train.take(5):
    print('description: {}, Target: {}'.format(text, target))

description: b'all lights out', Target: b'Street Light Out of Service'
description: b'Graffiti tag on streetlight post southwest corner of nutmeg and 4th Avenue', Target: b'Graffiti Removal'
description: b'Graffiti -Jacky', Target: b'Graffiti Removal'
description: b'There are many campers, motorhomes, trailers parked for weeks and putting trash all over. the have broke the lock on the hose bib on the lawnmower shop and created a $900 dollar water bill.  they have pooped on the sidewalk as well as urinating causing possible disease.  one man died on the street.  this is not a safe environment. please do something about it', Target: b'Encampment'
description: b'Homeless Encampment', Target: b'Encampment'


In [427]:
for text, target in dataset_test.take(5):
    print('description: {}, Target: {}'.format(text, target))

description: b"Once again our stop sign has been knocked over and this time I got the license plate number of the truck that knocked it down.....34093A1                                      This has been an ongoing issue for too long with constructions trucks that are driving down our easement.  This tiny street/easement is meant for the people who live on it, it is not a parking lot for lots of construction trucks.  They have been blocking driveways for the residents that live down there with no regard.  I have tried talking with them about it isn't parking for them and some of the trucks are way too large and that is why the sign is continually knocked down.       Please have someone put the stop sign back up and could we please also get a sign saying no construction trucks, they have the alley to park it but they want to do what is easiest despite the constant inconvenience and noise that is disrupting the residents who pay a fortune to live here.    Truck blocking driveway", Target

In [428]:
detailed_types = list(np.unique(X_train['detailed_type']))
detailed_types

['72 Hour Violation',
 'COVID-19',
 'Container Left Out',
 'Container Out Late',
 'Damaged/Knocked Over Pole',
 'Dead Animal',
 'Development Services - Code Enforcement',
 'Drain Inlet',
 'Encampment',
 'Encroachment',
 'Environmental Services Code Compliance',
 'Fallen/Hanging Tree Limb',
 'Flashing Traffic Signal Lights',
 'Graffiti Removal',
 'Graffiti Removal - Commercial',
 'Illegal Dumping',
 'Litter',
 'Missed Collection',
 'Other',
 'Oversized Vehicle',
 'Parking',
 'Parking Zone Violation',
 'Pavement Maintenance',
 'Potential Missed Collection',
 'Pothole',
 'Quality of Life Issues',
 'ROW Maintenance',
 'Resurfacing Evaluation',
 'Right-of-Way Code Enforcement',
 'Shared Mobility Device',
 'Sidewalk Repair Issue',
 'Stormwater',
 'Stormwater Code Enforcement',
 'Stormwater Pollution Prevention',
 'Street Flooded',
 'Street Light Maintenance',
 'Street Light Out of Service',
 'Street Sweeping',
 'Traffic Engineering',
 'Traffic Sign Maintenance',
 'Traffic Signal Issue',
 'Tr

In [429]:
vals = list(range(num_options))

In [430]:
num_options == len(vals)

True

In [431]:
table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(detailed_types),
        values=tf.constant(vals),
    ),
        default_value=tf.constant(-1),
        name="target_encoding"
)


In [535]:
pickle.dump(detailed_types, open('../data/detailed_types.pkl', 'wb'))
pickle.dump(vals, open('../data/vals.pkl', 'wb'))

In [432]:
@tf.function
def target(x):
    return table.lookup(x)

In [433]:
def show_batch(dataset, size=5):
    for batch, label in dataset.take(size):
        print(batch.numpy())
        print(target(label).numpy())

In [434]:
show_batch(dataset_test, 6)

b"Once again our stop sign has been knocked over and this time I got the license plate number of the truck that knocked it down.....34093A1                                      This has been an ongoing issue for too long with constructions trucks that are driving down our easement.  This tiny street/easement is meant for the people who live on it, it is not a parking lot for lots of construction trucks.  They have been blocking driveways for the residents that live down there with no regard.  I have tried talking with them about it isn't parking for them and some of the trucks are way too large and that is why the sign is continually knocked down.       Please have someone put the stop sign back up and could we please also get a sign saying no construction trucks, they have the alley to park it but they want to do what is easiest despite the constant inconvenience and noise that is disrupting the residents who pay a fortune to live here.    Truck blocking driveway"
39
b'MISSION BAY DR 

In [435]:
def fetch(text, labels):
        return text, tf.one_hot(target(labels),num_options)

In [436]:
train_data_f = dataset_train.map(fetch)
test_data_f=dataset_test.map(fetch)

In [437]:
next(iter(train_data_f))

(<tf.Tensor: shape=(), dtype=string, numpy=b'all lights out'>,
 <tf.Tensor: shape=(51,), dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       dtype=float32)>)

In [438]:
train_data, train_labels = next(iter(train_data_f.batch(2)))
train_data, train_labels

(<tf.Tensor: shape=(2,), dtype=string, numpy=
 array([b'all lights out',
        b'Graffiti tag on streetlight post southwest corner of nutmeg and 4th Avenue'],
       dtype=object)>,
 <tf.Tensor: shape=(2, 51), dtype=float32, numpy=
 array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]], dtype=float32)>)

In [439]:
embedding = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1"
hub_layer = hub.KerasLayer(embedding, output_shape=[128], input_shape=[],
                           dtype=tf.string, trainable=True)
hub_layer(train_data[:1])

<tf.Tensor: shape=(1, 128), dtype=float32, numpy=
array([[ 1.47995263e-01, -7.63717247e-03,  1.62601136e-02,
         1.52963430e-01,  7.51934499e-02,  8.86549726e-02,
        -6.71813870e-03, -1.12587452e-01, -1.01478808e-01,
         1.24230325e-01, -1.80843234e-01,  5.74152917e-02,
        -4.22778800e-02,  4.33041453e-02, -2.48068776e-02,
        -3.67566049e-02, -8.53721052e-02, -1.49181023e-01,
        -1.95396766e-02,  6.50828108e-02,  1.18911266e-01,
        -2.22919315e-01,  1.31057156e-02, -5.93604147e-02,
         8.29051733e-02, -1.95304886e-01,  1.01555437e-01,
         5.32250106e-02,  1.15061872e-01, -7.33501092e-02,
         4.23228256e-02,  3.95037904e-02, -3.54817114e-03,
        -4.91514914e-02,  1.39009073e-01,  9.05774087e-02,
         4.52620238e-02, -2.33229801e-01,  2.15860397e-01,
        -6.49156719e-02, -4.01241407e-02, -1.58689711e-02,
         1.07540227e-01, -7.06280991e-02,  2.09827513e-01,
         1.61402494e-01, -5.13916537e-02,  1.61632493e-01,
      

In [440]:
model = tf.keras.Sequential()
model.add(hub_layer)
for units in [128, 128, 64, 32]:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(num_options, activation='softmax'))

model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer_4 (KerasLayer)  (None, 128)               124642688 
                                                                 
 dense_25 (Dense)            (None, 128)               16512     
                                                                 
 dropout_20 (Dropout)        (None, 128)               0         
                                                                 
 dense_26 (Dense)            (None, 128)               16512     
                                                                 
 dropout_21 (Dropout)        (None, 128)               0         
                                                                 
 dense_27 (Dense)            (None, 64)                8256      
                                                                 
 dropout_22 (Dropout)        (None, 64)               

In [441]:
model.compile(optimizer='adam',
             loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
             metrics=['accuracy'])

In [442]:
train_data_f=train_data_f.shuffle(70000).batch(512)
test_data_f=test_data_f.batch(512)

In [443]:
history = model.fit(train_data_f,
                   epochs=20,
                   validation_data=test_data_f,
                   verbose=1,
                   class_weight=weights)

Epoch 1/20


  output, from_logits = _get_logits(


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [536]:
model.save('../data/tf_model.pkl')



INFO:tensorflow:Assets written to: ../data/tf_model.pkl\assets


INFO:tensorflow:Assets written to: ../data/tf_model.pkl\assets


In [444]:
pickle.dump(model, open('../data/tf_model.pkl', 'wb'))

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\dense_2
......vars
.........0
.........1
...layers\dense_3
......vars
.........0
.........1
...layers\dense_4
......vars
.........0
.........1
...layers\dropout
......vars
...layers\dropout_1
......vars
...layers\dropout_2
......vars
...layers\dropout_3
......vars
...layers\keras_layer
......vars
.........0
...metrics\mean
......vars
.........0
.........1
...metrics\mean_metric_wrapper
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
.........14
.........15
.........16
.........17
.........18
.........19
.........2
.........20
.........21
.........22
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name                                             Modified             S

In [445]:
for_results = len(list(dataset_test))

In [446]:
results = model.evaluate(dataset_test.map(fetch).batch(for_results), verbose=2)

1/1 - 8s - loss: 1.5803 - accuracy: 0.5962 - 8s/epoch - 8s/step


In [447]:
print(results)

[1.5802637338638306, 0.5961736440658569]


In [448]:
next(iter(dataset_test.map(fetch).batch(1)))

(<tf.Tensor: shape=(1,), dtype=string, numpy=
 array([b"Once again our stop sign has been knocked over and this time I got the license plate number of the truck that knocked it down.....34093A1                                      This has been an ongoing issue for too long with constructions trucks that are driving down our easement.  This tiny street/easement is meant for the people who live on it, it is not a parking lot for lots of construction trucks.  They have been blocking driveways for the residents that live down there with no regard.  I have tried talking with them about it isn't parking for them and some of the trucks are way too large and that is why the sign is continually knocked down.       Please have someone put the stop sign back up and could we please also get a sign saying no construction trucks, they have the alley to park it but they want to do what is easiest despite the constant inconvenience and noise that is disrupting the residents who pay a fortune to live 

In [532]:
type(dataset_test)

tensorflow.python.data.ops.from_tensor_slices_op.TensorSliceDataset

In [449]:
test_data, test_labels = next(iter(dataset_test.map(fetch).batch(for_results)))

In [450]:
y_pred=model.predict(test_data)



In [451]:
type(test_data)

tensorflow.python.framework.ops.EagerTensor

In [452]:
y_pred

array([[1.4004092e-02, 3.2213966e-06, 4.3309168e-08, ..., 2.2522328e-04,
        3.3487740e-06, 4.0274586e-05],
       [7.9529974e-26, 1.9211725e-35, 6.4007550e-34, ..., 5.0238171e-27,
        2.7139044e-25, 3.7542751e-21],
       [1.0603684e-07, 4.1008612e-08, 3.5727218e-05, ..., 1.5112375e-08,
        4.7966960e-01, 1.2445651e-07],
       ...,
       [7.6039565e-01, 8.9000185e-09, 2.6873358e-11, ..., 1.5777474e-17,
        1.9883760e-12, 9.8323743e-23],
       [3.2549128e-09, 1.3021414e-06, 4.4240257e-13, ..., 6.3599640e-05,
        1.2652109e-08, 1.1497455e-06],
       [1.7459781e-04, 1.8472587e-04, 8.8852532e-02, ..., 9.1049282e-13,
        1.2046679e-04, 6.2506367e-12]], dtype=float32)

In [453]:
results_df = pd.DataFrame(y_pred)
results_df.columns = ['72 Hour Violation',
 'COVID-19',
 'Container Left Out',
 'Container Out Late',
 'Damaged/Knocked Over Pole',
 'Dead Animal',
 'Development Services - Code Enforcement',
 'Drain Inlet',
 'Encampment',
 'Encroachment',
 'Environmental Services Code Compliance',
 'Fallen/Hanging Tree Limb',
 'Flashing Traffic Signal Lights',
 'Graffiti Removal',
 'Graffiti Removal - Commercial',
 'Illegal Dumping',
 'Litter',
 'Missed Collection',
 'Other',
 'Oversized Vehicle',
 'Parking',
 'Parking Zone Violation',
 'Pavement Maintenance',
 'Potential Missed Collection',
 'Pothole',
 'Quality of Life Issues',
 'ROW Maintenance',
 'Resurfacing Evaluation',
 'Right-of-Way Code Enforcement',
 'Shared Mobility Device',
 'Sidewalk Repair Issue',
 'Stormwater',
 'Stormwater Code Enforcement',
 'Stormwater Pollution Prevention',
 'Street Flooded',
 'Street Light Maintenance',
 'Street Light Out of Service',
 'Street Sweeping',
 'Traffic Engineering',
 'Traffic Sign Maintenance',
 'Traffic Signal Issue',
 'Traffic Signal Out of Service',
 'Traffic Signal Timing',
 'Trash/Recycling Collection',
 'Tree Maintenance',
 'Tree Removal',
 'Tree Trimming for Pedestrian/Vehicle Clearance',
 'Trimming Request',
 'Vegetation Encroachment',
 'Waste on Private Property',
 'Weed Cleanup']
results_df

Unnamed: 0,72 Hour Violation,COVID-19,Container Left Out,Container Out Late,Damaged/Knocked Over Pole,Dead Animal,Development Services - Code Enforcement,Drain Inlet,Encampment,Encroachment,...,Traffic Signal Out of Service,Traffic Signal Timing,Trash/Recycling Collection,Tree Maintenance,Tree Removal,Tree Trimming for Pedestrian/Vehicle Clearance,Trimming Request,Vegetation Encroachment,Waste on Private Property,Weed Cleanup
0,1.400409e-02,3.221397e-06,4.330917e-08,6.282779e-06,3.867684e-05,1.438066e-03,1.303489e-04,3.368154e-06,3.484664e-03,1.353177e-04,...,2.566203e-03,4.697328e-03,3.485864e-07,9.288951e-04,1.275201e-05,2.196812e-04,1.236679e-05,2.252233e-04,3.348774e-06,4.027459e-05
1,7.952997e-26,1.921172e-35,6.400755e-34,2.068107e-16,1.341530e-08,2.048549e-15,0.000000e+00,3.419371e-16,1.080467e-19,9.889387e-21,...,4.227916e-05,6.974168e-10,1.952794e-23,2.430370e-19,8.868974e-23,4.153758e-32,6.027741e-27,5.023817e-27,2.713904e-25,3.754275e-21
2,1.060368e-07,4.100861e-08,3.572722e-05,1.717829e-11,1.291417e-09,2.862426e-08,2.923027e-08,1.206629e-05,1.835862e-03,1.932395e-02,...,1.462917e-13,4.733621e-16,2.532649e-03,6.677794e-12,3.369088e-13,6.672093e-11,2.552749e-16,1.511238e-08,4.796696e-01,1.244565e-07
3,1.019432e-03,1.595867e-05,1.035575e-08,6.502469e-11,6.937907e-06,8.153989e-03,1.655700e-04,5.380665e-05,1.198037e-02,1.496114e-04,...,2.206408e-05,2.596452e-08,6.854315e-07,2.283301e-06,1.428559e-06,3.241310e-06,1.644668e-08,2.024184e-04,1.701057e-04,6.113483e-05
4,3.403670e-18,3.331161e-24,3.346037e-31,2.307904e-12,5.328438e-05,4.748878e-13,3.296403e-29,2.777487e-13,2.280970e-18,1.924043e-23,...,9.166949e-01,3.303359e-02,2.621090e-24,6.016587e-14,7.728987e-14,1.322142e-23,2.452885e-15,2.012452e-23,1.665746e-26,1.474113e-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50435,3.999212e-06,1.737688e-12,5.024334e-11,8.712839e-11,1.406222e-15,2.702042e-11,4.280966e-11,9.768862e-12,2.679289e-06,6.875886e-05,...,1.761600e-14,6.540602e-12,9.473541e-09,1.615753e-13,1.274155e-19,1.204441e-13,6.033810e-20,5.696827e-14,4.034709e-08,5.217684e-13
50436,4.174303e-01,1.838978e-05,1.642062e-06,5.097051e-08,3.432065e-15,2.128779e-08,2.614768e-04,4.738856e-12,3.778809e-02,2.583959e-06,...,9.020051e-11,2.505037e-10,1.761776e-08,7.535187e-14,2.758951e-16,9.931037e-10,4.978675e-15,4.491558e-11,9.784725e-07,5.391362e-14
50437,7.603956e-01,8.900018e-09,2.687336e-11,7.246794e-13,7.261433e-28,6.634876e-14,1.177669e-05,1.948144e-20,4.752715e-03,4.518063e-11,...,1.092053e-17,2.017346e-14,8.617533e-15,1.284107e-21,6.749636e-26,3.743257e-14,2.161552e-22,1.577747e-17,1.988376e-12,9.832374e-23
50438,3.254913e-09,1.302141e-06,4.424026e-13,2.513169e-12,2.049538e-02,2.776089e-07,1.735650e-08,4.735282e-04,4.156228e-06,2.138645e-10,...,1.021523e-05,2.920063e-05,9.274232e-11,2.622942e-01,2.209396e-01,3.228817e-03,1.014484e-01,6.359964e-05,1.265211e-08,1.149745e-06


In [454]:
#return column where value is that above
j = 0
max_col = []

for j in range(100):

    i=0


    while i < results_df.shape[1]:
        if results_df.iloc[j,i] == results_df.iloc[j,:].max():
            max_col.append(list(results_df.columns)[i])
        i += 1

In [455]:
max_col

['Traffic Engineering',
 'Flashing Traffic Signal Lights',
 'Waste on Private Property',
 'Quality of Life Issues',
 'Traffic Signal Out of Service',
 'Damaged/Knocked Over Pole',
 'Traffic Signal Out of Service',
 'Quality of Life Issues',
 'Missed Collection',
 'Pothole',
 'Encampment',
 'Missed Collection',
 'Shared Mobility Device',
 'Graffiti Removal',
 '72 Hour Violation',
 'Pothole',
 'Waste on Private Property',
 'Encampment',
 'Other',
 'Traffic Signal Timing',
 'Illegal Dumping',
 'Missed Collection',
 '72 Hour Violation',
 '72 Hour Violation',
 'Parking Zone Violation',
 'Encampment',
 'Trash/Recycling Collection',
 'Graffiti Removal',
 'Vegetation Encroachment',
 'Graffiti Removal - Commercial',
 'Quality of Life Issues',
 'Parking Zone Violation',
 'Parking',
 '72 Hour Violation',
 'Graffiti Removal - Commercial',
 'Sidewalk Repair Issue',
 'Traffic Signal Timing',
 'Traffic Sign Maintenance',
 'Street Light Out of Service',
 'Graffiti Removal - Commercial',
 'Graffiti Rem

In [456]:
test_df = pd.DataFrame(test_data).head(100)

In [457]:
test_df['prediction'] = max_col

In [475]:
test_df.sample(10)

Unnamed: 0,0,prediction
77,b'Signal out',Traffic Signal Out of Service
32,b'Please ticket vehicles on 3 minute curb asked to leave',Parking
9,b'Requested repair of pothole in front of our driveway/garage about 1 month ago. A different pothole was repaired down the street. This one continues to get larger. Can you please repair it. Thank you',Pothole
40,b'Graffiti on fire hydrant',Graffiti Removal
5,b'Electrical access and base of lamp covered with dirt/erosion from city park',Damaged/Knocked Over Pole
7,b'Drugs and poop homeless',Quality of Life Issues
50,b'Encampment around and under the El Cajon BLVD and 805 freeway overpass. Trash being left in cul de sac on Bancroft Street and several tents present by the 805 Northbound Freeway entrance.',Encampment
35,"b""The Sprinkler is busted off in the tree box between my sidewalk and curb.\nIt has been that way for a few weeks. The City Landscape crew does my street every Monday morning. I'm at a loss why no one has noticed the missing Sprinkler and washed out dirt.\nIf it could be fixed I would appreciate it. \nThru the 20 years I have owned this property I have fixed it myself on several occasions but turning it over to you this time.\nRegards\nKevin Rupert\n562-552-6441""",Sidewalk Repair Issue
85,b'Trash is missed',Missed Collection
52,b'Recycle truck was a day late operating at an unsafe speed and drive right past our bins AGAIN! Truck number 815 323',Container Left Out


In [467]:
from sklearn.metrics import classification_report

In [460]:
print(classification_report(test_labels.numpy().argmax(axis=1), y_pred.argmax(axis=1), zero_division = 1))

              precision    recall  f1-score   support

           0       0.85      0.71      0.77      5315
           1       0.37      0.70      0.48       109
           2       0.35      0.67      0.46       353
           3       0.30      0.86      0.44       174
           4       0.46      0.65      0.54       339
           5       0.91      0.91      0.91       685
           6       0.00      0.00      0.00         7
           7       0.44      0.57      0.50       235
           8       0.91      0.65      0.76      5255
           9       0.31      0.58      0.40       310
          10       0.11      0.36      0.16       146
          11       0.32      0.44      0.37       347
          12       0.84      0.81      0.82       365
          13       0.82      0.46      0.59      5084
          14       0.19      0.63      0.29       830
          15       0.51      0.02      0.04      3112
          16       0.09      0.32      0.14       218
          17       0.96    

In [461]:
from sklearn.metrics import confusion_matrix
confusion_matrix(test_labels.numpy().argmax(axis=1), y_pred.argmax(axis=1))

array([[3760,    4,   27, ...,    0,   21,    4],
       [   1,   76,    0, ...,    0,    0,    2],
       [   2,    1,  235, ...,    0,   11,    0],
       ...,
       [   2,    0,    0, ...,  173,    1,   56],
       [   1,    2,   11, ...,    3,  218,    2],
       [   1,    2,    0, ...,   43,    1,   59]], dtype=int64)