In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
from numpy.random import seed
seed(1)

In [3]:
df = pd.read_csv('casino_crime.csv')
df.head()

Unnamed: 0,Id,Year,State,Countym,Industry,Suspicious Activity,Count,Lat,Long
0,0,2014,Arizona,Coconino County,Tribal Authorized Casino,Alters or Cancels Transaction to Avoid CTR Req...,1,35.829692,-111.773728
1,1,2014,Arizona,Coconino County,Tribal Authorized Casino,Exchanges Small Bills for Large Bills or Vice ...,1,35.829692,-111.773728
2,2,2014,Arizona,Coconino County,Tribal Authorized Casino,Other Structuring,1,35.829692,-111.773728
3,3,2014,Arizona,Coconino County,Tribal Authorized Casino,Provided Questionable or False Documentation,1,35.829692,-111.773728
4,4,2016,Arizona,Coconino County,Tribal Authorized Casino,Account Takeover,1,35.829692,-111.773728


In [4]:
df['Count'] = df['Count'].str.replace(',', '').astype(float).astype(int)

In [5]:
data = df[['State', 'Industry', 'Suspicious Activity', 'Count']]

data.head()

Unnamed: 0,State,Industry,Suspicious Activity,Count
0,Arizona,Tribal Authorized Casino,Alters or Cancels Transaction to Avoid CTR Req...,1
1,Arizona,Tribal Authorized Casino,Exchanges Small Bills for Large Bills or Vice ...,1
2,Arizona,Tribal Authorized Casino,Other Structuring,1
3,Arizona,Tribal Authorized Casino,Provided Questionable or False Documentation,1
4,Arizona,Tribal Authorized Casino,Account Takeover,1


In [6]:
data['Suspicious Activity'].unique()

array(['Alters or Cancels Transaction to Avoid CTR Requirement',
       'Exchanges Small Bills for Large Bills or Vice Versa',
       'Other Structuring',
       'Provided Questionable or False Documentation', 'Account Takeover',
       'Identity Theft', 'Minimal Gaming with Large Transactions',
       'Refused or Avoided Request for Documentation',
       'Single Individual with Multiple Identities',
       'Suspicion Concerning the Physical Condition of Funds',
       'Suspicious Exchange of Currencies',
       'Transaction Out of Pattern for Customer(s)',
       'Transaction(s) Below BSA Recordkeeping Threshold',
       'Transaction(s) Below CTR Threshold',
       'Two or More Individuals Working Together',
       'Alters or Cancels Transaction to Avoid BSA Recordkeeping Requirement',
       'Counterfeit Instrument',
       'Customer Cancels Transaction to Avoid Bsa Reporting and Recordkeeping Requirements',
       'Embezzlement/Theft/Disappearance of Funds',
       'Inquiry about E

In [7]:
activity = data.groupby(['Suspicious Activity', 'State', 'Industry'])['Count'].sum().reset_index()
activity.dtypes

Suspicious Activity    object
State                  object
Industry               object
Count                   int64
dtype: object

In [8]:
activity.head()

Unnamed: 0,Suspicious Activity,State,Industry,Count
0,ACH,California,Card Club,2
1,ACH,California,Tribal Authorized Casino,30
2,ACH,Nevada,State Licensed Casino,12
3,ACH,New Mexico,Other Casino/Card Club,1
4,ACH,New Mexico,Tribal Authorized Casino,1


In [9]:
activity['Suspicious Activity'].value_counts()

Transaction(s) Below CTR Threshold                           14
Refused or Avoided Request for Documentation                 13
Other Structuring                                            13
Two or More Individuals Working Together                     13
Other Other Suspicious Activities                            13
                                                             ..
Market Manipulation/Wash Trading                              1
Proceeds Sent to or Received from Unrelated Third Party       1
Excessive or Unusal Cash Borrowing against Policy/Annuity     1
Business Loan                                                 1
Other Cyber Event                                             1
Name: Suspicious Activity, Length: 84, dtype: int64

In [10]:
# activity['Suspicious Activity'] = activity['Suspicious Activity'].replace(
#     {'Other Other Suspicious Activities': 'Diff Suspicious Activities'})

In [11]:
# activity['Suspicious Activity'].value_counts()

In [12]:
activity = pd.get_dummies(activity, columns=['State', 'Suspicious Activity'])
activity.head()

Unnamed: 0,Industry,Count,State_Arizona,State_California,State_Colorado,State_Nevada,State_New Mexico,State_Utah,Suspicious Activity_ACH,Suspicious Activity_Account Takeover,...,Suspicious Activity_Transaction Out of Pattern for Customer(s),"Suspicious Activity_Transaction with No Apparent Economic, Business, or Lawful Purpose",Suspicious Activity_Transaction(s) Below BSA Recordkeeping Threshold,Suspicious Activity_Transaction(s) Below CTR Threshold,Suspicious Activity_Transaction(s) Involving Foreign High Risk Jurisdiction,Suspicious Activity_Two or More Individuals Working Together,Suspicious Activity_Unauthorized Electronic Intrusion,Suspicious Activity_Unknown Source of Chips,Suspicious Activity_Unlicensed or Unregistered MSB,Suspicious Activity_Wire
0,Card Club,2,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,Tribal Authorized Casino,30,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,State Licensed Casino,12,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,Other Casino/Card Club,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,Tribal Authorized Casino,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0


# Data Pre-Processing

In [13]:
X = activity.drop(columns = ['Industry'])
y = activity['Industry'].values.reshape(-1, 1)
print(X.shape, y.shape)

(555, 91) (555, 1)


In [14]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [15]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)

In [16]:
# y_test

In [17]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [18]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [19]:
# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Create a Deep Learning Model

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [21]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=600, activation='relu', input_dim=91))
model.add(Dense(units=200, activation='relu'))
model.add(Dense(units=4, activation='softmax'))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [22]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [23]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 600)               55200     
_________________________________________________________________
dense_1 (Dense)              (None, 200)               120200    
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 804       
Total params: 176,204
Trainable params: 176,204
Non-trainable params: 0
_________________________________________________________________


In [24]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

Epoch 1/60
416/416 - 0s - loss: 1.2108 - acc: 0.4832
Epoch 2/60
416/416 - 0s - loss: 1.0145 - acc: 0.5625
Epoch 3/60
416/416 - 0s - loss: 0.9397 - acc: 0.5817
Epoch 4/60
416/416 - 0s - loss: 0.8953 - acc: 0.5913
Epoch 5/60
416/416 - 0s - loss: 0.8565 - acc: 0.5889
Epoch 6/60
416/416 - 0s - loss: 0.8192 - acc: 0.6130
Epoch 7/60
416/416 - 0s - loss: 0.7934 - acc: 0.5889
Epoch 8/60
416/416 - 0s - loss: 0.7665 - acc: 0.6106
Epoch 9/60
416/416 - 0s - loss: 0.7337 - acc: 0.6226
Epoch 10/60
416/416 - 0s - loss: 0.7096 - acc: 0.6130
Epoch 11/60
416/416 - 0s - loss: 0.6817 - acc: 0.6346
Epoch 12/60
416/416 - 0s - loss: 0.6582 - acc: 0.6202
Epoch 13/60
416/416 - 0s - loss: 0.6487 - acc: 0.6106
Epoch 14/60
416/416 - 0s - loss: 0.6311 - acc: 0.6370
Epoch 15/60
416/416 - 0s - loss: 0.6092 - acc: 0.6611
Epoch 16/60
416/416 - 0s - loss: 0.6023 - acc: 0.6562
Epoch 17/60
416/416 - 0s - loss: 0.5924 - acc: 0.6178
Epoch 18/60
416/416 - 0s - loss: 0.5702 - acc: 0.6418
Epoch 19/60
416/416 - 0s - loss: 0.56

<tensorflow.python.keras.callbacks.History at 0x1a44716150>

# Quantify our Trained Model

In [25]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=1)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 3.3148674278808157, Accuracy: 0.2733812928199768


# Make Predictions

In [26]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [27]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:4])}")

Predicted classes: [' Card Club' ' State Licensed Casino' ' Tribal Authorized Casino'
 ' Card Club' ' Other Casino/Card Club']
Actual Labels: [array([' State Licensed Casino'], dtype=object), array([' Tribal Authorized Casino'], dtype=object), array([' Card Club'], dtype=object), array([' Tribal Authorized Casino'], dtype=object)]
