In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from matplotlib import pyplot as plt
from sklearn import tree

In [None]:
# Read the cleaned sample CSV (path is relative to the notebook's working
# directory) and show the resulting frame as the cell output.
csv_path = '../Data/cleanedSampleNoMidnight.csv'
df = pd.read_csv(csv_path)
df

In [None]:
# Separate the label to predict (y) from the remaining predictor columns (X).
target_col = 'Violation Code'
y = df[target_col]
X = df.drop(columns=[target_col])

In [None]:
# One-hot encode the categorical predictors into indicator columns.
# The encoding is rebuilt from `df` instead of from the previous value of `X`
# so this cell is idempotent: calling get_dummies on an already-encoded `X`
# raises a KeyError because 'Street', 'Day' and 'Time Range' no longer exist
# as columns after the first run.
X = pd.get_dummies(
    df.drop(columns=['Violation Code']),
    columns=['Street', 'Day', "Time Range"],
)

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Hyperparameter grid for the decision tree (4 * 3 * 3 = 36 combinations).
param_grid = {
    'max_depth': [2, 4, 6, 8],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 5-fold cross-validated grid search over the training split.
# The estimator is seeded: scikit-learn permutes features at every split, so
# ties between equally good splits can otherwise be broken differently from
# run to run, changing the selected hyperparameters and the final score.
grid_search = GridSearchCV(
    DecisionTreeClassifier(random_state=0),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)


In [None]:
# Show which hyperparameter combination won the decision-tree search.
print(grid_search.best_params_)

# Predict the held-out test rows with the best estimator from the search.
best_tree = grid_search.best_estimator_
y_pred = best_tree.predict(X_test)

# Test-set accuracy; left as the last expression so the notebook displays it.
accuracy_score(y_test, y_pred)



In [None]:
# Results table: one row per (feature set, model) combination.
# "n_estimators" only applies to the random forest, so decision-tree rows
# store "N/A" in that column.
# Fix: the third hyperparameter column was misspelled "min_samples,split".
accs = pd.DataFrame(columns=[
    "Locations/No Locations",
    "Technique",
    "max_depth",
    "min_samples_leaf",
    "min_samples_split",
    "n_estimators",
    "Accuracy Score",
])

# Look each hyperparameter up by name so the value always lands in the column
# of the same name, instead of relying on the iteration order of best_params_.
dt_params = grid_search.best_params_
accs.loc[len(accs)] = [
    "With Locations",
    "Decision Tree",
    dt_params["max_depth"],
    dt_params["min_samples_leaf"],
    dt_params["min_samples_split"],
    "N/A",
    accuracy_score(y_test, y_pred),
]
accs

In [None]:
# Draw the best decision tree from the search on a large canvas.
# Passing the training column names labels every split with the feature it
# tests; without them plot_tree falls back to positional names like "x[12]",
# which are unreadable for a one-hot encoded matrix.
fig = plt.figure(figsize=(25,20))
_ = tree.plot_tree(
    grid_search.best_estimator_,
    feature_names=list(X_train.columns),
    filled=True,
)

In [None]:
# Search space for the random forest: ensemble size plus three
# tree-shape settings (3 * 4 * 3 * 3 = 108 combinations).
param_grid = dict(
    n_estimators=[100, 200, 500],
    max_depth=[2, 4, 6, 8],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)


In [None]:
# 5-fold cross-validated grid search for the random forest on the training split.
# The forest is seeded because it is a stochastic estimator (bootstrap samples
# and random feature subsets); without a fixed random_state the chosen
# hyperparameters and the reported accuracy can change on every run.
grid_search2 = GridSearchCV(
    RandomForestClassifier(random_state=0),
    param_grid=param_grid,
    cv=5,
)
grid_search2.fit(X_train, y_train)

# Print the best hyperparameters found
print(grid_search2.best_params_)

In [None]:
# Predict the held-out test rows with the best forest from the search.
# Note: this reuses the name `y_pred`, replacing the decision-tree predictions
# stored under the same name earlier in the notebook.
y_pred = grid_search2.best_estimator_.predict(X_test)

# Compute the test accuracy once and reuse it. Previously it was evaluated as
# a bare statement in the middle of the cell (never displayed, result
# discarded) and then computed a second time for the results row.
rf_accuracy = accuracy_score(y_test, y_pred)

# Look each hyperparameter up by name so the value always lands in the column
# of the same name, instead of relying on the iteration order of best_params_.
rf_params = grid_search2.best_params_
accs.loc[len(accs)] = [
    "With Locations",
    "Random Forest",
    rf_params["max_depth"],
    rf_params["min_samples_leaf"],
    rf_params["min_samples_split"],
    rf_params["n_estimators"],
    rf_accuracy,
]
accs

In [None]:
# Fit one random forest with fixed, hard-coded hyperparameters on the training
# split. This cell only fits the model (the old trailing "make predictions"
# comment was misplaced); `rf` is not referenced again in the visible notebook.
# The seed makes the fitted forest repeatable across runs.
rf = RandomForestClassifier(
    max_depth=8,
    min_samples_leaf=2,
    min_samples_split=10,
    n_estimators=100,
    random_state=0,
)
rf.fit(X_train, y_train)


In [None]:
# Variant of the data without the two coordinate columns, used to measure how
# much the models depend on location.
location_cols = ["Longitude","Latitude"]
dfnoLoc = df.drop(columns=location_cols)
dfnoLoc

In [None]:
# Same preparation as for the full data, applied to the frame without coordinates:
# pull out the label, one-hot encode the categorical predictors, then hold out
# 20% of the rows with the same seed.
target_col = 'Violation Code'
y2 = dfnoLoc[target_col]
X2 = pd.get_dummies(
    dfnoLoc.drop(columns=[target_col]),
    columns=['Street', 'Day', "Time Range"],
)
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X2, y2, test_size=0.2, random_state=0
)

In [None]:
# Hyperparameter grid for the decision tree (4 * 3 * 3 = 36 combinations).
param_grid = {
    'max_depth': [2, 4, 6, 8],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 5-fold cross-validated grid search on the training split without coordinates.
# The estimator is seeded: scikit-learn permutes features at every split, so
# ties between equally good splits can otherwise be broken differently from
# run to run, changing the selected hyperparameters and the final score.
grid_search3 = GridSearchCV(
    DecisionTreeClassifier(random_state=0),
    param_grid=param_grid,
    cv=5,
)
grid_search3.fit(X_train2, y_train2)


In [None]:
# Show which hyperparameter combination won the no-location decision-tree search.
# Fix: this used to print grid_search.best_params_ (the with-location search),
# so the parameters shown did not belong to the model evaluated below.
print(grid_search3.best_params_)

# Predict the held-out test rows with the best estimator from the search.
y_pred2 = grid_search3.best_estimator_.predict(X_test2)

# Test-set accuracy; left as the last expression so the notebook displays it.
accuracy_score(y_test2, y_pred2)

In [None]:
# Record the no-location decision tree in the results table.
# Hyperparameters are looked up by name so the values line up with the table's
# columns without depending on the iteration order of best_params_.
# "N/A" fills the n_estimators column, which does not apply to a single tree.
# Note: re-running this cell appends another copy of the row.
dt_noloc_params = grid_search3.best_params_
accs.loc[len(accs)] = [
    "Without Locations",
    "Decision Tree",
    dt_noloc_params["max_depth"],
    dt_noloc_params["min_samples_leaf"],
    dt_noloc_params["min_samples_split"],
    "N/A",
    accuracy_score(y_test2, y_pred2),
]
accs

In [None]:
# Draw the best no-location decision tree on a large canvas.
# Passing the training column names labels every split with the feature it
# tests; without them plot_tree falls back to positional names like "x[12]",
# which are unreadable for a one-hot encoded matrix.
fig = plt.figure(figsize=(25,20))
_ = tree.plot_tree(
    grid_search3.best_estimator_,
    feature_names=list(X_train2.columns),
    filled=True,
)

In [None]:
# Search space for the random forest: ensemble size plus three
# tree-shape settings (3 * 4 * 3 * 3 = 108 combinations).
param_grid = dict(
    n_estimators=[100, 200, 500],
    max_depth=[2, 4, 6, 8],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)


In [None]:
# 5-fold cross-validated grid search for the random forest on the training
# split without coordinates.
# The forest is seeded because it is a stochastic estimator (bootstrap samples
# and random feature subsets); without a fixed random_state the chosen
# hyperparameters and the reported accuracy can change on every run.
grid_search4 = GridSearchCV(
    RandomForestClassifier(random_state=0),
    param_grid=param_grid,
    cv=5,
)
grid_search4.fit(X_train2, y_train2)

# Print the best hyperparameters found
print(grid_search4.best_params_)

In [None]:
# Predict the held-out test rows with the best no-location forest.
# Note: this reuses the name `y_pred2`, replacing the decision-tree predictions
# stored under the same name earlier in the notebook.
y_pred2 = grid_search4.best_estimator_.predict(X_test2)

# Compute the test accuracy once and reuse it. Previously it was evaluated as
# a bare statement in the middle of the cell (never displayed, result
# discarded) and then computed a second time for the results row.
rf_noloc_accuracy = accuracy_score(y_test2, y_pred2)

# Look each hyperparameter up by name so the value always lands in the column
# of the same name, instead of relying on the iteration order of best_params_.
rf_noloc_params = grid_search4.best_params_
accs.loc[len(accs)] = [
    "Without Locations",
    "Random Forest",
    rf_noloc_params["max_depth"],
    rf_noloc_params["min_samples_leaf"],
    rf_noloc_params["min_samples_split"],
    rf_noloc_params["n_estimators"],
    rf_noloc_accuracy,
]
accs

In [None]:
# Persist the comparison table as CSV; the positional row index is not written.
results_path = "../Results/DecTreeandRanForestNoMidnight.csv"
accs.to_csv(results_path, index=False)