In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Loading and Pre-processing the Data

In [None]:
# Read the raw shootings CSV; the relative path is resolved against the current working directory.
DATA_PATH = r'fatal-police-shootings-data.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the freshly loaded data.
data.head(n=5)

In [None]:
# Summary statistics of the raw data (pandas' default describe() reports numeric columns only).
data.describe()

In [None]:
# Remove the columns that are not used in the rest of the notebook, modifying `data` in place.
columns_to_drop = ['id', 'name', 'longitude', 'latitude', 'is_geocoding_exact']
data.drop(columns=columns_to_drop, inplace=True)

In [None]:
# Number of missing values in each remaining column.
data.isna().sum()

In [None]:
# (rows, columns) of `data` after the column drop above.
data.shape

# Handling missing values

In [None]:
# Missing values are imputed rather than dropped. (Original note: out of 6214 rows,
# no column is missing more than 608 values, i.e. each column is at least ~90% complete.)
# NOTE(review): `race` is used as the classification target further down, so rows whose
# race is imputed here receive an assumed label - confirm this is intended.

# Numeric column: missing ages become the mean age.
mean_age = data['age'].mean()
data['age'] = data['age'].fillna(mean_age)

# Categorical columns: missing entries become the most frequent value (the mode).
for column in ('gender', 'race', 'flee', 'armed'):
    most_frequent = data[column].mode().iloc[0]
    data[column] = data[column].fillna(most_frequent)

In [None]:
# (rows, columns) after imputation; fillna replaces values and does not remove rows.
data.shape

In [None]:
# Drop, in place, every row that still has a missing value in any column.
data.dropna(axis=0, how='any', inplace=True)

In [None]:
# Per-column count of missing values after imputation and dropna.
data.isna().sum()

In [None]:
# Column dtypes and non-null counts of the cleaned frame.
data.info()

In [None]:
# (rows, columns) after dropna; compare with the earlier shape to see how many rows were removed.
data.shape

In [None]:
# First five rows of the cleaned data.
data.head(n=5)

In [None]:
# NOTE(review): identical to the previous cell; one of the two can be removed.
data.head()

In [None]:
# Distinct values of the manner_of_death column.
data['manner_of_death'].unique()

In [None]:
# Distinct values of the armed column.
data['armed'].unique()

In [None]:
# Distinct values of the threat_level column.
data['threat_level'].unique()

# Exploratory Data Analysis

In [None]:
# Bar chart of how often each threat level occurs.
# `kind` is passed by keyword: recent pandas versions raise a TypeError when
# Series.plot() is called with positional arguments.
data['threat_level'].value_counts().plot(kind='bar', title='Incidents by threat level')

In [None]:
# Horizontal bar chart of how often each manner of death occurs.
# `kind` is passed by keyword: recent pandas versions raise a TypeError when
# Series.plot() is called with positional arguments.
data['manner_of_death'].value_counts().plot(kind='barh', title='Incidents by manner of death')

In [None]:
# TODO: add any further visualizations needed for the analysis.

# Preprocessing the data and Scaling

In [None]:
# Look at the rows once more before the string columns are encoded.
data.head(n=5)

In [None]:
# LabelEncoder maps each distinct value of a column to an integer code.
label_encoder = preprocessing.LabelEncoder()

In [None]:
# Work on an independent copy: plain `df = data` only creates a second name for
# the same DataFrame, so the in-place encoding and column drops below would
# silently change `data` as well.
df = data.copy()

In [None]:
# Show the first two rows of the working frame.
df.head(n=2)

In [None]:
#Converting str labels into numeric categories
df['manner_of_death'] = label_encoder.fit_transform(df['manner_of_death'])
df['armed'] = label_encoder.fit_transform(df['armed'])
df['gender'] = label_encoder.fit_transform(df['gender'])
df['race'] = label_encoder.fit_transform(df['race'])
df['city'] = label_encoder.fit_transform(df['city'])
df['state'] = label_encoder.fit_transform(df['state'])
df['signs_of_mental_illness'] = label_encoder.fit_transform(df['signs_of_mental_illness'])
df['threat_level'] = label_encoder.fit_transform(df['threat_level'])
df['flee'] = label_encoder.fit_transform(df['flee'])



In [None]:
# body_camera is not used as a feature; remove it from `df` in place.
df.drop(columns=['body_camera'], inplace=True)

In [None]:
# date is not used as a feature; remove it from `df` in place.
df.drop(columns=['date'], inplace=True)

In [None]:
# First five rows of the encoded frame.
df.head(n=5)

# Normalizing data to the range 0 to 1 (each column scaled to unit L2 norm)

In [None]:
# Scale every column of `df` to unit L2 norm (axis=0 normalises column-wise).
# The encoded codes are non-negative, so - assuming ages are non-negative too -
# the scaled values lie between 0 and 1.
# Column names and the row index are taken from `df` itself rather than from a
# hand-maintained list, so the labels cannot drift out of step with the data.
names = list(df.columns)
normalize_data = preprocessing.normalize(df, axis=0)
scaled_df = pd.DataFrame(normalize_data, columns=names, index=df.index)
scaled_df.head()

# KNN

In [None]:
# Feature matrix: every column except the target `race`.
# The values come from `scaled_df` (the normalised frame computed above) rather
# than from the raw codes in `df`: KNN and SVM work on distances, so unscaled
# high-cardinality columns such as `city` would otherwise dominate the result.
feature_columns = ['manner_of_death', 'armed', 'age', 'gender', 'city', 'state',
                   'signs_of_mental_illness', 'threat_level', 'flee']
x = scaled_df[feature_columns].values

In [None]:
# Target vector: the integer-encoded race of each row.
y = df.loc[:, 'race'].values

In [None]:
# Hold out 20% of the rows for testing. A fixed random_state makes the split -
# and therefore every accuracy reported below - reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
# Fit a 7-nearest-neighbours classifier on the training split, then predict the held-out split.
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [None]:
# Share of test rows whose predicted class equals the true class.
knn_accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", knn_accuracy)

In [None]:
# A scatter of true vs. predicted class codes only shows which (true, predicted)
# pairs occur, not how often; a confusion-matrix heatmap shows the counts.
# Rows are the true classes, columns the predicted classes.
knn_cm = confusion_matrix(y_test, y_pred)
ax = sn.heatmap(knn_cm, annot=True, fmt='d', cmap='Blues')
ax.set(xlabel='Predicted race (encoded)', ylabel='True race (encoded)', title='KNN confusion matrix');

# SVM

In [None]:
from sklearn.svm import SVC
# svm_model_linear = SVC(kernel = 'linear', C = 1).fit(X_train, y_train)
# svm_predictions = svm_model_linear.predict(X_test)


In [None]:
# Linear-kernel support vector classifier with C = 1, trained on the training split.
svm_model_linear = SVC(kernel='linear', C=1)
svm_model_linear = svm_model_linear.fit(X_train, y_train)

In [None]:
# Predict the held-out split with the linear SVM.
svm_predictions = svm_model_linear.predict(X_test)

# Test accuracy, computed from the predictions above so the test set is only
# predicted once, and printed so the result is actually visible.
accuracy = metrics.accuracy_score(y_test, svm_predictions)
print("SVM accuracy:", accuracy)

# Confusion matrix (rows: true class, columns: predicted class); leaving it as
# the last expression of the cell displays it.
cm = confusion_matrix(y_test, svm_predictions)
cm

# Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB


In [None]:
# Fit a Gaussian Naive Bayes classifier on the training split and predict the held-out split.
gnb = GaussianNB().fit(X_train, y_train)
gnb_predictions = gnb.predict(X_test)

# Test accuracy, computed from the predictions above so the test set is only
# predicted once.
accuracy = metrics.accuracy_score(y_test, gnb_predictions)
print("Naive Bayes accuracy:", accuracy)

# Confusion matrix (rows: true class, columns: predicted class); leaving it as
# the last expression of the cell displays it.
cm = confusion_matrix(y_test, gnb_predictions)
cm