In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import FunctionTransformer
import datetime
from numpy import array

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

In [3]:
from tensorflow.keras.layers import Dense, LSTM, Dropout, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from prophet import Prophet

In [4]:
def sin_transformer(period):
    """Build a transformer that computes the sine of a value within a cycle.

    The returned ``FunctionTransformer`` evaluates
    ``sin(x / period * 2 * pi)`` on its input.
    """
    def _sine_of_cycle(values):
        angle = values / period * 2 * np.pi
        return np.sin(angle)

    return FunctionTransformer(_sine_of_cycle)

def cos_transformer(period):
    """Build a transformer that computes the cosine of a value within a cycle.

    The returned ``FunctionTransformer`` evaluates
    ``cos(x / period * 2 * pi)`` on its input.
    """
    def _cosine_of_cycle(values):
        angle = values / period * 2 * np.pi
        return np.cos(angle)

    return FunctionTransformer(_cosine_of_cycle)

In [5]:
# Incident-level records; the date column is parsed to datetimes on load.
police_shooting = pd.read_csv('US Police shootings in from 2015-22_Clean.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
# State population table, loaded as-is.
us_population_by_state = pd.read_csv('nst-est2021-popchg2010-2022.csv')

In [6]:
# Distinct values of each categorical column used for filtering.
available_states, available_races, available_arms, available_flee = (
    police_shooting[column].unique()
    for column in ('state', 'race', 'armed', 'flee')
)

In [7]:
# Display the loaded incident table to inspect its columns and values.
police_shooting

Unnamed: 0,date,armed,age,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,longitude,latitude
0,2015-01-02,gun,53.0,American Native,Shelton,Washington,True,attack,Not fleeing,False,-123.122,47.247
1,2015-01-02,gun,47.0,White,Aloha,Oregon,False,attack,Not fleeing,False,-122.892,45.487
2,2015-01-03,unarmed,23.0,Hispanic,Wichita,Kansas,False,other,Not fleeing,False,-97.281,37.695
3,2015-01-04,toy gun,32.0,White,San Francisco,California,True,attack,Not fleeing,False,-122.422,37.763
4,2015-01-04,miscellaneous lethal weapons,39.0,Hispanic,Evans,Colorado,False,attack,Not fleeing,False,-104.692,40.384
...,...,...,...,...,...,...,...,...,...,...,...,...
7724,2022-09-07,sharp object,35.0,NG,Flagstaff,Arizona,False,other,Foot,False,-111.584,35.224
7725,2022-09-07,gun,35.0,White,Springville,Alabama,False,attack,Car,False,-86.472,33.775
7726,2022-09-07,knife,61.0,Black,St. Louis,Missouri,False,other,Not fleeing,False,-90.290,38.668
7727,2022-09-07,gun,27.0,NG,Washington County,Tennessee,False,attack,Other,False,-82.539,36.309


In [8]:
def filter_data(df: pd.DataFrame, state: str = None, race: str = None, armed: str = None, flee: str = None):
    """Select the incidents that match every supplied filter.

    Parameters
    ----------
    df : pd.DataFrame
        Incident table containing a ``date`` column plus a column for each
        filter that is used.
    state, race, armed, flee : str, optional
        Exact value required in the column of the same name. Falsy values
        (``None`` or ``""``) leave that column unfiltered.

    Returns
    -------
    pd.DataFrame
        ``date`` followed by one column per active filter (in the order
        state, race, armed, flee), restricted to the matching rows. Row
        labels are the row positions in ``df``. With no active filter every
        row is returned with only the ``date`` column.
    """
    criteria = {'state': state, 'race': race, 'armed': armed, 'flee': flee}
    active = {column: value for column, value in criteria.items() if value}

    subset = df[['date', *active]].reset_index(drop=True)

    # Boolean masks are used instead of DataFrame.query: an empty expression
    # makes query() raise, and values containing quotes corrupt the
    # string-built expression.
    mask = pd.Series(True, index=subset.index, dtype=bool)
    for column, value in active.items():
        mask &= subset[column].eq(value)
    return subset.loc[mask]

In [9]:
def resample(df: pd.DataFrame, sample: str):
    """Count how many rows of ``df`` fall into each date bin.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``date`` column of datetimes; other columns are ignored.
    sample : str
        Pandas offset alias giving the bin width (e.g. ``'W'`` or ``'D'``).

    Returns
    -------
    pd.DataFrame
        Indexed by the bin dates (index name ``date``) with a single
        ``count`` column holding the number of rows in each bin.
    """
    indexed = df.set_index('date')
    # Resampler.size() counts rows per bin without needing any data column,
    # so a frame that only has the date column is handled as well.
    return indexed.resample(sample).size().to_frame('count')

In [10]:
def extract_date_feature(date: datetime.datetime):
    """Encode a single date's month and day-of-year as cyclic features.

    Parameters
    ----------
    date : datetime.datetime
        The date to encode.

    Returns
    -------
    tuple
        ``(month_sin, month_cos, day_sin, day_cos)``. The month pair uses a
        period of 12 and the day-of-year pair a period of 365, matching the
        columns produced by ``extract_date_feature_from_df``.
    """
    month = date.month
    day_of_year = date.timetuple().tm_yday
    return (
        sin_transformer(12).fit_transform(month),
        cos_transformer(12).fit_transform(month),
        sin_transformer(365).fit_transform(day_of_year),
        # The cosine must share the 365-day period of its sine partner;
        # a period of 12 here would wrap the year roughly 30 times.
        cos_transformer(365).fit_transform(day_of_year),
    )

In [11]:
def extract_date_feature_from_df(df: pd.DataFrame):
    """Prepend cyclic month and day-of-year encodings to ``df`` in place.

    The frame's index must expose ``month`` and ``day_of_year``. After the
    call the first four columns are ``month_sin``, ``month_cos``, ``day_sin``
    and ``day_cos``; the same (mutated) frame is returned.
    """
    # Temporary helper columns that the encoders read from.
    df.insert(0, "month", df.index.month)
    df.insert(1, "day_of_year", df.index.day_of_year)

    # (new column, transformer factory, cycle length, source column)
    encodings = (
        ("month_sin", sin_transformer, 12, "month"),
        ("month_cos", cos_transformer, 12, "month"),
        ("day_sin", sin_transformer, 365, "day_of_year"),
        ("day_cos", cos_transformer, 365, "day_of_year"),
    )
    for position, (name, make_transformer, cycle, source) in enumerate(encodings, start=2):
        df.insert(position, name, make_transformer(cycle).fit_transform(df[source]))

    # The helpers are no longer needed once the encodings exist.
    df.drop(columns=['month', 'day_of_year'], inplace=True)

    return df

In [55]:
def predict_trend(state: str = None, race: str = None, armed: str = None, flee: str = None,
                  periods: int = 30, freq: str = 'W'):
    """Fit a Prophet model to binned incident counts and forecast ahead.

    Parameters
    ----------
    state, race, armed, flee : str, optional
        Filters forwarded to ``filter_data`` to select the incidents to model.
    periods : int, default 30
        Number of future bins to forecast.
    freq : str, default 'W'
        Pandas offset alias used both to bin the history and to step the
        forecast horizon, so the two always agree.

    Returns
    -------
    tuple
        ``(dataset, m, forecast)``: the training frame with Prophet's ``ds``
        and ``y`` columns, the fitted ``Prophet`` model, and the frame returned
        by ``m.predict`` covering the history plus the future bins.
    """
    incidents = filter_data(police_shooting, state=state, race=race, armed=armed, flee=flee)

    # Prophet expects the timestamp column to be named 'ds' and the target 'y'.
    dataset = (
        resample(incidents, freq)
        .rename(columns={'count': 'y'})
        .astype({'y': int})
        .reset_index()
        .rename(columns={'date': 'ds'})
    )

    m = Prophet()
    m.fit(dataset)

    future = m.make_future_dataframe(periods=periods, freq=freq)
    forecast = m.predict(future)

    return dataset, m, forecast

In [29]:
# Show the distinct state values found in the incident data.
available_states

array(['Washington', 'Oregon', 'Kansas', 'California', 'Colorado',
       'Oklahoma', 'Arizona', 'Iowa', 'Pennsylvania', 'Texas', 'Ohio',
       'Louisiana', 'Montana', 'Utah', 'Arkansas', 'Illinois', 'Nevada',
       'New Mexico', 'Minnesota', 'Missouri', 'Virginia', 'New Jersey',
       'Indiana', 'Kentucky', 'Massachusetts', 'New Hampshire', 'Florida',
       'Idaho', 'Maryland', 'Nebraska', 'Michigan', 'Georgia',
       'Tennessee', 'North Carolina', 'Alaska', 'New York', 'Maine',
       'Alabama', 'Mississippi', 'Wisconsin', 'South Carolina',
       'Delaware', 'District of Columbia', 'West Virginia', 'Hawaii',
       'Wyoming', 'North Dakota', 'Connecticut', 'South Dakota',
       'Vermont', 'Rhode Island'], dtype=object)

In [30]:
# Show the distinct race values found in the incident data.
available_races

array(['American Native', 'White', 'Hispanic', 'Black', 'NG',
       'Not Hispanic'], dtype=object)

In [31]:
# Exclude the 'claimed to be armed' category from the weapon values.
available_arms = available_arms[available_arms != 'claimed to be armed']

In [32]:
# Show the distinct flee values found in the incident data.
available_flee

array(['Not fleeing', 'Car', 'Foot', 'Other', 'NG'], dtype=object)

In [56]:
def preidct(column_type: str):
    """Forecast incident counts for every value of one column and save them.

    For each distinct value of ``column_type`` a separate model is fitted via
    ``predict_trend`` and its ``yhat`` series is collected. The combined
    result is written to ``'<column_type>-predict.csv'`` with the columns
    ``date``, ``<column_type>`` and ``shooting count``.

    Parameters
    ----------
    column_type : str
        One of ``'state'``, ``'race'``, ``'armed'`` or ``'flee'``.

    Raises
    ------
    ValueError
        If ``column_type`` is not one of the supported columns (previously
        this silently wrote a header-only CSV).
    """
    # Built at call time so later reassignments of the value arrays are seen.
    values_by_column = {
        'state': available_states,
        'race': available_races,
        'armed': available_arms,
        'flee': available_flee,
    }
    if column_type not in values_by_column:
        supported = ', '.join(repr(name) for name in values_by_column)
        raise ValueError(f'Unsupported column_type {column_type!r}; expected one of {supported}')

    output_columns = ['date', column_type, 'shooting count']

    # Collect the per-value forecasts and concatenate once: repeated concat in
    # the loop is quadratic and concatenating onto an empty seed frame relies
    # on deprecated dtype handling.
    forecasts = []
    for value in values_by_column[column_type]:
        _, _, forecast = predict_trend(**{column_type: value})
        forecasts.append(
            forecast
            .rename(columns={'ds': 'date', 'yhat': 'shooting count'})
            .assign(**{column_type: value})[output_columns]
        )

    if forecasts:
        predict_ds = pd.concat(forecasts, axis=0)
    else:
        predict_ds = pd.DataFrame([], columns=output_columns)
    predict_ds.to_csv(f'{column_type}-predict.csv', index=False)

In [60]:
# Fit one forecast per flee value and write the results to 'flee-predict.csv'.
preidct(column_type = 'flee')

03:39:12 - cmdstanpy - INFO - Chain [1] start processing
03:39:12 - cmdstanpy - INFO - Chain [1] done processing
03:39:13 - cmdstanpy - INFO - Chain [1] start processing
03:39:13 - cmdstanpy - INFO - Chain [1] done processing
03:39:13 - cmdstanpy - INFO - Chain [1] start processing
03:39:13 - cmdstanpy - INFO - Chain [1] done processing
03:39:14 - cmdstanpy - INFO - Chain [1] start processing
03:39:14 - cmdstanpy - INFO - Chain [1] done processing
03:39:14 - cmdstanpy - INFO - Chain [1] start processing
03:39:14 - cmdstanpy - INFO - Chain [1] done processing
