In [431]:
# Initial imports
import pandas as pd
import datetime
from path import Path
from sklearn import tree
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import joblib
import pickle
from sklearn.ensemble import RandomForestClassifier

In [432]:
# Load the arestee.csv dataset.
arestee_df = pd.read_csv("IL/NIBRS_ARRESTEE.csv", parse_dates=['ARREST_DATE'])

arestee_df
arestee_df.info()
arestee_df.describe()
arestee_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4140 entries, 0 to 4139
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   DATA_YEAR                  4140 non-null   int64         
 1   ARRESTEE_ID                4140 non-null   int64         
 2   INCIDENT_ID                4140 non-null   int64         
 3   ARRESTEE_SEQ_NUM           4140 non-null   int64         
 4   ARREST_DATE                4140 non-null   datetime64[ns]
 5   ARREST_TYPE_ID             4140 non-null   int64         
 6   MULTIPLE_INDICATOR         4140 non-null   object        
 7   OFFENSE_TYPE_ID            4140 non-null   int64         
 8   AGE_ID                     4140 non-null   int64         
 9   AGE_NUM                    4140 non-null   int64         
 10  SEX_CODE                   4140 non-null   object        
 11  RACE_ID                    4140 non-null   int64         
 12  ETHNIC

Unnamed: 0,DATA_YEAR,ARRESTEE_ID,INCIDENT_ID,ARRESTEE_SEQ_NUM,ARREST_DATE,ARREST_TYPE_ID,MULTIPLE_INDICATOR,OFFENSE_TYPE_ID,AGE_ID,AGE_NUM,SEX_CODE,RACE_ID,ETHNICITY_ID,RESIDENT_CODE,UNDER_18_DISPOSITION_CODE,CLEARANCE_IND,AGE_RANGE_LOW_NUM,AGE_RANGE_HIGH_NUM
0,2019,33189213,111310423,1,2019-03-01,2,N,16,5,13,F,2,2,R,R,,13,0
1,2019,33182447,111310447,1,2019-03-05,2,N,16,5,29,F,1,2,R,,,29,0
2,2019,33182461,111317969,1,2019-03-08,1,N,16,5,14,M,2,2,R,R,,14,0
3,2019,33189268,111310543,1,2019-03-18,1,N,5,5,31,F,1,2,R,,,31,0
4,2019,33182483,111310559,1,2019-03-19,3,N,16,5,44,M,2,2,N,,,44,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4135,2019,36247339,118888164,1,2019-10-16,3,N,51,5,55,F,2,2,R,,,55,0
4136,2019,36255107,118879914,1,2019-10-08,3,N,48,5,39,M,2,2,N,,,39,0
4137,2019,36255122,118879927,1,2019-10-08,3,N,27,5,50,M,2,2,R,,,50,0
4138,2019,36247354,118879933,1,2019-05-21,3,N,27,5,29,M,2,2,R,,,29,0


In [433]:
# months = arestee_df['ARREST_DATE'].map(lambda x: x.split('-')[1]).value_counts()
# months

In [434]:
# arestee_df['ARREST_DATE'] = pd.to_datetime(arestee_df['ARREST_DATE'])
# arestee_df.info()
# arestee_df.head()
# arrest_date = arestee_df['ARREST_DATE']
# arrest_date

In [435]:
# Create function to find which season the arrest took place
def get_season(date):
    if date > '2019-01-21' and date < '2019-03-19':
        return "winter"
    elif date > '2019-03-20' and date < '2019-06-20':
        return "spring"
    elif date > '2019-06-21' and date < '2019-09-21':
        return 'summer'
    elif date > '2019-09-21' and date < '2019-12-20':     
        return 'autumn'
    else:
        return "winter"


In [436]:
# target but encode it df column arrest season
season = arestee_df["ARREST_DATE"].map(str).map(get_season)

In [437]:
arestee_df.insert(loc= 5, column='SEASON', value = season)

In [438]:
# Calculate number of crimes per season.
seasons_crimes= arestee_df.groupby('SEASON')['ARRESTEE_SEQ_NUM'].agg(TOTAL_INCIDENT='sum')
seasons_crimes

Unnamed: 0_level_0,TOTAL_INCIDENT
SEASON,Unnamed: 1_level_1
autumn,1181
spring,1159
summer,1754
winter,1042


In [439]:
# Extract the season with the highest total of incidents.
high_crime_season = seasons_crimes[seasons_crimes.TOTAL_INCIDENT == seasons_crimes.TOTAL_INCIDENT.max()]
high_crime_season

Unnamed: 0_level_0,TOTAL_INCIDENT
SEASON,Unnamed: 1_level_1
summer,1754


In [440]:
# Load the arrest_type.csv dataset.
arrest_types_df = pd.read_csv("IL/NIBRS_ARREST_TYPE.csv")
arrest_types_df

Unnamed: 0,ARREST_TYPE_ID,ARREST_TYPE_CODE,ARREST_TYPE_NAME
0,1,O,On View
1,2,S,Summoned / Cited
2,3,T,Taken INTO Custody


In [441]:
# Load the offense_type dataset.
offense_types_df = pd.read_csv("IL/NIBRS_OFFENSE_TYPE.csv")
offense_types_df

Unnamed: 0,OFFENSE_TYPE_ID,OFFENSE_CODE,OFFENSE_NAME,CRIME_AGAINST,CT_FLAG,HC_FLAG,HC_CODE,OFFENSE_CATEGORY_NAME,OFFENSE_GROUP
0,58,23*,Not Specified,Property,N,Y,6.0,Larceny/Theft Offenses,A
1,1,09C,Justifiable Homicide,Person,N,N,,Homicide Offenses,A
2,2,26A,False Pretenses/Swindle/Confidence Game,Property,Y,Y,,Fraud Offenses,A
3,3,36B,Statutory Rape,Person,N,Y,,"Sex Offenses, Non-forcible",A
4,4,11C,Sexual Assault With An Object,Person,N,Y,2.0,Sex Offenses,A
...,...,...,...,...,...,...,...,...,...
81,68,26H,Money Laundering,Society,N,N,,Other Offenses,A
82,36,11A,Rape,Person,N,Y,2.0,Sex Offenses,A
83,50,23E,Theft From Coin-Operated Machine or Device,Property,N,N,,Larceny/Theft Offenses,A
84,10,90D,Driving Under the Influence,Society,N,N,,Driving Under the Influence,B


In [447]:
# Load the offense.csv dataset.
offenses_df = pd.read_csv("IL/NIBRS_OFFENSE.csv")
offenses_df

Unnamed: 0,DATA_YEAR,OFFENSE_ID,INCIDENT_ID,OFFENSE_TYPE_ID,ATTEMPT_COMPLETE_FLAG,LOCATION_ID,NUM_PREMISES_ENTERED,METHOD_ENTRY_CODE
0,2019,138494615,113389118,51,C,20,,
1,2019,138500320,113383842,39,C,20,,
2,2019,138500319,113383842,16,C,20,,
3,2019,138500318,113383842,28,C,20,,
4,2019,138500335,113383859,45,A,45,,
...,...,...,...,...,...,...,...,...
15320,2019,140568489,115745570,5,C,20,,
15321,2019,140568502,115745579,51,C,20,,
15322,2019,140561140,115745588,56,C,20,,
15323,2019,140561141,115738995,51,C,20,,


In [443]:
# Load the location_type.csv dataset.
location_df = pd.read_csv("IL/NIBRS_LOCATION_TYPE.csv")
location_df.drop('LOCATION_CODE', axis=1, inplace=True)
location_df

Unnamed: 0,LOCATION_ID,LOCATION_NAME
0,1,Air/Bus/Train Terminal
1,2,Bank/Savings and Loan
2,3,Bar/Nightclub
3,4,Church/Synagogue/Temple/Mosque
4,5,Commercial/Office Building
5,6,Construction Site
6,7,Convenience Store
7,8,Department/Discount Store
8,10,Field/Woods
9,11,Government/Public Building


In [444]:
joined_df = arestee_df.merge(arrest_types_df, on ="ARREST_TYPE_ID")
joined_df

Unnamed: 0,DATA_YEAR,ARRESTEE_ID,INCIDENT_ID,ARRESTEE_SEQ_NUM,ARREST_DATE,SEASON,ARREST_TYPE_ID,MULTIPLE_INDICATOR,OFFENSE_TYPE_ID,AGE_ID,...,SEX_CODE,RACE_ID,ETHNICITY_ID,RESIDENT_CODE,UNDER_18_DISPOSITION_CODE,CLEARANCE_IND,AGE_RANGE_LOW_NUM,AGE_RANGE_HIGH_NUM,ARREST_TYPE_CODE,ARREST_TYPE_NAME
0,2019,33189213,111310423,1,2019-03-01,winter,2,N,16,5,...,F,2,2,R,R,,13,0,S,Summoned / Cited
1,2019,33182447,111310447,1,2019-03-05,winter,2,N,16,5,...,F,1,2,R,,,29,0,S,Summoned / Cited
2,2019,33189271,111310549,2,2019-03-18,winter,2,N,16,5,...,M,2,2,R,,,30,0,S,Summoned / Cited
3,2019,33182336,111310215,1,2019-02-12,winter,2,N,23,5,...,F,1,2,R,,,32,0,S,Summoned / Cited
4,2019,31971399,107364860,1,2019-01-21,winter,2,N,23,5,...,F,1,2,N,,,35,0,S,Summoned / Cited
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4135,2019,36247339,118888164,1,2019-10-16,autumn,3,N,51,5,...,F,2,2,R,,,55,0,T,Taken INTO Custody
4136,2019,36255107,118879914,1,2019-10-08,autumn,3,N,48,5,...,M,2,2,N,,,39,0,T,Taken INTO Custody
4137,2019,36255122,118879927,1,2019-10-08,autumn,3,N,27,5,...,M,2,2,R,,,50,0,T,Taken INTO Custody
4138,2019,36247354,118879933,1,2019-05-21,spring,3,N,27,5,...,M,2,2,R,,,29,0,T,Taken INTO Custody


In [445]:
joined_df2 = joined_df.merge(offense_types_df, on ="OFFENSE_TYPE_ID")
joined_df2

Unnamed: 0,DATA_YEAR,ARRESTEE_ID,INCIDENT_ID,ARRESTEE_SEQ_NUM,ARREST_DATE,SEASON,ARREST_TYPE_ID,MULTIPLE_INDICATOR,OFFENSE_TYPE_ID,AGE_ID,...,ARREST_TYPE_CODE,ARREST_TYPE_NAME,OFFENSE_CODE,OFFENSE_NAME,CRIME_AGAINST,CT_FLAG,HC_FLAG,HC_CODE,OFFENSE_CATEGORY_NAME,OFFENSE_GROUP
0,2019,33189213,111310423,1,2019-03-01,winter,2,N,16,5,...,S,Summoned / Cited,35A,Drug/Narcotic Violations,Society,N,Y,,Drug/Narcotic Offenses,A
1,2019,33182447,111310447,1,2019-03-05,winter,2,N,16,5,...,S,Summoned / Cited,35A,Drug/Narcotic Violations,Society,N,Y,,Drug/Narcotic Offenses,A
2,2019,33189271,111310549,2,2019-03-18,winter,2,N,16,5,...,S,Summoned / Cited,35A,Drug/Narcotic Violations,Society,N,Y,,Drug/Narcotic Offenses,A
3,2019,31975212,107368302,2,2019-01-11,winter,2,N,16,5,...,S,Summoned / Cited,35A,Drug/Narcotic Violations,Society,N,Y,,Drug/Narcotic Offenses,A
4,2019,31975196,107368239,1,2019-01-06,winter,2,N,16,5,...,S,Summoned / Cited,35A,Drug/Narcotic Violations,Society,N,Y,,Drug/Narcotic Offenses,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4135,2019,37098216,121196510,3,2019-08-21,summer,3,N,30,5,...,T,Taken INTO Custody,40A,Prostitution,Society,N,Y,,Prostitution Offenses,A
4136,2019,37098227,121196510,23,2019-08-28,summer,3,N,30,5,...,T,Taken INTO Custody,40A,Prostitution,Society,N,Y,,Prostitution Offenses,A
4137,2019,37098230,121196510,26,2019-09-02,summer,3,N,30,5,...,T,Taken INTO Custody,40A,Prostitution,Society,N,Y,,Prostitution Offenses,A
4138,2019,34055873,113383897,1,2019-01-22,winter,3,N,38,5,...,T,Taken INTO Custody,09B,Negligent Manslaughter,Person,N,Y,,Homicide Offenses,A


In [448]:
joined_df3 = joined_df2.merge(offenses_df, on ="INCIDENT_ID")
joined_df3

Unnamed: 0,DATA_YEAR_x,ARRESTEE_ID,INCIDENT_ID,ARRESTEE_SEQ_NUM,ARREST_DATE,SEASON,ARREST_TYPE_ID,MULTIPLE_INDICATOR,OFFENSE_TYPE_ID_x,AGE_ID,...,HC_CODE,OFFENSE_CATEGORY_NAME,OFFENSE_GROUP,DATA_YEAR_y,OFFENSE_ID,OFFENSE_TYPE_ID_y,ATTEMPT_COMPLETE_FLAG,LOCATION_ID,NUM_PREMISES_ENTERED,METHOD_ENTRY_CODE
0,2019,33189213,111310423,1,2019-03-01,winter,2,N,16,5,...,,Drug/Narcotic Offenses,A,2019,136132003,16,C,41,,
1,2019,33182447,111310447,1,2019-03-05,winter,2,N,16,5,...,,Drug/Narcotic Offenses,A,2019,136125409,35,C,13,,
2,2019,33182447,111310447,1,2019-03-05,winter,2,N,16,5,...,,Drug/Narcotic Offenses,A,2019,136132061,16,C,13,,
3,2019,33189271,111310549,2,2019-03-18,winter,2,N,16,5,...,,Drug/Narcotic Offenses,A,2019,136130811,16,C,13,,
4,2019,33182481,111310549,1,2019-03-18,winter,1,N,16,5,...,,Drug/Narcotic Offenses,A,2019,136130811,16,C,13,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5431,2019,37098240,121201572,1,2019-12-19,autumn,3,N,43,5,...,2.0,Sex Offenses,A,2019,146744765,8,C,20,,
5432,2019,37098240,121201572,1,2019-12-19,autumn,3,N,43,5,...,2.0,Sex Offenses,A,2019,146744764,43,C,20,,
5433,2019,34055873,113383897,1,2019-01-22,winter,3,N,38,5,...,,Homicide Offenses,A,2019,138485498,38,C,13,,
5434,2019,37604623,122506312,1,2020-01-10,winter,3,N,7,5,...,,Larceny/Theft Offenses,A,2019,148246444,7,C,18,,


In [449]:
joined_df3.columns.to_list()

['DATA_YEAR_x',
 'ARRESTEE_ID',
 'INCIDENT_ID',
 'ARRESTEE_SEQ_NUM',
 'ARREST_DATE',
 'SEASON',
 'ARREST_TYPE_ID',
 'MULTIPLE_INDICATOR',
 'OFFENSE_TYPE_ID_x',
 'AGE_ID',
 'AGE_NUM',
 'SEX_CODE',
 'RACE_ID',
 'ETHNICITY_ID',
 'RESIDENT_CODE',
 'UNDER_18_DISPOSITION_CODE',
 'CLEARANCE_IND',
 'AGE_RANGE_LOW_NUM',
 'AGE_RANGE_HIGH_NUM',
 'ARREST_TYPE_CODE',
 'ARREST_TYPE_NAME',
 'OFFENSE_CODE',
 'OFFENSE_NAME',
 'CRIME_AGAINST',
 'CT_FLAG',
 'HC_FLAG',
 'HC_CODE',
 'OFFENSE_CATEGORY_NAME',
 'OFFENSE_GROUP',
 'DATA_YEAR_y',
 'OFFENSE_ID',
 'OFFENSE_TYPE_ID_y',
 'ATTEMPT_COMPLETE_FLAG',
 'LOCATION_ID',
 'NUM_PREMISES_ENTERED',
 'METHOD_ENTRY_CODE']

In [450]:
joined_df4 = joined_df3.merge(location_df, on ="LOCATION_ID")
joined_df4

Unnamed: 0,DATA_YEAR_x,ARRESTEE_ID,INCIDENT_ID,ARRESTEE_SEQ_NUM,ARREST_DATE,SEASON,ARREST_TYPE_ID,MULTIPLE_INDICATOR,OFFENSE_TYPE_ID_x,AGE_ID,...,OFFENSE_CATEGORY_NAME,OFFENSE_GROUP,DATA_YEAR_y,OFFENSE_ID,OFFENSE_TYPE_ID_y,ATTEMPT_COMPLETE_FLAG,LOCATION_ID,NUM_PREMISES_ENTERED,METHOD_ENTRY_CODE,LOCATION_NAME
0,2019,33189213,111310423,1,2019-03-01,winter,2,N,16,5,...,Drug/Narcotic Offenses,A,2019,136132003,16,C,41,,,School-Elementary/Secondary
1,2019,33184419,111310437,1,2019-03-04,winter,2,N,16,5,...,Drug/Narcotic Offenses,A,2019,136125400,16,C,41,,,School-Elementary/Secondary
2,2019,33189238,111310463,1,2019-03-08,winter,2,N,16,5,...,Drug/Narcotic Offenses,A,2019,136132091,16,C,41,,,School-Elementary/Secondary
3,2019,33182460,111317968,1,2019-03-08,winter,2,N,16,5,...,Drug/Narcotic Offenses,A,2019,136132099,16,C,41,,,School-Elementary/Secondary
4,2019,33189275,111310567,1,2019-03-20,spring,2,N,16,5,...,Drug/Narcotic Offenses,A,2019,136132224,16,C,41,,,School-Elementary/Secondary
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5431,2019,34055863,113389144,1,2019-02-11,winter,1,M,49,5,...,Burglary/Breaking & Entering,A,2019,138494656,5,C,35,,,Gambling Facility/Casino/Race Track
5432,2019,34059542,113385084,1,2019-05-08,spring,3,N,49,5,...,Burglary/Breaking & Entering,A,2019,138500445,5,C,35,,,Gambling Facility/Casino/Race Track
5433,2019,34059542,113385084,1,2019-05-08,spring,3,N,49,5,...,Burglary/Breaking & Entering,A,2019,138500444,49,C,35,,F,Gambling Facility/Casino/Race Track
5434,2019,34059558,113397130,1,2019-05-08,spring,3,N,49,5,...,Burglary/Breaking & Entering,A,2019,138500461,49,C,35,,F,Gambling Facility/Casino/Race Track


In [451]:
joined_df4.to_csv('mock_df.csv', index=False)

In [340]:
#seasons_encoded = pd.get_dummies(get_season, columns=["season_data"])
#seasons_encoded

In [341]:
# Define the features set
X = df.drop(["ARREST_DATE", "CLEARANCE_IND", "season"], axis=1).fillna(0)

# Display the features set
X

y = df["season"]

NameError: name 'df' is not defined

In [None]:
#X["CLEARANCE_IND"].value_counts()


In [None]:
# # Define the target set
# y = seasons.values
# #y = df["ARREST_DATE"].values
# y[:5]

In [None]:
X

In [None]:
#list(X[col].values)

In [None]:
col_to_be_encoded = ["MULTIPLE_INDICATOR", "SEX_CODE", "RESIDENT_CODE", "UNDER_18_DISPOSITION_CODE"]
for col in col_to_be_encoded:
    le = preprocessing.LabelEncoder()
    X[col] = le.fit_transform(list(X[col].values))
    pickle.dump(le, open(f"{col}.pkl","wb"))

X

In [None]:
le = preprocessing.LabelEncoder()
y_encoded = le.fit_transform(list(y.values))
pickle.dump(le, open(f"season.pkl","wb"))

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, random_state=78)

In [None]:
# Determine the shape of our training and testing sets.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
list(y_train)

In [None]:
clf = RandomForestClassifier()
clf.fit(X, y_encoded)

In [None]:
predictions = clf.predict(X_test)

In [None]:
# Calculating the accuracy score.
acc_score = accuracy_score(y_test, predictions)
acc_score

In [None]:
joblib.dump(clf, "trained_random_forest.joblib")

In [None]:
import pandas as pd
offense_type_df = pd.read_csv("IL/NIBRS_OFFENSE_TYPE.csv")
offense_type_df

In [None]:
offenses_df = 

In [None]:
months = {"JAN" : "January", "FEB" : "February", "MAR" : "March", "APR" : "April", "MAY" : "May", "JUN" : "June", "JUL" : "July", "AUG" : "August", "SEP" : "September", "OCT" : "October", "NOV" : "November", "DEC" : "December"}
months["JAN"]

In [None]:
months_1 = {"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"}
for i in months_1:
    datetime.datetime.strptime(i,'%b').strftime('%B')
print(months_1)

In [None]:
for i in df['ARREST_DATE']:
    datetime.datetime.strptime('i','%b').strftime('%B')

In [None]:
april = datetime.datetime.strptime('APR','%b').strftime('%B')
april

In [None]:
from datetime import date, datetime

Y = 2019 

seasons = [('winter', (date(Y,  1,  1),  date(Y,  3, 20))),
           ('spring', (date(Y,  3, 21),  date(Y,  6, 20))),
           ('summer', (date(Y,  6, 21),  date(Y,  9, 22))),
           ('autumn', (date(Y,  9, 23),  date(Y, 12, 20))),
           ('winter', (date(Y, 12, 21),  date(Y, 12, 31)))]

def get_season(now):
    if isinstance(now, datetime):
        now = now.date()
    now = now.replace(year=Y)
    return next(season for season, (start, end) in seasons
                if start <= now <= end)

# print(get_season(date.today()))
#for i in seasons:
#    print(i);
    

In [None]:
import datetime
d = datetime.date(2019, 7 , 24)
print(d)

In [None]:
today = datetime.date.today()
print(today)

In [None]:
birthday = datetime.date(1993, 8, 1)
print(birthday)

In [None]:
days_since_birth = (today - birthday).days
print(days_since_birth)

In [None]:
tdelta = datetime.timedelta(days=10)
print(today - tdelta)

In [None]:
print(today.month)

In [None]:
print(today.day)

In [None]:
print(today.weekday())

In [None]:
print(datetime.time(7, 2, 20, 15))

In [None]:
# Add 10 hours to current day 
hour_delta = datetime.timedelta(hours=10)
print(datetime.datetime.now() + hour_delta)

In [None]:
datetime_today = datetime.datetime.now(tz=pytz.UTC)
datetime_pacific = datetime_today.astimezone(pytz.timezone('US/Pacific'))
print(datetime_pacific)
for tz in pytz.all_timezones:
    print(tz)

In [None]:
# string formatting with dates
# 2019-03-09 -> March 3, 2019
print(datetime_pacific.strftime('%B %d %Y'))