In [0]:
from numpy.random import seed
seed(42)

In [3]:
from google.colab import files
uploaded = files.upload()

Saving lineartest.csv to lineartest (3).csv
Saving exampleData.csv to exampleData (3).csv


In [4]:
import pandas as pd
import io

df = pd.read_csv(io.StringIO(uploaded['exampleData.csv'].decode('utf-8')))
df.head()

Unnamed: 0,a_age,a_gender,a_race,v_age,v_gender,v_race,crime,weapon
0,20,Male,White,65,Female,Multiple,Justifiable Homicide,Firearm
1,25,Female,Multiple,60,Female,White,Murder and Nonnegligent Manslaughter,Asphyxiation
2,30,Male,Other,55,Male,White,Justifiable Homicide,Pushed or Thrown Out Window
3,35,Female,Asian,50,Female,Black,Negligent Manslaughter,Lethal Cutting Instrument
4,40,Female,Black,45,Male,American Indian,Negligent Manslaughter,Explosives


In [0]:
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV

# We create the preprocessing pipelines for both numeric and categorical data.
numeric_features = ['a_age', 'v_age']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])
categorical_features = ['a_gender', 'a_race', 'v_gender', 'v_race']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])
label_features = ['crime', 'weapon']
label_transformer = Pipeline(steps=[
    ('lb', LabelBinarizer())])
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])
        #('ys', label_transformer, label_features)])
# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
clf_weapon = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', LogisticRegression(solver='lbfgs', multi_class='multinomial'))])
clf_crime = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', LogisticRegression(solver='lbfgs', multi_class='multinomial'))])

In [6]:
y1 = df['weapon']
y2 = df['crime']
X = df.drop(['weapon'], axis=1).drop(['crime'], axis=1)
print(X, y1, y2)

   a_age a_gender           a_race  v_age v_gender           v_race
0     20     Male            White     65   Female         Multiple
1     25   Female         Multiple     60   Female            White
2     30     Male            Other     55     Male            White
3     35   Female            Asian     50   Female            Black
4     40   Female            Black     45     Male  American Indian
5     45   Female         Multiple     40     Male            Black
6     50     Male  Native Hawaiian     35   Female            Other
7     55   Female            White     30     Male            White
8     60     Male            White     25   Female            Asian
9     65   Female            Asian     20   Female            Asian 0                        Firearm
1                   Asphyxiation
2    Pushed or Thrown Out Window
3      Lethal Cutting Instrument
4                     Explosives
5                        Firearm
6                   Blunt Object
7    Pushed or Thrown

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2)
print(X_train) 
print(y_train)
weaponmodel = clf_weapon.fit(X_train, y_train)
print("model score: %.3f" % clf_weapon.score(X_test, y_test))

   a_age a_gender           a_race  v_age v_gender           v_race
0     20     Male            White     65   Female         Multiple
6     50     Male  Native Hawaiian     35   Female            Other
8     60     Male            White     25   Female            Asian
5     45   Female         Multiple     40     Male            Black
3     35   Female            Asian     50   Female            Black
7     55   Female            White     30     Male            White
1     25   Female         Multiple     60   Female            White
4     40   Female            Black     45     Male  American Indian
0                        Firearm
6                   Blunt Object
8      Lethal Cutting Instrument
5                        Firearm
3      Lethal Cutting Instrument
7    Pushed or Thrown Out Window
1                   Asphyxiation
4                     Explosives
Name: weapon, dtype: object
model score: 0.000


In [8]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y2, test_size=0.2)
print(X_train2) 
print(y_train2)
crimemodel = clf_crime.fit(X_train2, y_train2)
print("model score: %.3f" % clf_crime.score(X_test2, y_test2))

   a_age a_gender           a_race  v_age v_gender           v_race
8     60     Male            White     25   Female            Asian
5     45   Female         Multiple     40     Male            Black
3     35   Female            Asian     50   Female            Black
4     40   Female            Black     45     Male  American Indian
7     55   Female            White     30     Male            White
9     65   Female            Asian     20   Female            Asian
6     50     Male  Native Hawaiian     35   Female            Other
2     30     Male            Other     55     Male            White
8    Murder and Nonnegligent Manslaughter
5                  Negligent Manslaughter
3                  Negligent Manslaughter
4                  Negligent Manslaughter
7                    Justifiable Homicide
9    Murder and Nonnegligent Manslaughter
6                    Justifiable Homicide
2                    Justifiable Homicide
Name: crime, dtype: object
model score: 0.000


In [12]:
new_df = pd.read_csv(io.StringIO(uploaded['lineartest.csv'].decode('utf-8')))
new_df

Unnamed: 0,a_age,a_gender,a_race,v_age,v_gender,v_race
0,15,Male,White,60,Female,Asian
1,20,Female,Asian,62,Male,White
2,21,Male,Black,53,Male,Black
3,22,Male,Multiple,43,Male,Black
4,23,Male,Other,46,Male,Black
5,24,Female,Asian,39,Male,White
6,25,Male,Asian,38,Female,White
7,30,Female,White,37,Female,Multiple
8,35,Female,Black,55,Female,Other
9,36,Male,White,45,Male,White


In [15]:
new_df_crime = crimemodel.predict(new_df)
new_df_crime

array(['Justifiable Homicide', 'Negligent Manslaughter',
       'Negligent Manslaughter', 'Negligent Manslaughter',
       'Justifiable Homicide', 'Justifiable Homicide',
       'Justifiable Homicide', 'Negligent Manslaughter',
       'Negligent Manslaughter', 'Justifiable Homicide',
       'Justifiable Homicide', 'Murder and Nonnegligent Manslaughter',
       'Murder and Nonnegligent Manslaughter',
       'Murder and Nonnegligent Manslaughter', 'Negligent Manslaughter'],
      dtype=object)

In [16]:
new_df_weapon = weaponmodel.predict(new_df)
new_df_weapon

array(['Firearm', 'Firearm', 'Firearm', 'Firearm', 'Firearm', 'Firearm',
       'Lethal Cutting Instrument', 'Firearm', 'Explosives', 'Firearm',
       'Lethal Cutting Instrument', 'Lethal Cutting Instrument',
       'Lethal Cutting Instrument', 'Lethal Cutting Instrument',
       'Firearm'], dtype=object)