In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import dill as pickle

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.externals import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn import svm
from sklearn.utils import resample
from sklearn.externals import joblib
import sklearn.metrics as metrics

## Importing required functions
- For data cleaning and feature extraction

In [2]:
def _load_pickled_object(name):
    """Unpickle ``outputs/functions/<name>.pickle`` and return the object.

    The file is opened in a ``with`` block so the handle is closed as soon
    as loading finishes (or fails), instead of being left open for the
    lifetime of the kernel.

    Parameters
    ----------
    name : str
        Base name of the pickle file, without directory or extension.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.

    Raises
    ------
    OSError
        If the file is missing or cannot be opened.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load files
    # that come from a trusted source.
    with open("outputs/functions/" + name + ".pickle", "rb") as handle:
        return pickle.load(handle)


# Cleaning / feature-extraction helpers; the first three are applied to the
# data frame later in this notebook.
clean_columns = _load_pickled_object("clean_columns")
create_windows = _load_pickled_object("create_windows")
extract_features = _load_pickled_object("extract_features")

# The remaining objects are not referenced directly in the visible cells;
# presumably the helpers above look them up by name -- TODO confirm.
min_boundary = _load_pickled_object("min_boundary")
max_boundary = _load_pickled_object("max_boundary")
min_speed = _load_pickled_object("min_speed")
max_speed = _load_pickled_object("max_speed")
min_accuracy = _load_pickled_object("min_accuracy")
max_accuracy = _load_pickled_object("max_accuracy")

## Importing chosen model weights
- Imported best classifier from previous notebook
- An assertion can be added to verify that the expected model was loaded

In [4]:
# Path of the serialized classifier file to load.
MODEL_PATH = "outputs/models/Gradient Boosted Machine_0.72_286175.pkl"

# Deserialize the stored estimator; `model` is used for prediction further down.
model = joblib.load(filename=MODEL_PATH)

## Import Evaluation set
- Assumes that evaluation set will come in the same format as provided features
- Multiple .csv files
- Currently testing with provided training data
    - To be replaced with the evaluation feature data

In [5]:
# Input files for this run: one entry per .csv file, each given as the full
# path including the file name. Edit this list to point at the evaluation data.
SOURCE_LIST = [
    "../grab-ai-safety-data/features/part-00001-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv",
    "../grab-ai-safety-data/features/part-00002-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv",
    "../grab-ai-safety-data/features/part-00003-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv",
]


In [6]:
# Read every file listed in SOURCE_LIST and stack the rows into one frame.
# The per-file frames are built inside the comprehension so that no scratch
# names (and nothing shadowing the stdlib `csv` module) are left behind in the
# notebook namespace, and `df` only ever refers to the combined frame.
# ignore_index=True discards each file's own row index in favour of a fresh
# 0..n-1 index on the combined frame.
df = pd.concat(
    [pd.read_csv(source_path) for source_path in SOURCE_LIST],
    axis=0,
    ignore_index=True,
)

## Apply transformations

In [7]:
# Feed the frame through the three unpickled helpers in order; each step
# receives the previous step's return value.
# NOTE: `df` is rebound to the result, so re-running this cell without first
# re-running the loading cell would apply the helpers to already-transformed data.
df = (
    df
    .pipe(clean_columns)
    .pipe(create_windows)
    .pipe(extract_features)
)

## Predict using imported model

In [8]:
# Run the loaded model on the transformed frame; the result is kept in `preds`
# for the export step at the end of the notebook.
preds = model.predict(df)

## Export results to csv

In [None]:
# TODO: the export step is not implemented yet.
# Note: pandas has no top-level `pd.to_csv`; `to_csv` is a DataFrame/Series
# method whose first argument is the destination path. For example (the
# column name here is only a placeholder):
# pd.Series(preds, name="prediction").to_csv("destination_path", index=False)