# Big G Express Predictive Models

## Imports

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import datetime as dt
from my_functions import create_target_window, stratifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from umap import UMAP
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, f1_score
from scipy.stats import chi2_contingency, f_oneway
import pickle

## Read ins

In [2]:
# Load the three pickled objects, in the same order as before, and bind them
# to faults / diag / fdwide.
# NOTE(review): unpickling can execute arbitrary code, so these files must
# come from a trusted source.
pickle_paths = (
    '../data/faults_df.pickle',
    '../data/diag_df.pickle',
    '../data/fdwide_df.pickle',
)
faults, diag, fdwide = (pd.read_pickle(path) for path in pickle_paths)

## First Model

In [4]:
# Build the modelling frame from `fdwide` in two steps.
# NOTE(review): both helpers are imported from my_functions and are not
# visible here; going by the argument names, the first presumably labels rows
# within a 36-hour window and the second presumably assigns split groups at
# the 0.6 / 0.8 / 1 breakpoints — confirm against my_functions.
windowed = create_target_window(
    fdwide,
    both_derate_types=False,
    target_window_hours=36,
)
df = stratifier(windowed, trucks=True, breaks=[0.6, 0.8, 1])

In [5]:
# Columns removed from the frame before modelling.
dropcols = [
    'RecordID',
    'actionDescription',
    'ESS_Id',
    'eventDescription',
    'ecuSoftwareVersion',
    'ecuSerialNumber',
    'ecuModel',
    'ecuMake',
    'ecuSource',
    'faultValue',
    'EquipmentID',
    'MCTNumber',
    'LocationTimeStamp',
    'eventGroup',
    'FaultId',
    'EventTimeStamp',
    'ParkingBrake',
    'ServiceDistance',
    'SwitchedBatteryVoltage',
    'FuelTemperature',
    'Throttle',
    'spn-fmi',
]

# Columns that are one-hot encoded by the ColumnTransformer in the model cell.
catcols = ['spn', 'fmi', 'LampStatus']

# Replace the timedelta column with its length in seconds so it is numeric.
seconds_till_last = df['timeTillLast'].dt.total_seconds()
X = df.assign(timeTillLast=seconds_till_last)
X = X.drop(columns=dropcols)
# Keep only rows that have no missing values in the remaining columns.
X = X.dropna()
X.shape

(431977, 28)

In [None]:
# May want to use stratifier after filtering nulls

# LogisticRegression is supervised, so the pipeline must be fitted on a
# feature matrix AND a label vector; fitting on X alone cannot work, and it
# would also leave the label inside the feature matrix.
# NOTE(review): the label column name is not visible in this notebook. It is
# assumed to be 'target' (presumably added by create_target_window) — confirm
# and adjust `target_col` if the helper uses a different name.
target_col = 'target'
if target_col not in X.columns:
    raise KeyError(
        f"Label column {target_col!r} not found in X; "
        "set target_col to the column produced by create_target_window."
    )

features = X.drop(columns=target_col)
labels = X[target_col]
# NOTE(review): if stratifier added a split-assignment column, it is still
# part of `features` here — drop it or filter on it before fitting.

# One-hot encode the categorical columns (dropping the first level of each)
# and pass every other column through unchanged.
ct = ColumnTransformer(
    [
        # NOTE(review): newer scikit-learn releases renamed `sparse` to
        # `sparse_output` (and later removed the old name); switch the keyword
        # when the environment is upgraded.
        ('ohe', OneHotEncoder(sparse = False, drop = 'first'), catcols)
    ],
    remainder="passthrough"
)

# Encode -> standardise every resulting column -> logistic regression.
pipeline = Pipeline([
    ('ct', ct),
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression())
])

pipeline.fit(features, labels)