In [1]:
# Re-import edited modules automatically before each cell runs, so changes to
# local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Import Libraries

# DS 
import os
from itertools import islice
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from mpl_toolkits.mplot3d import Axes3D
from scipy import stats
import seaborn as sns

# PLOTLY
import plotly.offline as po
import plotly.graph_objs as go

# SKLEARN
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, plot_tree
from sklearn.ensemble import (GradientBoostingRegressor, 
                              GradientBoostingClassifier, 
                              AdaBoostClassifier,
                              AdaBoostRegressor,
                              RandomForestRegressor,
                              RandomForestClassifier)
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, make_scorer, confusion_matrix, accuracy_score, plot_roc_curve
from sklearn.ensemble.partial_dependence import partial_dependence, plot_partial_dependence

import warnings
warnings.filterwarnings('ignore')
# Aesthetic Plots
%matplotlib inline


# Aesthetic Plots
import mplcyberpunk
plt.style.use('cyberpunk')
%matplotlib inline

In [3]:
# Project-local cleaning helpers, pulled in with a wildcard import.
# NOTE(review): `gradient_clean_Xy` (called further down) presumably comes from
# this module — confirm, and prefer importing the needed names explicitly.
from src.gradient_clean_data import *

In [4]:
# GLOBAL VARS
# Shared two-element size tuple; presumably passed as `figsize` to plotting
# calls later in the notebook — confirm at the call sites.
FSIZE = 12, 8

In [5]:
# Load CSV
# The path is relative to the notebook's working directory.
TRAIN_CSV = 'data/Train.csv'
df = pd.read_csv(TRAIN_CSV)

In [6]:
# CSV Head
# Preview the first rows of the raw frame as this cell's rich output.
df.head()

Unnamed: 0,SalesID,SalePrice,MachineID,ModelID,datasource,auctioneerID,YearMade,MachineHoursCurrentMeter,UsageBand,saledate,...,Undercarriage_Pad_Width,Stick_Length,Thumb,Pattern_Changer,Grouser_Type,Backhoe_Mounting,Blade_Type,Travel_Controls,Differential_Type,Steering_Controls
0,1139246,66000,999089,3157,121,3.0,2004,68.0,Low,11/16/2006 0:00,...,,,,,,,,,Standard,Conventional
1,1139248,57000,117657,77,121,3.0,1996,4640.0,Low,3/26/2004 0:00,...,,,,,,,,,Standard,Conventional
2,1139249,10000,434808,7009,121,3.0,2001,2838.0,High,2/26/2004 0:00,...,,,,,,,,,,
3,1139251,38500,1026470,332,121,3.0,2001,3486.0,High,5/19/2011 0:00,...,,,,,,,,,,
4,1139253,11000,1057373,17311,121,3.0,2007,722.0,Medium,7/23/2009 0:00,...,,,,,,,,,,


In [7]:
# Clean DF for Gradient Boosting
# Two candidate lists of (presumably) column names to drop. This cell only
# defines them; nothing here applies them to a frame.
colToDropGB1 = [
    'PrevSale',
    'ProductGroupDesc',
    'MachineID',
    'SaleYear',
]
# Wider variant: every name from the first list plus six additional ones.
colToDropGB2 = [
    'SaleCount',
    'MaxToDate',
    'PrevSale',
    'ProductGroupDesc',
    'MachineID',
    'Pad_Type',
    'Turbocharged',
    'Backhoe_Mounting',
    'Differential_Type',
    'SaleYear',
]

In [9]:
# Turn the raw frame into a feature frame X and a target y via the project helper.
# NOTE(review): gradient_clean_Xy is not defined in this notebook — presumably it
# comes from src.gradient_clean_data. The displayed X has no SalePrice column, so
# y is presumably SalePrice — confirm against the helper.
X, y = gradient_clean_Xy(df)

In [11]:
# Show the cleaned feature frame; pandas elides the middle rows and columns in
# the rendered output.
X

Unnamed: 0,SalesID,ModelID,datasource,auctioneerID,YearMade,MachineHoursCurrentMeter,UsageBand,saledate,fiModelDesc,fiBaseModel,...,Pattern_Changer,Grouser_Type,Backhoe_Mounting,Blade_Type,Travel_Controls,Differential_Type,Steering_Controls,Vehicle Type,Power Rating,Horsepower
0,1139246,3157,121,3.0,2004,68.0,Low,11/16/2006 0:00,521D,521,...,,,,,,Standard,Conventional,Wheel Loader,110.0 to 120.0 Horsepower,120.0
1,1139248,77,121,3.0,1996,4640.0,Low,3/26/2004 0:00,950FII,950,...,,,,,,Standard,Conventional,Wheel Loader,150.0 to 175.0 Horsepower,175.0
2,1139249,7009,121,3.0,2001,2838.0,High,2/26/2004 0:00,226,226,...,,,,,,,,Skid Steer Loaders,1351.0 to 1601.0 Lb Operating Capacity,
3,1139251,332,121,3.0,2001,3486.0,High,5/19/2011 0:00,PC120-6E,PC120,...,,,,,,,,Track Excavators,12.0 to 14.0 Metric Tons,
4,1139253,17311,121,3.0,2007,722.0,Medium,7/23/2009 0:00,S175,S175,...,,,,,,,,Skid Steer Loaders,1601.0 to 1751.0 Lb Operating Capacity,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401120,6333336,21439,149,1.0,2005,,,11/2/2011 0:00,35NX2,35,...,None or Unspecified,Double,,,,,,Track Excavators,3.0 to 4.0 Metric Tons,
401121,6333337,21439,149,1.0,2005,,,11/2/2011 0:00,35NX2,35,...,None or Unspecified,Double,,,,,,Track Excavators,3.0 to 4.0 Metric Tons,
401122,6333338,21439,149,1.0,2005,,,11/2/2011 0:00,35NX2,35,...,None or Unspecified,Double,,,,,,Track Excavators,3.0 to 4.0 Metric Tons,
401123,6333341,21435,149,2.0,2005,,,10/25/2011 0:00,30NX,30,...,None or Unspecified,Double,,,,,,Track Excavators,2.0 to 3.0 Metric Tons,
