In [1]:
# Run the shared helper script inside this notebook's own namespace (-i) so that
# the names it defines are available to the cells below.
# NOTE(review): pd, re, np, lgb, pickle, time, random_state and the scikit-learn
# names used later are never imported in this notebook, so they presumably come
# from helper.py — confirm.
%run -i '../util/helper.py'

### Read Data

In [2]:
# Load the encoded dataset and strip every character outside [A-Za-z0-9_]
# from the column names.
# NOTE(review): presumably done so LightGBM accepts the feature names — confirm.
df = pd.read_csv('../Dataset/brazilian_ecommerce_encoded.csv').rename(
    columns=lambda column_name: re.sub('[^A-Za-z0-9_]+', '', column_name)
)

# Features are every column except the target and the "Unnamed02" column;
# the target is the "is_delayed" column.
X = df.drop(columns=["is_delayed", "Unnamed02"])
y = df["is_delayed"]

In [3]:
# Sanity check: show the dimensions of the feature matrix, then of the target.
print(X.shape, y.shape, sep="\n")

(115633, 29)
(115633,)


### Model Deployment with Best Model Parameters

In [4]:
# Single-point "grid": each hyperparameter has exactly one candidate value, so
# the search cross-validates one configuration and then refits it.
parameter_grid = dict(num_leaves=[127], max_depth=[20], n_estimators=[300])

# 10-fold stratified cross-validation, folds taken in dataset order (no shuffle).
stratified_cv = StratifiedKFold(n_splits=10, shuffle=False)

# Score every fold on recall and F1; the estimator kept after the search is the
# one selected by F1 (refit='f1').
grid_search = GridSearchCV(
    estimator=lgb.LGBMClassifier(random_state=random_state),
    param_grid=parameter_grid,
    scoring=['recall', 'f1'],
    refit='f1',
    cv=stratified_cv,
    n_jobs=-1,
    return_train_score=True,
)
pipe_final = make_pipeline(grid_search)

# Time the whole fit (cross-validation plus refit).
start3 = time()
pipe_final.fit(X, np.ravel(y))
end3 = time()
print(f"LightGBM model takes {end3 - start3}seconds")

LightGBM model takes 44.072818994522095seconds


### Metrics on the Training Dataset (in-sample)

In [5]:
# Predictions are made on the same X the pipeline was fitted on, so the scores
# below are in-sample and not an estimate of performance on unseen data.
final_prediction = pipe_final.predict(X)

# Label / scoring-function pairs, reported in this order.
metric_report = (
    ('Accuracy', accuracy_score),
    ('Recall', recall_score),
    ('Precision', precision_score),
    ('F-1 Score', f1_score),
)
for metric_label, metric_fn in metric_report:
    print(f'{metric_label}: {metric_fn(y, final_prediction)}')

Accuracy: 0.990002853856598
Recall: 0.8732216613125287
Precision: 0.9933437744714174
F-1 Score: 0.9294175112956404


### Save Model

In [6]:
# Persist the fitted pipeline to disk with pickle.
# NOTE: pickle files can execute arbitrary code when loaded; only load this
# artifact from a trusted location.
filename = '../model/Final_Model.sav'

# Open the target through a context manager so the handle is flushed and closed
# deterministically, even if pickling raises part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(pipe_final, model_file)