In [25]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.impute import SimpleImputer


from data_validation_service import data_validation_service 
from data_persistence import save_object, load_object

In [26]:
# Load the persisted test set stored under the name 'american_express_test_data'.
# Later cells drop and re-attach its 'customer_ID' column, so it is expected to
# be a pandas DataFrame containing that column.
# NOTE(review): load_object comes from the project-local data_persistence module;
# its storage format is not visible here — confirm the file is trusted if it unpickles.
test_data = load_object('american_express_test_data')

In [33]:
# Separate the model inputs from the row identifier. customer_ID stays in
# test_data so it can be re-attached to the predictions in later cells.
test_data_features = test_data.drop(columns='customer_ID')
# Print (rows, columns) of the feature frame as a quick sanity check.
# The previous mid-cell `.head()` call was removed: its result was discarded
# because only a cell's last expression is displayed.
print(test_data_features.shape)

(924621, 51)


In [34]:
# Replace every NaN with the mean of its own column.
# NOTE(review): the imputer is fitted on the test features themselves. If the
# models were trained on data imputed with training-set means, that fitted
# imputer should be reused here instead — confirm against the training notebook.
# NOTE(review): the result is what SimpleImputer.transform returns (a NumPy
# array by default, so column names are lost) — confirm the models accept that.
imp = SimpleImputer(strategy="mean", missing_values=np.nan)
test_data_features_non_null = imp.fit_transform(test_data_features)

In [4]:
# Load the two previously persisted models by their stored names.
# Both are later called with predict_proba on the imputed test features.
# NOTE(review): load_object's storage format is not visible here; if it
# unpickles, only load model files from a trusted source.
logistic_regression_model = load_object('logistic_regression_model')
gradient_boosted_tree_model = load_object('gradient_boosted_tree_model')

In [None]:
# Models to score, in a fixed order: logistic regression first, then the
# gradient boosted tree. The prediction loop iterates over this list.
models = [logistic_regression_model, gradient_boosted_tree_model]

In [None]:
# Score the imputed test features with every model in `models` and keep one
# (customer_ID, prediction) frame per model.
#
# Fixes over the previous version:
#   * each iteration overwrote `prediction_results`, so only the last model's
#     frame survived the loop; results are now collected in a list;
#   * pd.concat(axis=1) aligns on index labels, so a non-default index on
#     test_data would mispair IDs and probabilities (and introduce NaN rows);
#     the ID column's index is reset so rows are paired by position.

# assumes predict_proba returns exactly two columns ordered (class 0, class 1),
# making 'prediction' the class-1 probability — TODO confirm via model.classes_
prediction_output_columns = ['_0_prediction', 'prediction']

# Loop-invariant: positional (0..n-1) index matches the frames built from the
# raw probability arrays below.
customer_ids = test_data['customer_ID'].reset_index(drop=True)

# One result frame per model, in the same order as `models`.
prediction_results_per_model = []
for model in models:
    predictions = model.predict_proba(test_data_features_non_null)
    prediction_results_pd = pd.DataFrame(predictions, columns=prediction_output_columns)
    prediction_results = pd.concat(
        [customer_ids, prediction_results_pd['prediction']], axis=1
    )
    prediction_results_per_model.append(prediction_results)


In [37]:
# Call predict_proba on the imputed test features with the logistic regression
# model; the raw result is wrapped in a DataFrame in the next cell.
logistic_regression_predictions = logistic_regression_model.predict_proba(test_data_features_non_null)

In [50]:
# Give the raw probability output named columns (exactly two are required).
# assumes the columns are ordered (class 0, class 1), so 'prediction' is the
# class-1 probability — TODO confirm against logistic_regression_model.classes_
lr_prediction = pd.DataFrame(logistic_regression_predictions, columns=['_0_prediction', 'prediction'])

In [51]:
# Pair each customer_ID with its predicted probability.
# pd.concat(axis=1) aligns on index labels, and lr_prediction carries a fresh
# 0..n-1 index. Resetting the ID column's index pairs rows by position, so a
# non-default index on test_data cannot mispair rows or introduce NaN rows.
lr_prediction_results = pd.concat(
    [test_data['customer_ID'].reset_index(drop=True), lr_prediction['prediction']],
    axis=1,
)

In [53]:
# Write the logistic regression results to 'lr_prediction_results.csv'
# (a relative path) with a header row and without the index column.
lr_prediction_results.to_csv('lr_prediction_results.csv', header=True, index=False)

In [55]:
# Call predict_proba on the imputed test features with the gradient boosted
# tree model; the raw result is wrapped in a DataFrame in the next cell.
gradient_boosted_tree_predictions = gradient_boosted_tree_model.predict_proba(test_data_features_non_null)

In [56]:
# Give the raw probability output named columns (exactly two are required).
# assumes the columns are ordered (class 0, class 1), so 'prediction' is the
# class-1 probability — TODO confirm against gradient_boosted_tree_model.classes_
gbt_prediction = pd.DataFrame(gradient_boosted_tree_predictions, columns=['_0_prediction', 'prediction'])
# Pair each customer_ID with its predicted probability.
# pd.concat(axis=1) aligns on index labels, and gbt_prediction carries a fresh
# 0..n-1 index. Resetting the ID column's index pairs rows by position, so a
# non-default index on test_data cannot mispair rows or introduce NaN rows.
gbt_prediction_results = pd.concat(
    [test_data['customer_ID'].reset_index(drop=True), gbt_prediction['prediction']],
    axis=1,
)

In [58]:
# Write the gradient boosted tree results to 'gbt_prediction_results.csv'
# (a relative path) with a header row and without the index column.
gbt_prediction_results.to_csv('gbt_prediction_results.csv', header=True, index=False)