# Model Submission Notebook - Home Credit Default Risk

### Initialization - Load Packages and Data

In [None]:
import pandas as pd
import joblib
import numpy as np

# Folder holding the competition CSV files, relative to the working directory.
# NOTE(review): the repeated "../input" segment looks redundant; the path is
# left untouched here so the file that gets read does not change.
MainDir = "../input/../input/home-credit-default-risk"

# Applications to be scored; the feature engineering further down mutates
# this DataFrame in place.
application_test_csv = f'{MainDir}/application_test.csv'
test = pd.read_csv(application_test_csv)

### Load Model

In [None]:
# Serialized artifacts: the preprocessor that turns the engineered table into
# model inputs, and the estimator used for scoring.
# NOTE: joblib.load unpickles arbitrary Python objects, so only artifacts from
# a trusted source should be loaded here.
preprocessor_file = '../input/defaultdata06/default_preprocessor_06.joblib'
model_file = '../input/defaultdata06/default_model_06.joblib'

preprocessor = joblib.load(preprocessor_file)
model = joblib.load(model_file)

# Quick sanity check of what kind of estimator was restored.
print(type(model))

### Preprocessing

In [None]:
# Feature engineering for the scoring set. The derived columns are added to
# ``test`` so that ``preprocessor.transform`` receives them further down.
#
# Every cleaned column is assigned back with ``test[col] = ...`` instead of
# calling ``inplace=True`` on ``test[col]`` / ``test.col``: the chained
# in-place form acts on an intermediate Series, is warned about by recent
# pandas versions, and no longer updates the DataFrame once Copy-on-Write is
# enabled.

# 365243 is treated as a junk placeholder in DAYS_EMPLOYED; turn it into NaN.
test['DAYS_EMPLOYED'] = test['DAYS_EMPLOYED'].replace({365243: np.nan})

# Ratios between the monetary columns.
test['CI_ratio'] = test['AMT_CREDIT'] / test['AMT_INCOME_TOTAL']        # credit-to-income ratio
test['AI_ratio'] = test['AMT_ANNUITY'] / test['AMT_INCOME_TOTAL']       # annuity-to-income ratio
test['AC_ratio'] = test['AMT_CREDIT'] / test['AMT_ANNUITY']             # credit-to-annuity ratio (approximate loan term)
test['CG_ratio'] = test['AMT_CREDIT'] / test['AMT_GOODS_PRICE']         # credit-to-goods-price ratio (how much was financed)
test['FLAG_CG_ratio'] = test['AMT_CREDIT'] > test['AMT_GOODS_PRICE']    # True when the credit exceeds the goods price

# Natural logs of the monetary columns.
test['log_INCOME'] = np.log(test['AMT_INCOME_TOTAL'])
test['log_ANNUITY'] = np.log(test['AMT_ANNUITY'])
test['log_CREDIT'] = np.log(test['AMT_CREDIT'])
test['log_GOODS'] = np.log(test['AMT_GOODS_PRICE'])

# The three external scores are selected by name. The previous positional
# slice ``iloc[:, 41:44]`` only lands on these columns for one exact column
# layout.
# NOTE(review): the test table has no TARGET column, so positions are
# presumably shifted by one relative to the training table - confirm that the
# training notebook derived these features from EXT_SOURCE_1..3.
ext_cols = ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']
ext_scores = test[ext_cols]                       # snapshot taken before any filling
ext_missing = ext_scores.isnull().sum(axis=1)     # per-row count of missing scores

test['MissingBureau'] = ext_missing.astype("category")   # number of external scores that are missing

# Average of the scores that are present. ``sum`` skips NaN, so a row with all
# three scores missing evaluates 0 / 0 and stays NaN.
test['AVG_EXT'] = ext_scores.sum(axis=1) / (len(ext_cols) - ext_missing)

# Fill each missing score with the row's average of the available scores.
for ext_col in ext_cols:
    test[ext_col] = test[ext_col].fillna(test['AVG_EXT'])

# Columns removed before the table is handed to the fitted preprocessor.
# NOTE(review): presumably the same columns were excluded when the
# preprocessor was fitted - verify against the training notebook.
columns_to_drop = [
    'REG_REGION_NOT_LIVE_REGION',
    'AMT_REQ_CREDIT_BUREAU_WEEK',
    'HOUSETYPE_MODE',
    'OCCUPATION_TYPE',
    'FLAG_MOBIL',
    'FLAG_CONT_MOBILE',
    'NAME_TYPE_SUITE',
    'FLAG_DOCUMENT_4',
    'ORGANIZATION_TYPE',
    'FONDKAPREMONT_MODE',
    'FLAG_DOCUMENT_15',
    'FLAG_DOCUMENT_7',
    'FLAG_DOCUMENT_16',
    'FLAG_DOCUMENT_9',
    'FLAG_DOCUMENT_10',
    'FLAG_DOCUMENT_11',
    'FLAG_DOCUMENT_12',
    'FLAG_DOCUMENT_13',
    'FLAG_DOCUMENT_14',
    'AMT_REQ_CREDIT_BUREAU_DAY',
    'AMT_REQ_CREDIT_BUREAU_HOUR',
    'FLAG_DOCUMENT_21',
    'FLAG_DOCUMENT_20',
    'FLAG_DOCUMENT_19',
    'FLAG_DOCUMENT_18',
    'FLAG_DOCUMENT_17',
    'FLAG_DOCUMENT_2',
]
test.drop(columns=columns_to_drop, inplace=True)

# Apply the already-fitted preprocessor (transform only, no refitting) and
# report the shape of the resulting model input.
X_test = preprocessor.transform(test)
print(X_test.shape)

### Test Predictions

In [None]:
# Per-row class probabilities from the loaded model.
test_pred = model.predict_proba(X_test)

# Show the output shape followed by the first five rows as a sanity check.
for preview in (test_pred.shape, test_pred[:5]):
    print(preview)

### Submission

In [None]:
# Start from the competition's sample file so the output keeps its layout.
# NOTE(review): this assumes the sample file lists its rows in the same order
# as application_test.csv - confirm before relying on the scores.
submission = pd.read_csv('../input/home-credit-default-risk/sample_submission.csv')

# Overwrite the placeholder values with column 1 of the predict_proba output,
# which this notebook uses as the probability of default. Bracket assignment
# always targets a column; attribute-style assignment (``submission.TARGET``)
# would silently set a plain attribute if the column were ever absent.
submission['TARGET'] = test_pred[:, 1]

# A bare ``submission.head(10)`` in the middle of a cell displays nothing, so
# the preview is printed explicitly.
print(submission.head(10))

submission.to_csv('default_submission_06.csv', index=False, header=True)