In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import matplotlib.pyplot as plt

In [3]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report,balanced_accuracy_score
from sklearn.linear_model import LogisticRegression

In [4]:
from imblearn.metrics import classification_report_imbalanced
from imblearn.ensemble import EasyEnsembleClassifier
from imblearn.metrics import classification_report_imbalanced
from imblearn.over_sampling import SMOTE

In [10]:
columns = [
    "loan_amnt", "int_rate", "installment", "home_ownership",
    "annual_inc", "verification_status", "issue_d", "loan_status",
    "pymnt_plan", "dti", "delinq_2yrs", "inq_last_6mths",
    "open_acc", "pub_rec", "revol_bal", "total_acc",
    "initial_list_status", "out_prncp", "out_prncp_inv", "total_pymnt",
    "total_pymnt_inv", "total_rec_prncp", "total_rec_int", "total_rec_late_fee",
    "recoveries", "collection_recovery_fee", "last_pymnt_amnt", "next_pymnt_d",
    "collections_12_mths_ex_med", "policy_code", "application_type", "acc_now_delinq",
    "tot_coll_amt", "tot_cur_bal", "open_acc_6m", "open_act_il",
    "open_il_12m", "open_il_24m", "mths_since_rcnt_il", "total_bal_il",
    "il_util", "open_rv_12m", "open_rv_24m", "max_bal_bc",
    "all_util", "total_rev_hi_lim", "inq_fi", "total_cu_tl",
    "inq_last_12m", "acc_open_past_24mths", "avg_cur_bal", "bc_open_to_buy",
    "bc_util", "chargeoff_within_12_mths", "delinq_amnt", "mo_sin_old_il_acct",
    "mo_sin_old_rev_tl_op", "mo_sin_rcnt_rev_tl_op", "mo_sin_rcnt_tl", "mort_acc",
    "mths_since_recent_bc", "mths_since_recent_inq", "num_accts_ever_120_pd", "num_actv_bc_tl",
    "num_actv_rev_tl", "num_bc_sats", "num_bc_tl", "num_il_tl",
    "num_op_rev_tl", "num_rev_accts", "num_rev_tl_bal_gt_0",
    "num_sats", "num_tl_120dpd_2m", "num_tl_30dpd", "num_tl_90g_dpd_24m",
    "num_tl_op_past_12m", "pct_tl_nvr_dlq", "percent_bc_gt_75", "pub_rec_bankruptcies",
    "tax_liens", "tot_hi_cred_lim", "total_bal_ex_mort", "total_bc_limit",
    "total_il_high_credit_limit", "hardship_flag", "debt_settlement_flag"
]

target = ["loan_status"]

In [11]:
# Load the data
file_path = Path('LoanStats_2019Q1.csv.zip')
df = pd.read_csv(file_path, skiprows=1)[:-2]
df = df.loc[:, columns].copy()

# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')

# Drop the null rows
df = df.dropna()

# Remove the `Issued` loan status
issued_mask = df['loan_status'] != 'Issued'
df = df.loc[issued_mask]

# convert interest rate to numerical
df['int_rate'] = df['int_rate'].str.replace('%', '')
df['int_rate'] = df['int_rate'].astype('float') / 100


# Convert the target column values to low_risk and high_risk based on their values
x = {'Current': 'low_risk'}   
df = df.replace(x)

x = dict.fromkeys(['Late (31-120 days)', 'Late (16-30 days)', 'Default', 'In Grace Period'], 'high_risk')    
df = df.replace(x)

df.reset_index(inplace=True, drop=True)

len(df)

68817

In [12]:
df = pd.get_dummies(df, columns=["issue_d"
                                 ,"pymnt_plan"
                                 ,"home_ownership"
                                 ,"initial_list_status"
                                 ,"next_pymnt_d"
                                 ,"application_type"
                                 ,"hardship_flag"
                                 ,"debt_settlement_flag"
                                 ,"verification_status"])

# Split the Data into Training and Testing

In [13]:
# Create our features
X =  df.drop(columns="loan_status")

# Create our target
y = df["loan_status"]

In [14]:
X.describe()

Unnamed: 0,loan_amnt,int_rate,installment,annual_inc,dti,delinq_2yrs,inq_last_6mths,open_acc,pub_rec,revol_bal,...,initial_list_status_w,next_pymnt_d_Apr-2019,next_pymnt_d_May-2019,application_type_Individual,application_type_Joint App,hardship_flag_N,debt_settlement_flag_N,verification_status_Not Verified,verification_status_Source Verified,verification_status_Verified
count,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,...,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0
mean,16677.594562,0.127718,480.652863,88213.71,21.778153,0.217766,0.497697,12.58734,0.12603,17604.142828,...,0.876121,0.383161,0.616839,0.86034,0.13966,1.0,1.0,0.478007,0.373992,0.148001
std,10277.34859,0.04813,288.062432,115580.0,20.199244,0.718367,0.758122,6.022869,0.336797,21835.8804,...,0.329446,0.486161,0.486161,0.346637,0.346637,0.0,0.0,0.49952,0.483865,0.355104
min,1000.0,0.06,30.89,40.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,9000.0,0.0881,265.73,50000.0,13.89,0.0,0.0,8.0,0.0,6293.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
50%,15000.0,0.118,404.56,73000.0,19.76,0.0,0.0,11.0,0.0,12068.0,...,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
75%,24000.0,0.1557,648.1,104000.0,26.66,0.0,1.0,16.0,0.0,21735.0,...,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0
max,40000.0,0.3084,1676.23,8797500.0,999.0,18.0,5.0,72.0,4.0,587191.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
y.value_counts()

low_risk     68470
high_risk      347
Name: loan_status, dtype: int64

In [16]:
# Create X_train, X_test, y_train, y_test
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=1)

## Data Pre-Processing

Scale the training and testing data using the `StandardScaler` from `sklearn`. Remember that when scaling the data, you only scale the features data (`X_train` and `X_testing`).

* **Create the StandardScaler instance**

In [17]:
scaler = StandardScaler()

* **Fit the Standard Scaler with the training data**

In [18]:
scaler.fit(X_train,X_test)

* **Scale the training and testing data**

In [19]:
X_train_scaled = X_train_scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ensemble Learners

In this section, you will compare two ensemble algorithms to determine which algorithm results in the best performance. You will train a Balanced Random Forest Classifier and an Easy Ensemble classifier . For each algorithm, be sure to complete the folliowing steps:

1. Train the model using the training data. 
2. Calculate the balanced accuracy score from sklearn.metrics.
3. Print the confusion matrix from sklearn.metrics.
4. Generate a classication report using the `imbalanced_classification_report` from imbalanced-learn.
5. For the Balanced Random Forest Classifier onely, print the feature importance sorted in descending order (most important feature to least important) along with the feature score

Note: Use a random state of 1 for each algorithm to ensure consistency between tests

### Balanced Random Forest Classifier

In [23]:
rf_model = RandomForestClassifier(random_state=1)

* **Resample the training data with the BalancedRandomForestClassifier**

In [33]:
rf_model = RandomForestClassifier(random_state=1)

In [34]:
rf_model.fit(X_train_scaled,y_train)
rf_predictions = rf_model.predict(X_test_scaled)

NameError: name 'X_test_scaled' is not defined

* **Calculated the balanced accuracy score**

In [None]:
acc_score = accuracy_score(y_test, rf_predictions)
acc_score

* **Display the confusion matrix**

In [None]:
cm = confusion_matrix(y_test,rf_predictions)
cm_df = pd.DataFrame(cm, index=["Actual 0", "Actual 1 "], columns=["Prediction 0", "Prediction 1"])
cm_df

* **Print the imbalanced classification report**

In [None]:
cr = classification_report(y_test,rf_predictions)
print("Random Forest Classification Report :")
print(cr)

* **List the features sorted in descending order by feature importance**

In [None]:
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

### Easy Ensemble Classifier

In [30]:
EE = EasyEnsembleClassifier()

* **Train the EasyEnsembleClassifier**

In [31]:
EE.fit(X_train, y_train)
ee_predictions = EE.predict(X_test)

* **Display the confusion matrix**

In [None]:
ee_cm = confusion_matrix(y_test,ee_predictions)
ee_cm

* **Print the imbalanced classification report**

In [None]:
ee_imbal_class = classification_report_imbalanced(y_test,
                                                  ee_predictions)
print(ee_imbal_class)

---
---

# Split the Data into Training and Testing

In [13]:
# Create our features
X =  df.drop(columns="loan_status")

# Create our target
y = df["loan_status"]

In [14]:
X.describe()

Unnamed: 0,loan_amnt,int_rate,installment,annual_inc,dti,delinq_2yrs,inq_last_6mths,open_acc,pub_rec,revol_bal,...,initial_list_status_w,next_pymnt_d_Apr-2019,next_pymnt_d_May-2019,application_type_Individual,application_type_Joint App,hardship_flag_N,debt_settlement_flag_N,verification_status_Not Verified,verification_status_Source Verified,verification_status_Verified
count,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,...,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0
mean,16677.594562,0.127718,480.652863,88213.71,21.778153,0.217766,0.497697,12.58734,0.12603,17604.142828,...,0.876121,0.383161,0.616839,0.86034,0.13966,1.0,1.0,0.478007,0.373992,0.148001
std,10277.34859,0.04813,288.062432,115580.0,20.199244,0.718367,0.758122,6.022869,0.336797,21835.8804,...,0.329446,0.486161,0.486161,0.346637,0.346637,0.0,0.0,0.49952,0.483865,0.355104
min,1000.0,0.06,30.89,40.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,9000.0,0.0881,265.73,50000.0,13.89,0.0,0.0,8.0,0.0,6293.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
50%,15000.0,0.118,404.56,73000.0,19.76,0.0,0.0,11.0,0.0,12068.0,...,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
75%,24000.0,0.1557,648.1,104000.0,26.66,0.0,1.0,16.0,0.0,21735.0,...,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0
max,40000.0,0.3084,1676.23,8797500.0,999.0,18.0,5.0,72.0,4.0,587191.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
y.value_counts()

low_risk     68470
high_risk      347
Name: loan_status, dtype: int64

# <U>**PART 2**

---

# Credit Risk Resampling Techniques

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import matplotlib.pyplot as plt

In [7]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, balanced_accuracy_score

In [8]:
from imblearn.over_sampling import SMOTE
from imblearn.metrics import classification_report_imbalanced

In [9]:
from collections import Counter

# Read the CSV and Perform Basic Data Cleaning

In [10]:
columns = [
    "loan_amnt", "int_rate", "installment", "home_ownership",
    "annual_inc", "verification_status", "issue_d", "loan_status",
    "pymnt_plan", "dti", "delinq_2yrs", "inq_last_6mths",
    "open_acc", "pub_rec", "revol_bal", "total_acc",
    "initial_list_status", "out_prncp", "out_prncp_inv", "total_pymnt",
    "total_pymnt_inv", "total_rec_prncp", "total_rec_int", "total_rec_late_fee",
    "recoveries", "collection_recovery_fee", "last_pymnt_amnt", "next_pymnt_d",
    "collections_12_mths_ex_med", "policy_code", "application_type", "acc_now_delinq",
    "tot_coll_amt", "tot_cur_bal", "open_acc_6m", "open_act_il",
    "open_il_12m", "open_il_24m", "mths_since_rcnt_il", "total_bal_il",
    "il_util", "open_rv_12m", "open_rv_24m", "max_bal_bc",
    "all_util", "total_rev_hi_lim", "inq_fi", "total_cu_tl",
    "inq_last_12m", "acc_open_past_24mths", "avg_cur_bal", "bc_open_to_buy",
    "bc_util", "chargeoff_within_12_mths", "delinq_amnt", "mo_sin_old_il_acct",
    "mo_sin_old_rev_tl_op", "mo_sin_rcnt_rev_tl_op", "mo_sin_rcnt_tl", "mort_acc",
    "mths_since_recent_bc", "mths_since_recent_inq", "num_accts_ever_120_pd", "num_actv_bc_tl",
    "num_actv_rev_tl", "num_bc_sats", "num_bc_tl", "num_il_tl",
    "num_op_rev_tl", "num_rev_accts", "num_rev_tl_bal_gt_0",
    "num_sats", "num_tl_120dpd_2m", "num_tl_30dpd", "num_tl_90g_dpd_24m",
    "num_tl_op_past_12m", "pct_tl_nvr_dlq", "percent_bc_gt_75", "pub_rec_bankruptcies",
    "tax_liens", "tot_hi_cred_lim", "total_bal_ex_mort", "total_bc_limit",
    "total_il_high_credit_limit", "hardship_flag", "debt_settlement_flag"
]

target = ["loan_status"]

In [11]:
# Load the data
file_path = Path('LoanStats_2019Q1.csv.zip')
df = pd.read_csv(file_path, skiprows=1)[:-2]
df = df.loc[:, columns].copy()

# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')

# Drop the null rows
df = df.dropna()

# Remove the `Issued` loan status
issued_mask = df['loan_status'] != 'Issued'
df = df.loc[issued_mask]

# convert interest rate to numerical
df['int_rate'] = df['int_rate'].str.replace('%', '')
df['int_rate'] = df['int_rate'].astype('float') / 100


# Convert the target column values to low_risk and high_risk based on their values
x = {'Current': 'low_risk'}   
df = df.replace(x)

x = dict.fromkeys(['Late (31-120 days)', 'Late (16-30 days)', 'Default', 'In Grace Period'], 'high_risk')    
df = df.replace(x)

df.reset_index(inplace=True, drop=True)

len(df)

68817

In [12]:
df = pd.get_dummies(df, columns=["issue_d"
                                 ,"pymnt_plan"
                                 ,"home_ownership"
                                 ,"initial_list_status"
                                 ,"next_pymnt_d"
                                 ,"application_type"
                                 ,"hardship_flag"
                                 ,"debt_settlement_flag"
                                 ,"verification_status"])

In [20]:
# Create the StandardScaler instance
scaler = StandardScaler()

In [21]:
# Fit the Standard Scaler with the training data
# When fitting scaling functions, only train on the training dataset
scaler.fit(X_train,X_test)


StandardScaler()

In [22]:
# Scale the training and testing data
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Ensemble Learners

In this section, you will compare two ensemble algorithms to determine which algorithm results in the best performance. You will train a Balanced Random Forest Classifier and an Easy Ensemble classifier . For each algorithm, be sure to complete the folliowing steps:

1. Train the model using the training data. 
2. Calculate the balanced accuracy score from sklearn.metrics.
3. Print the confusion matrix from sklearn.metrics.
4. Generate a classication report using the `imbalanced_classification_report` from imbalanced-learn.
5. For the Balanced Random Forest Classifier onely, print the feature importance sorted in descending order (most important feature to least important) along with the feature score

Note: Use a random state of 1 for each algorithm to ensure consistency between tests

### Balanced Random Forest Classifier

In [23]:
rf_model = RandomForestClassifier(random_state=1)

* **Resample the training data with the BalancedRandomForestClassifier**

In [33]:
rf_model = RandomForestClassifier(random_state=1)

In [34]:
rf_model.fit(X_train_scaled,y_train)
rf_predictions = rf_model.predict(X_test_scaled)

NameError: name 'X_test_scaled' is not defined

* **Calculated the balanced accuracy score**

In [None]:
acc_score = accuracy_score(y_test, rf_predictions)
acc_score

* **Display the confusion matrix**

In [None]:
cm = confusion_matrix(y_test,rf_predictions)
cm_df = pd.DataFrame(cm, index=["Actual 0", "Actual 1 "], columns=["Prediction 0", "Prediction 1"])
cm_df

* **Print the imbalanced classification report**

In [None]:
cr = classification_report(y_test,rf_predictions)
print("Random Forest Classification Report :")
print(cr)

* **List the features sorted in descending order by feature importance**

In [None]:
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

### Easy Ensemble Classifier

In [30]:
EE = EasyEnsembleClassifier()

* **Train the EasyEnsembleClassifier**

In [31]:
EE.fit(X_train, y_train)
ee_predictions = EE.predict(X_test)

* **Display the confusion matrix**

In [None]:
ee_cm = confusion_matrix(y_test,ee_predictions)
ee_cm

* **Print the imbalanced classification report**

In [None]:
ee_imbal_class = classification_report_imbalanced(y_test,
                                                  ee_predictions)
print(ee_imbal_class)

# Split the Data into Training and Testing

In [13]:
# Create our features
X =  df.drop(columns="loan_status")

# Create our target
y = df["loan_status"]

In [14]:
X.describe()

Unnamed: 0,loan_amnt,int_rate,installment,annual_inc,dti,delinq_2yrs,inq_last_6mths,open_acc,pub_rec,revol_bal,...,initial_list_status_w,next_pymnt_d_Apr-2019,next_pymnt_d_May-2019,application_type_Individual,application_type_Joint App,hardship_flag_N,debt_settlement_flag_N,verification_status_Not Verified,verification_status_Source Verified,verification_status_Verified
count,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,...,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0,68817.0
mean,16677.594562,0.127718,480.652863,88213.71,21.778153,0.217766,0.497697,12.58734,0.12603,17604.142828,...,0.876121,0.383161,0.616839,0.86034,0.13966,1.0,1.0,0.478007,0.373992,0.148001
std,10277.34859,0.04813,288.062432,115580.0,20.199244,0.718367,0.758122,6.022869,0.336797,21835.8804,...,0.329446,0.486161,0.486161,0.346637,0.346637,0.0,0.0,0.49952,0.483865,0.355104
min,1000.0,0.06,30.89,40.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,9000.0,0.0881,265.73,50000.0,13.89,0.0,0.0,8.0,0.0,6293.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
50%,15000.0,0.118,404.56,73000.0,19.76,0.0,0.0,11.0,0.0,12068.0,...,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
75%,24000.0,0.1557,648.1,104000.0,26.66,0.0,1.0,16.0,0.0,21735.0,...,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0
max,40000.0,0.3084,1676.23,8797500.0,999.0,18.0,5.0,72.0,4.0,587191.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
y.value_counts()

low_risk     68470
high_risk      347
Name: loan_status, dtype: int64

In [16]:
# Create X_train, X_test, y_train, y_test
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=1)

## Data Pre-Processing

Scale the training and testing data using the `StandardScaler` from `sklearn`. Remember that when scaling the data, you only scale the features data (`X_train` and `X_testing`).

* **Create the StandardScaler instance**

In [17]:
scaler = StandardScaler()

* **Fit the Standard Scaler with the training data**

In [18]:
X_train_scaler = scaler.fit(X_train)

* **Scale the training and testing data**

In [19]:
X_train_scaled = X_train_scaler.transform(X_train)

# Oversampling

In this section, you will compare two oversampling algorithms to determine which algorithm results in the best performance. You will oversample the data using the naive random oversampling algorithm and the SMOTE algorithm. For each algorithm, be sure to complete the folliowing steps:

1. View the count of the target classes using `Counter` from the collections library. 
3. Use the resampled data to train a logistic regression model.
3. Calculate the balanced accuracy score from sklearn.metrics.
4. Print the confusion matrix from sklearn.metrics.
5. Generate a classication report using the `imbalanced_classification_report` from imbalanced-learn.

Note: Use a random state of 1 for each sampling algorithm to ensure consistency between tests

### Naive Random Oversampling

### Naive Random Oversampling

In [20]:
# Resample the training data with the RandomOversampler
# YOUR CODE HERE

In [21]:
# Train the Logistic Regression model using the resampled data
# YOUR CODE HERE

In [22]:
# Calculated the balanced accuracy score
# YOUR CODE HERE

In [23]:
# Display the confusion matrix
# YOUR CODE HERE

In [24]:
# Print the imbalanced classification report
# YOUR CODE HERE

### SMOTE Oversampling

* **Resample the training data with SMOTE**

In [25]:
X_smote,y_smote = SMOTE(random_state=1).fit_resample(X_train,y_train)
Counter(y_smote)

Counter({'low_risk': 51366, 'high_risk': 51366})

* **Train the Logistic Regression model using the resampled data**

In [26]:
lr = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100,
          n_jobs=None, penalty='l2', random_state=1, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [27]:
lr.fit(X_smote,y_smote)

LogisticRegression(random_state=1)

In [28]:
lr_predictions = lr.predict(X_test)

* **Display the confusion matrix**

In [29]:
cm = confusion_matrix(y_test,lr_predictions)
pd.DataFrame(cm, index=["Actual 0", "Actual 1"], columns=["Prediction 0", "Prediction 1"])

Unnamed: 0,Prediction 0,Prediction 1
Actual 0,64,37
Actual 1,5419,11685


* **Calculated the balanced accuracy score**

In [30]:
bal_score = balanced_accuracy_score(y_test,lr_predictions)
bal_score

0.658418446498532

* **Print the imbalanced classification report**

In [31]:
imbalanced = classification_report_imbalanced(y_test,lr_predictions)
print(imbalanced)

                   pre       rec       spe        f1       geo       iba       sup

  high_risk       0.01      0.63      0.68      0.02      0.66      0.43       101
   low_risk       1.00      0.68      0.63      0.81      0.66      0.44     17104

avg / total       0.99      0.68      0.63      0.81      0.66      0.44     17205

