# WK 2/Diagnostic and Descriptive Analysis
## Variable Selection

In [1]:
import pandas as pd
import os
from sys import platform
import matplotlib.pyplot as plt
import datetime
import numpy as np
import pickle
import seaborn
import time

## Step 1 - Read pickle file from Week 1

In [2]:
### Read the data and features from the pickle
# NOTE(review): this is an absolute, machine-specific path; consider a path
# relative to a configurable data directory so the notebook runs elsewhere.
clean_data_pickle = "C:\\Users\\ly266e\\Documents\\Training\\CMU\\Master\\Fall 2023 Mini 7\\Business_Analytics\\HW\\HW5\\Update\\PickleData\\clean_data_feature.pickle"

# pickle.load can execute arbitrary code, so only load files you produced yourself.
# The context manager closes the file handle as soon as loading finishes.
with open(clean_data_pickle, "rb") as pickle_in:
    final_data, discrete_features, continuous_features = pickle.load(pickle_in)

In [3]:
# Sanity check: display the (rows, columns) dimensions of the loaded frame
final_data.shape

(181545, 25)

## Step 2 - Calculate returns for each loan

In [4]:
### Define the names of the five returns we'll be calculating
### (pessimistic, optimistic, and three re-investment variants a/b/c)
ret_cols = ["ret_PESS", "ret_OPT", "ret_INTa", "ret_INTb", "ret_INTc"]

In [5]:
## Remove all rows for loans that were paid back on the day they were issued

# Loan length in months. Dividing by np.timedelta64(1, 'M') is rejected by
# recent pandas releases because a calendar month is not a fixed duration,
# so spell out the average month (365.2425 / 12 = 30.436875 days) instead.
one_month = pd.Timedelta(days=365.2425 / 12)
final_data['loan_length'] = (final_data.last_pymnt_d - final_data.issue_d) / one_month

n_rows = len(final_data)
# .copy() makes the filtered frame independent of the original, so adding
# columns to it in later cells does not trigger SettingWithCopyWarning.
final_data = final_data[final_data.loan_length != 0].copy()
print("Removed " + str(n_rows - len(final_data)) + " rows")

Removed 8949 rows


#### Return Method 2 (pessimistic)

In [6]:
# Calculate the return using a simple annualized profit margin
# Pessimistic definition (method 2): the profit is spread over the full
# nominal term of the loan, regardless of when it was actually paid off.

# Raw string so that \d reaches the regex engine instead of being parsed
# as an (invalid) Python string escape.
final_data['term_num'] = final_data.term.str.extract(r'(\d+)', expand=False).astype(int)

# Total profit as a fraction of the funded amount, then scaled to a yearly rate
profit_margin = (final_data.total_pymnt - final_data.funded_amnt) / final_data.funded_amnt
final_data['ret_PESS'] = profit_margin * (12 / final_data['term_num'])

#### Return Method 1 (optimistic)

In [7]:
# Optimistic definition (method 1): a loan with a positive return is assumed
# to be replaced immediately by a similar loan, so its profit is annualized
# over the actual loan length rather than the nominal term.
total_gain = final_data['total_pymnt'] - final_data['funded_amnt']
final_data['ret_OPT'] = (total_gain / final_data['funded_amnt']) * (12 / final_data['loan_length'])

# Loans that took a loss fall back to the pessimistic (method 2) return
took_loss = final_data['ret_OPT'] < 0
final_data.loc[took_loss, 'ret_OPT'] = final_data.loc[took_loss, 'ret_PESS']

#### Return Method 3 (re-investment)

In [8]:
def ret_method_3(T, i, data=None):
    '''
    Given an investment time horizon (in months) and re-investment
    interest rate, calculate the return of each loan

    Parameters
    ----------
    T : investment horizon in months, counted from the start of the loan
    i : re-investment interest rate per month (0 means no re-investment gain)
    data : optional DataFrame with the columns total_pymnt, recoveries,
           loan_length and funded_amnt; defaults to the notebook-level
           final_data frame when omitted

    Returns
    -------
    Series with one annualized return per row of the frame
    '''
    if data is None:
        # Keep the original behavior: operate on the notebook's global frame
        data = final_data

    loan_length = data['loan_length']

    # Assuming that the total amount paid back was paid at equal
    # intervals during the duration of the loan, calculate the size of each of these installment
    actual_installment = (data.total_pymnt - data.recoveries) / loan_length

    # Assuming the amount is immediately re-invested at the given
    # rate, find the total amount of money we'll have by the end of the loan
    if isinstance(i, (int, float)) and 1 + i == 1:
        # With a zero rate the geometric-series formula below is 0/0 (NaN);
        # the sum of the installments is simply installment * number of months
        cash_by_end_of_loan = actual_installment * loan_length
    else:
        cash_by_end_of_loan = actual_installment * (1 - pow(1 + i, loan_length)) / ( 1 - (1 + i) )

    # Recoveries are added on top at the end of the loan, without compounding
    cash_by_end_of_loan = cash_by_end_of_loan + data.recoveries

    # Assuming that cash is then re-invested at the same rate,
    # with monthly re-investment, until T months from the start of the loan
    remaining_months = T - loan_length
    final_return = cash_by_end_of_loan * pow(1 + i, remaining_months)

    # Find the percentage return, annualized over the T-month horizon
    return( (12/T) * ( ( final_return - data['funded_amnt'] ) / data['funded_amnt'] ) )

# Evaluate the re-investment return over a 5-year horizon for three
# different monthly re-investment rates, one output column per rate
horizon_months = 5*12
for ret_name, monthly_rate in (("ret_INTa", 0.001), ("ret_INTb", 0.0025), ("ret_INTc", 0.005)):
    final_data[ret_name] = ret_method_3(horizon_months, monthly_rate)

In [9]:
# List every column to confirm the loan_length, term_num and return columns were added
final_data.columns.tolist()

['id',
 'loan_amnt',
 'funded_amnt',
 'term',
 'int_rate',
 'grade',
 'emp_length',
 'home_ownership',
 'annual_inc',
 'verification_status',
 'issue_d',
 'loan_status',
 'purpose',
 'dti',
 'delinq_2yrs',
 'open_acc',
 'pub_rec',
 'revol_bal',
 'revol_util',
 'total_pymnt',
 'recoveries',
 'inq_last_6mths',
 'pct_tl_nvr_dlq',
 'last_pymnt_d',
 'earliest_cr_line',
 'loan_length',
 'term_num',
 'ret_PESS',
 'ret_OPT',
 'ret_INTa',
 'ret_INTb',
 'ret_INTc']

In [10]:
# Define the output path for the full pickle with returns info
pickle_file = os.path.join(".", "PickleData", "ret_data_selection.pickle")

# Create the output folder if it does not exist yet
os.makedirs(os.path.dirname(pickle_file), exist_ok=True)

# The original cell built the same path twice under two names; keep the
# second name as an alias so anything referring to it still works.
full_pickle_file = pickle_file

# The context manager flushes and closes the file once the dump completes,
# so the pickle is fully written before any later step reads it.
with open(full_pickle_file, "wb") as pickle_out:
    pickle.dump([final_data, discrete_features, continuous_features, ret_cols], pickle_out)