# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from category_encoders.target_encoder import TargetEncoder
import sklearn
import tensorflow 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense , Dropout
from tensorflow.keras.layers import BatchNormalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_log_error

In [2]:
## Load the training data from "1-Train.csv" and preview its first five rows.
loan_df_train=pd.read_csv("1-Train.csv")
loan_df_train.head(5)

Unnamed: 0,BusinessType,CD,City,DateApproved,Gender,JobsRetained,Lender,LoanRange,NAICSCode,NonProfit,RaceEthnicity,State,Veteran,Zip,Industry,Index
0,Independent Contractors,CA - 49,LA JOLLA,4/30/20,Unanswered,1,"U.S. Bank, National Association",$16300-27300,531210,N,Unanswered,CA,Unanswered,92037.0,Offices of Real Estate Agents and Brokers,1026947
1,Limited Liability Company(LLC),CA - 39,BREA,4/30/20,Male Owned,4,"Bank of America, National Association",$7964-16300,423490,N,Unanswered,CA,Non-Veteran,92821.0,Other Professional Equipment and Supplies Merc...,1143280
2,Limited Liability Company(LLC),MI - 08,ROCHESTER HILLS,4/6/20,Unanswered,208,Oxford Bank,$2-5 million,541612,N,Unanswered,MI,Unanswered,48309.0,Human Resources Consulting Services,286838
3,Independent Contractors,PA - 01,PHILADELPHIA,6/24/20,Male Owned,1,Sun FCU,$0-7964,541430,N,White,PA,Unanswered,19123.0,Graphic Design Services,3727822
4,Sole Proprietorship,TX - 30,DALLAS,4/15/20,Unanswered,10,"Texas Capital Bank, National Association",$56313-150000,522291,N,Unanswered,TX,Unanswered,75223.0,Consumer Lending,3960482


In [3]:
# getting the concise summary of the dataframe
loan_df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 16 columns):
 #   Column         Non-Null Count    Dtype  
---  ------         --------------    -----  
 0   BusinessType   1047759 non-null  object 
 1   CD             1048575 non-null  object 
 2   City           1048564 non-null  object 
 3   DateApproved   1048575 non-null  object 
 4   Gender         1048575 non-null  object 
 5   JobsRetained   1048575 non-null  int64  
 6   Lender         1048575 non-null  object 
 7   LoanRange      1048575 non-null  object 
 8   NAICSCode      1048575 non-null  int64  
 9   NonProfit      1048575 non-null  object 
 10  RaceEthnicity  1048575 non-null  object 
 11  State          1048575 non-null  object 
 12  Veteran        1048575 non-null  object 
 13  Zip            1048567 non-null  float64
 14  Industry       1048575 non-null  object 
 15  Index          1048575 non-null  int64  
dtypes: float64(1), int64(3), object(12)
memory usage: 128.

Univariate and multivariate analysis of the training data is done in a separate notebook ("Univariate and Multivariate Analysis.ipynb") using pandas profiling. The analysis report is attached alongside this notebook.

# Feature Engineering

In [4]:
## NAICSCode is an identifier rather than a quantity, so it is stored as text.
def tostr(loan_df):
    """Convert every ``NAICSCode`` value to its string form, modifying ``loan_df`` in place."""
    naics_as_text = loan_df['NAICSCode'].map(str)
    loan_df['NAICSCode'] = naics_as_text
    

In [5]:
# Keep only the first two digits of the NAICSCode (the overall industry the
# business caters to) in order to reduce the column's cardinality.
def split(x):
    """Return the first two characters of ``x``."""
    return x[:2]

def split_NAICSCode(loan_df):
    """Truncate every ``NAICSCode`` value to its first two characters, in place."""
    sector_codes = loan_df['NAICSCode'].map(split)
    loan_df['NAICSCode'] = sector_codes
    



In [6]:
loan_df_train['BusinessType'].value_counts()/len(loan_df_train['BusinessType'])

Limited  Liability Company(LLC)        0.289340
Corporation                            0.289310
Subchapter S Corporation               0.148990
Sole Proprietorship                    0.143256
Non-Profit Organization                0.038332
Self-Employed Individuals              0.036140
Independent Contractors                0.024522
Partnership                            0.015036
Limited Liability Partnership          0.006907
Professional Association               0.004790
Cooperative                            0.001333
Non-Profit Childcare Center            0.000563
Trust                                  0.000269
Employee Stock Ownership Plan(ESOP)    0.000181
Tenant in Common                       0.000127
Joint Venture                          0.000117
Rollover as Business Start-Ups (ROB    0.000009
Name: BusinessType, dtype: float64

In [7]:
## BusinessType values that each cover less than 1% of the rows are merged into a single "others" category.
## Missing (NaN) BusinessType values are also set to "others" (see replace_nan).
col_others = [
    'Limited Liability Partnership',
    'Professional Association',
    'Cooperative',
    'Non-Profit Childcare Center',
    'Trust',
    'Employee Stock Ownership Plan(ESOP)',
    'Tenant in Common',
    'Joint Venture',
    'Rollover as Business Start-Ups (ROB',
]

def merger(x):
    """Return "others" when ``x`` is one of the rare business types, otherwise ``x`` itself."""
    return "others" if x in col_others else x

def merger_BusinessType(loan_df):
    """Collapse the rare ``BusinessType`` categories into "others", modifying ``loan_df`` in place."""
    merged = loan_df['BusinessType'].map(merger)
    loan_df['BusinessType'] = merged


def replace_nan(loan_df):
    """Fill missing ``BusinessType`` values with "others", modifying ``loan_df`` in place.

    The filled column is assigned back to the frame instead of calling
    ``fillna(..., inplace=True)`` on the selected column: that chained in-place
    form operates on an intermediate object, is deprecated by pandas, and no
    longer updates ``loan_df`` once Copy-on-Write is enabled.

    Parameters
    ----------
    loan_df : pandas.DataFrame
        Frame containing a ``BusinessType`` column.
    """
    loan_df['BusinessType'] = loan_df['BusinessType'].fillna('others')


In [9]:
## Lenders covering at least 0.5% of the rows keep their own category; all other lenders are grouped under "lender_others".
def lender_cols(loan_df, min_percent=0.5):
    """Return the lenders whose share of the rows in ``loan_df`` is at least ``min_percent``.

    Parameters
    ----------
    loan_df : pandas.DataFrame
        Frame containing a ``Lender`` column.
    min_percent : float, default 0.5
        Minimum share of rows, in percent, a lender needs to be returned.

    Returns
    -------
    list
        Lender names, ordered from most to least frequent.
    """
    # The share is taken over all rows of the column (rows with a missing
    # lender still count in the denominator), as in the original computation.
    percent_lender = loan_df['Lender'].value_counts() / len(loan_df['Lender']) * 100
    # Filter the Series itself rather than going through a DataFrame and
    # reset_index(): the column names that round trip produces ('Lender' and
    # 'index' vs. 'count' and 'Lender') differ between pandas versions.
    return percent_lender[percent_lender >= min_percent].index.tolist()
    
def lender1_merger(x):
    """Return ``x`` if it is in the module-level ``lender_col`` list, else "lender_others"."""
    if x in lender_col:
        return x
    else:
        return "lender_others"

def lender_merger(loan_df, keep=None):
    """Group infrequent lenders under "lender_others", modifying ``loan_df`` in place.

    Parameters
    ----------
    loan_df : pandas.DataFrame
        Frame containing a ``Lender`` column.
    keep : iterable of str, optional
        Lenders that keep their own category. When omitted, the list is
        derived from ``loan_df`` itself with ``lender_cols``. Pass the list
        obtained from the training data when transforming any other data set,
        so that every data set is encoded with the same lender categories.

    Returns
    -------
    list
        The lenders that kept their own category.
    """
    # lender1_merger reads this module-level name, so it is (re)bound here.
    global lender_col
    lender_col = lender_cols(loan_df) if keep is None else list(keep)
    loan_df['Lender'] = loan_df['Lender'].apply(lender1_merger)
    return lender_col



In [10]:
## Creating a new column 'days_since_approved' to capture the number of days since the loan has been approved.
def days_approved(loan_df, reference_date='5/26/22'):
    """Add ``days_since_approved``: whole days between ``DateApproved`` and ``reference_date``.

    ``loan_df`` is modified in place: ``DateApproved`` is converted to
    datetimes and the integer column ``days_since_approved`` is added.

    Parameters
    ----------
    loan_df : pandas.DataFrame
        Frame containing a ``DateApproved`` column parseable by ``pd.to_datetime``.
    reference_date : str or datetime-like, default '5/26/22'
        Date the elapsed days are measured up to.
    """
    loan_df['DateApproved'] = pd.to_datetime(loan_df['DateApproved'])
    elapsed = pd.to_datetime(reference_date) - loan_df['DateApproved']
    # Use the .dt.days accessor: converting with astype('timedelta64[D]') is
    # rejected by pandas 2.x, which only supports s/ms/us/ns resolutions.
    loan_df['days_since_approved'] = elapsed.dt.days.astype(int)


In [11]:
## Each 'LoanRange' label is mapped to one representative dollar amount
## (roughly the midpoint of the range), stored in the new 'loan_amount' column.
loan_amt = {
    '$16300-27300': 21800,
    '$7964-16300': 12132,
    '$2-5 million': 3500000,
    '$0-7964': 3982,
    '$56313-150000': 103156,
    '$27300-56313': 41806,
    '$150,000-350,000': 250000,
    '$1-2 million': 1500000,
    '$350,000-1 million': 675000,
    '$5-10 million': 7500000,
}

def loan_amnt(loan_df):
    """Add ``loan_amount`` by looking each ``LoanRange`` label up in ``loan_amt``, in place."""
    amounts = loan_df['LoanRange'].map(loan_amt)
    loan_df['loan_amount'] = amounts


In [12]:
loan_df_train['RaceEthnicity'].value_counts()/len(loan_df_train['RaceEthnicity'])*100

Unanswered                          88.453854
White                                9.086427
Asian                                1.234056
Hispanic                             0.815202
Black or African American            0.349331
American Indian or Alaska Native     0.060368
Puerto Rican                         0.000572
Eskimo & Aleut                       0.000095
Multi Group                          0.000095
Name: RaceEthnicity, dtype: float64

In [13]:
## RaceEthnicity values that each cover less than 0.5% of the rows are merged into a single "Other race" category.
race_ethnic = [
    "Black or African American",
    "American Indian or Alaska Native",
    "Puerto Rican",
    'Multi Group',
    'Eskimo & Aleut',
]

def ethnic(x):
    """Return "Other race" when ``x`` is one of the rare categories, otherwise ``x`` itself."""
    return "Other race" if x in race_ethnic else x

def merger_RaceEthnicity(loan_df):
    """Collapse the rare ``RaceEthnicity`` categories into "Other race", in place."""
    regrouped = loan_df['RaceEthnicity'].map(ethnic)
    loan_df['RaceEthnicity'] = regrouped

In [14]:
## Columns that are not passed on as model features are removed.
def drop_columns(loan_df):
    """Remove the unused columns from ``loan_df``, modifying it in place."""
    unwanted = ['CD','City','DateApproved','LoanRange','Zip','Industry','Index']
    loan_df.drop(columns=unwanted, inplace=True)


In [15]:
## Run every feature-engineering step on the training data, in order.
for step in (tostr, split_NAICSCode, merger_BusinessType, replace_nan):
    step(loan_df_train)

# Lender grouping: the lenders that keep their own category are held in `lender_col`.
lender_col=lender_cols(loan_df_train)
lender_merger(loan_df_train)

for step in (days_approved, loan_amnt, merger_RaceEthnicity, drop_columns):
    step(loan_df_train)

In [16]:
loan_df_train.head()

Unnamed: 0,BusinessType,Gender,JobsRetained,Lender,NAICSCode,NonProfit,RaceEthnicity,State,Veteran,days_since_approved,loan_amount
0,Independent Contractors,Unanswered,1,"U.S. Bank, National Association",53,N,Unanswered,CA,Unanswered,756,21800
1,Limited Liability Company(LLC),Male Owned,4,"Bank of America, National Association",42,N,Unanswered,CA,Non-Veteran,756,12132
2,Limited Liability Company(LLC),Unanswered,208,lender_others,54,N,Unanswered,MI,Unanswered,780,3500000
3,Independent Contractors,Male Owned,1,lender_others,54,N,White,PA,Unanswered,701,3982
4,Sole Proprietorship,Unanswered,10,lender_others,52,N,Unanswered,TX,Unanswered,771,103156


# Creating Train,Test Split for Model Training

In [18]:
# Separate the target from the features. drop() returns a new frame here, so
# loan_df_train keeps its 'JobsRetained' column and this cell can be re-run
# without a KeyError. (drop(..., inplace=True) returns None, so its result
# must not be assigned.)
y = loan_df_train['JobsRetained']
X = loan_df_train.drop(columns=['JobsRetained'])

In [19]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 10 columns):
 #   Column               Non-Null Count    Dtype 
---  ------               --------------    ----- 
 0   BusinessType         1048575 non-null  object
 1   Gender               1048575 non-null  object
 2   Lender               1048575 non-null  object
 3   NAICSCode            1048575 non-null  object
 4   NonProfit            1048575 non-null  object
 5   RaceEthnicity        1048575 non-null  object
 6   State                1048575 non-null  object
 7   Veteran              1048575 non-null  object
 8   days_since_approved  1048575 non-null  int32 
 9   loan_amount          1048575 non-null  int64 
dtypes: int32(1), int64(1), object(8)
memory usage: 76.0+ MB


In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [21]:
X_train.shape

(838860, 10)

In [22]:
X_test.shape

(209715, 10)

# Target Encoding

In [24]:
# Categorical feature columns that are replaced by their target-encoded values.
te_columns = ['BusinessType', 'Gender', 'Lender', 'NonProfit', 'RaceEthnicity','State', 'Veteran', 'NAICSCode']

# The encoder is fitted on the training split only.
te= TargetEncoder()
te.fit(X_train[te_columns],y_train)

def get_te(df):
    """Return a copy of ``df`` whose categorical columns are target-encoded with ``te``.

    The returned frame holds the remaining columns of ``df`` first, followed
    by the encoded columns in ``te_columns`` order, with a fresh 0..n-1 index.
    ``df`` itself is not modified: the previous in-place ``drop`` and
    ``reset_index`` acted on the caller's frame, which for the train/test
    splits is a slice of ``X``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column listed in ``te_columns``.

    Returns
    -------
    pandas.DataFrame
    """
    # transform() already returns a DataFrame carrying the input column
    # names, so it is used as is; this avoids relying on
    # get_feature_names(), which newer category_encoders releases replace
    # with get_feature_names_out().
    encoded = te.transform(df[te_columns])
    passthrough = df.drop(columns=te_columns)
    return pd.concat(
        [passthrough.reset_index(drop=True), encoded.reset_index(drop=True)],
        axis=1,
    )

In [25]:
X_train = get_te(X_train)
X_test = get_te(X_test)

In [26]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 838860 entries, 0 to 838859
Data columns (total 10 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   days_since_approved  838860 non-null  int32  
 1   loan_amount          838860 non-null  int64  
 2   BusinessType         838860 non-null  float64
 3   Gender               838860 non-null  float64
 4   Lender               838860 non-null  float64
 5   NonProfit            838860 non-null  float64
 6   RaceEthnicity        838860 non-null  float64
 7   State                838860 non-null  float64
 8   Veteran              838860 non-null  float64
 9   NAICSCode            838860 non-null  float64
dtypes: float64(8), int32(1), int64(1)
memory usage: 60.8 MB


In [27]:
X_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 209715 entries, 0 to 209714
Data columns (total 10 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   days_since_approved  209715 non-null  int32  
 1   loan_amount          209715 non-null  int64  
 2   BusinessType         209715 non-null  float64
 3   Gender               209715 non-null  float64
 4   Lender               209715 non-null  float64
 5   NonProfit            209715 non-null  float64
 6   RaceEthnicity        209715 non-null  float64
 7   State                209715 non-null  float64
 8   Veteran              209715 non-null  float64
 9   NAICSCode            209715 non-null  float64
dtypes: float64(8), int32(1), int64(1)
memory usage: 15.2 MB


In [28]:
X_test

Unnamed: 0,days_since_approved,loan_amount,BusinessType,Gender,Lender,NonProfit,RaceEthnicity,State,Veteran,NAICSCode
0,758,41806,11.231718,10.620822,12.545444,10.880420,11.013397,8.946201,10.785113,5.778003
1,755,12132,11.231718,10.620822,9.601262,10.880420,11.013397,12.438692,10.785113,5.981410
2,777,103156,14.320476,10.620822,12.545444,10.880420,11.013397,8.946201,10.785113,10.823039
3,783,675000,14.320476,14.791903,12.545444,10.880420,14.620401,9.055031,15.875807,12.022776
4,779,250000,11.231718,11.286222,12.545444,10.880420,14.620401,14.529526,14.657271,7.734148
...,...,...,...,...,...,...,...,...,...,...
209710,775,21800,24.301765,10.620822,12.545444,24.365517,11.013397,12.370130,10.785113,7.734148
209711,735,12132,2.828958,10.620822,39.886663,10.880420,11.013397,12.438692,10.785113,7.538701
209712,755,41806,14.320476,10.620822,12.545444,10.880420,11.013397,14.529526,10.785113,14.968642
209713,758,3982,2.828958,10.620822,12.545444,10.880420,11.013397,12.037039,10.785113,14.968642


In [29]:
X_train

Unnamed: 0,days_since_approved,loan_amount,BusinessType,Gender,Lender,NonProfit,RaceEthnicity,State,Veteran,NAICSCode
0,776,21800,2.828958,10.620822,12.545444,10.88042,11.013397,9.212216,10.785113,12.022776
1,757,103156,14.276433,10.620822,12.545444,10.88042,11.013397,12.156997,10.785113,14.968642
2,772,21800,11.231718,10.620822,12.545444,10.88042,11.013397,13.695790,10.785113,14.968642
3,758,12132,11.231718,11.286222,12.545444,10.88042,11.013397,10.026778,10.785113,20.627333
4,771,103156,11.231718,10.620822,12.545444,10.88042,11.013397,10.878829,10.785113,20.088385
...,...,...,...,...,...,...,...,...,...,...
838855,770,12132,11.231718,10.620822,16.958276,10.88042,11.013397,14.529526,10.785113,11.696332
838856,710,103156,14.320476,10.620822,2.805234,10.88042,11.013397,14.529526,10.785113,14.968642
838857,755,12132,14.276433,10.620822,11.473692,10.88042,11.013397,13.823321,10.785113,14.968642
838858,714,41806,14.276433,10.620822,12.545444,10.88042,11.013397,13.783801,10.785113,5.778003


# Data Scaling

In [30]:
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(X_train.to_numpy())

In [31]:
X_train_scaled=pd.DataFrame(df_scaled)
X_train_scaled.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.920455,0.002377,0.069818,0.0,0.314527,0.0,0.0,0.045163,0.0,0.420542
1,0.704545,0.01323,0.565712,0.0,0.314527,0.0,0.0,0.494173,0.0,0.618926
2,0.875,0.002377,0.433818,0.0,0.314527,0.0,0.0,0.728802,0.0,0.618926
3,0.715909,0.001087,0.433818,0.159527,0.314527,0.0,0.0,0.169365,0.0,1.0
4,0.863636,0.01323,0.433818,0.0,0.314527,0.0,0.0,0.299282,0.0,0.963706


In [32]:
df_scaled2 = scaler.transform(X_test.to_numpy())
X_test_scaled=pd.DataFrame(df_scaled2)
X_test_scaled.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.715909,0.005046,0.433818,0.0,0.314527,0.0,0.0,0.004602,0.0,0.0
1,0.681818,0.001087,0.433818,0.0,0.240714,0.0,0.0,0.537125,0.0,0.013698
2,0.931818,0.01323,0.56762,0.0,0.314527,0.0,0.0,0.004602,0.0,0.339748
3,1.0,0.089517,0.56762,1.0,0.314527,0.0,1.0,0.021196,1.0,0.420542
4,0.954545,0.03282,0.433818,0.159527,0.314527,0.0,1.0,0.855927,0.760635,0.131733


In [33]:
## MinMax Scaling(Min:0, Max:500): the target is divided by the constant 500.
## NOTE(review): this cell rebinds y_train / y_test, so running it a second time
## divides by 500 again -- run it exactly once per kernel session.
## Predictions are multiplied by 500 again before scoring and before submission.
y_train=y_train/500
y_test=y_test/500

# Model Building

In [34]:
# Fully connected regression network: 10 input features -> 1 output unit.
# The final ReLU keeps the prediction non-negative.
model = keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=[10]))
model.add(layers.BatchNormalization())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.BatchNormalization())
for hidden_units in (128, 128, 64, 32):
    model.add(layers.Dense(hidden_units, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dense(1, activation='relu'))

# Model Compiling

In [None]:
# Factor the target was divided by in the "Data Scaling" section (y / 500).
JOBS_SCALE = 500.0

def root_mean_squared_log_error2(y_true, y_pred):
    """Root mean squared logarithmic error, computed on the original job-count scale.

    NOTE(review): this notebook used ``root_mean_squared_log_error2`` without
    defining it, so compiling on a fresh kernel raised ``NameError``. This is
    a reconstruction that mirrors the RMSLE evaluation done after prediction
    (both arguments are rescaled by 500 before the log); confirm it against
    the loss that produced the saved weights before retraining.

    Parameters
    ----------
    y_true, y_pred : tensors
        Scaled targets and predictions (job counts divided by 500).

    Returns
    -------
    Scalar tensor with the RMSLE of the batch.
    """
    y_true = tensorflow.cast(y_true, y_pred.dtype)
    log_gap = (tensorflow.math.log1p(y_pred * JOBS_SCALE)
               - tensorflow.math.log1p(y_true * JOBS_SCALE))
    return tensorflow.sqrt(tensorflow.reduce_mean(tensorflow.square(log_gap)))

model.compile(optimizer = tensorflow.keras.optimizers.Adam(learning_rate=0.0003), loss = root_mean_squared_log_error2 )

# Model Training

In [None]:
checkpoint_filepath2="/tmp/gg"

In [None]:
import tensorflow as tf

# Save the weights of the epoch with the lowest training loss.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath2,
    monitor="loss",
    verbose=0,
    save_best_only=True,
    save_weights_only=True,
    mode="min",
    save_freq="epoch",
)

# Multiply the learning rate by 0.8 when the training loss has not improved
# by at least 0.001 for 10 epochs.
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.8,
    patience=10,
    verbose=0,
    mode='min',
    min_delta=0.001,
    cooldown=0,
    min_lr=0,
)

# Train `model`, the network defined and compiled above. The previous call
# used `model_test`, a name that is not defined anywhere in this notebook.
historye = model.fit(
    X_train_scaled,
    y_train,
    batch_size=256,
    epochs=100,
    verbose=1,
    callbacks=[checkpoint_cb, reduce_lr_cb],
)

# Loading Model Weights

In [35]:
model.load_weights('submission_weights_model_test.h5')

# Model Prediction

In [36]:
## Evaluate on the held-out split: targets and predictions are multiplied by 500
## (predictions are also rounded) before the RMSLE is computed.
y_pred = model.predict(X_test_scaled, verbose=1)
predicted_jobs = np.round(y_pred*500)
score = np.sqrt(mean_squared_log_error(y_test*500, predicted_jobs))
print("RMSLE : ",score)

RMSLE :  0.6752526998239937


# Model Prediction on the separate Test csv

In [37]:
# reading test data
loan_df_test=pd.read_csv('2-Test.csv')
loan_df_test.head()

Unnamed: 0,BusinessType,CD,City,DateApproved,Gender,Lender,LoanRange,NAICSCode,NonProfit,RaceEthnicity,State,Veteran,Zip,Industry,Index
0,Subchapter S Corporation,FL - 16,SARASOTA,4/30/20,Unanswered,Regions Bank,$0-7964,531210,N,Unanswered,FL,Unanswered,34236.0,Offices of Real Estate Agents and Brokers,1705962
1,Corporation,AR - 02,NORTH LITTLE ROCK,4/28/20,Male Owned,Eagle Bank and Trust Company,$7964-16300,812320,N,Unanswered,AR,Unanswered,72114.0,Drycleaning and Laundry Services (except Coin-...,770689
2,Corporation,TX - 15,DONNA,4/13/20,Unanswered,Texas Regional Bank,$27300-56313,454390,N,Unanswered,TX,Unanswered,78537.0,Other Direct Selling Establishments,4016874
3,Limited Liability Company(LLC),LA - 01,MANDEVILLE,5/8/20,Unanswered,American Bank & Trust Company,$0-7964,722515,N,Unanswered,LA,Unanswered,70471.0,Snack and Nonalcoholic Beverage Bars,2306105
4,Limited Liability Company(LLC),WI - 03,PLOVER,4/27/20,Unanswered,BMO Harris Bank National Association,$16300-27300,812112,N,Unanswered,WI,Unanswered,54467.0,Beauty Salons,4517956


Preprocessing Test data

In [38]:
# Preprocessing: the same feature-engineering steps as for the training data.
tostr(loan_df_test)
split_NAICSCode(loan_df_test)
merger_BusinessType(loan_df_test)
replace_nan(loan_df_test)
# Reuse the lender list learned from the training data, which is still held in
# the global `lender_col`. Re-deriving it from the test file (lender_cols /
# lender_merger on loan_df_test) would group lenders by their test-set
# frequencies, which can differ from the categories the target encoder and the
# model were fitted on.
loan_df_test['Lender']=loan_df_test['Lender'].apply(lender1_merger)
days_approved(loan_df_test)
loan_amnt(loan_df_test)
merger_RaceEthnicity(loan_df_test)
drop_columns(loan_df_test)

In [39]:
loan_df_test

Unnamed: 0,BusinessType,Gender,Lender,NAICSCode,NonProfit,RaceEthnicity,State,Veteran,days_since_approved,loan_amount
0,Subchapter S Corporation,Unanswered,Regions Bank,53,N,Unanswered,FL,Unanswered,756,3982
1,Corporation,Male Owned,lender_others,81,N,Unanswered,AR,Unanswered,758,12132
2,Corporation,Unanswered,lender_others,45,N,Unanswered,TX,Unanswered,773,41806
3,Limited Liability Company(LLC),Unanswered,lender_others,72,N,Unanswered,LA,Unanswered,748,3982
4,Limited Liability Company(LLC),Unanswered,lender_others,81,N,Unanswered,WI,Unanswered,759,21800
...,...,...,...,...,...,...,...,...,...,...
349995,Limited Liability Company(LLC),Unanswered,"Bank of America, National Association",54,N,Unanswered,IL,Unanswered,756,21800
349996,Sole Proprietorship,Unanswered,lender_others,56,N,Unanswered,GA,Unanswered,757,3982
349997,Subchapter S Corporation,Unanswered,lender_others,23,N,Unanswered,GA,Unanswered,759,21800
349998,Subchapter S Corporation,Unanswered,lender_others,53,N,Unanswered,MN,Unanswered,772,41806


In [40]:
# Target encoding Test data
X_submit=get_te(loan_df_test)

In [41]:
# Scaling test data
X_submit_scaled = scaler.transform(X_submit.to_numpy())
X_submit_scaled=pd.DataFrame(X_submit_scaled)
X_submit_scaled.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.693182,0.0,0.565712,0.0,0.167385,0.0,0.0,0.004602,0.0,0.013698
1,0.715909,0.001087,0.56762,1.0,0.314527,0.0,0.0,0.099104,0.0,0.131733
2,0.886364,0.005046,0.56762,0.0,0.314527,0.0,0.0,0.494173,0.0,0.100332
3,0.602273,0.0,0.433818,0.0,0.314527,0.0,0.0,0.476798,0.0,1.0
4,0.727273,0.002377,0.433818,0.0,0.314527,0.0,0.0,0.475882,0.0,0.131733


In [46]:
# Prediction on test data
y_pred=model.predict(X_submit_scaled,verbose=1)
y_pred=np.round(y_pred*500)



In [47]:
test_data_prediction=pd.DataFrame(y_pred,columns=['JobsRetained_PRED'],dtype=int)
test_data_prediction.head()

Unnamed: 0,JobsRetained_PRED
0,0
1,2
2,4
3,2
4,2


In [48]:
sub=pd.read_csv('3-Submission.csv')
sub.head()

Unnamed: 0,Index,JobsRetained_PRED
0,1705962,
1,770689,
2,4016874,
3,2306105,
4,4517956,


In [49]:
# Copy the predictions into the submission table by position.
# NOTE(review): this assumes the rows of 3-Submission.csv are in the same order
# as the rows of 2-Test.csv -- confirm against the 'Index' column.
# Assigning a Series aligns on the index and silently leaves NaN where the two
# tables do not match, so the lengths are checked and the raw values assigned.
assert len(sub) == len(test_data_prediction), "submission template and predictions differ in length"
sub['JobsRetained_PRED']=test_data_prediction["JobsRetained_PRED"].to_numpy()
sub.head()

Unnamed: 0,Index,JobsRetained_PRED
0,1705962,0
1,770689,2
2,4016874,4
3,2306105,2
4,4517956,2


Conclusion:
The 'JobsRetained_PRED' column of the submission table loaded from 3-Submission.csv is populated with the number of jobs retained, as predicted by our model.