In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

import os

# hide warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
Id_train=pd.read_csv("loan_data_train.csv")

In [3]:
Id_train.head(1)

Unnamed: 0,ID,Amount.Requested,Amount.Funded.By.Investors,Interest.Rate,Loan.Length,Loan.Purpose,Debt.To.Income.Ratio,State,Home.Ownership,Monthly.Income,FICO.Range,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length
0,79542.0,25000,25000,18.49%,60 months,debt_consolidation,27.56%,VA,MORTGAGE,8606.56,720-724,11,15210,3.0,5 years


In [4]:
Id_train.shape

(2200, 15)

In [5]:
Id_train.columns

Index(['ID', 'Amount.Requested', 'Amount.Funded.By.Investors', 'Interest.Rate',
       'Loan.Length', 'Loan.Purpose', 'Debt.To.Income.Ratio', 'State',
       'Home.Ownership', 'Monthly.Income', 'FICO.Range', 'Open.CREDIT.Lines',
       'Revolving.CREDIT.Balance', 'Inquiries.in.the.Last.6.Months',
       'Employment.Length'],
      dtype='object')

In [6]:
Id_test=pd.read_csv("loan_data_test.csv")

In [7]:
Id_test.head(1)

Unnamed: 0,ID,Amount.Requested,Amount.Funded.By.Investors,Loan.Length,Loan.Purpose,Debt.To.Income.Ratio,State,Home.Ownership,Monthly.Income,FICO.Range,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length
0,20093,5000,5000,60 months,moving,12.59%,NY,RENT,4416.67,690-694,13,7686,0,< 1 year


In [8]:
Id_test.shape

(300, 14)

In [9]:
Id_test.columns

Index(['ID', 'Amount.Requested', 'Amount.Funded.By.Investors', 'Loan.Length',
       'Loan.Purpose', 'Debt.To.Income.Ratio', 'State', 'Home.Ownership',
       'Monthly.Income', 'FICO.Range', 'Open.CREDIT.Lines',
       'Revolving.CREDIT.Balance', 'Inquiries.in.the.Last.6.Months',
       'Employment.Length'],
      dtype='object')

In [10]:
Id_test['Interest.Rate']=np.nan #create new column in test data named "Interest.Rate"
Id_train['data']='train' #to separate test and training data
Id_test['data']='test'
Id_test=Id_test[Id_train.columns] # to make sure column name would be in same location in both dataframe
Id_all=pd.concat([Id_train,Id_test],axis=0) #concate train and test

In [11]:
Id_all.head()

Unnamed: 0,ID,Amount.Requested,Amount.Funded.By.Investors,Interest.Rate,Loan.Length,Loan.Purpose,Debt.To.Income.Ratio,State,Home.Ownership,Monthly.Income,FICO.Range,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length,data
0,79542.0,25000,25000.0,18.49%,60 months,debt_consolidation,27.56%,VA,MORTGAGE,8606.56,720-724,11,15210,3.0,5 years,train
1,75473.0,19750,19750.0,17.27%,60 months,debt_consolidation,13.39%,NY,MORTGAGE,6737.5,710-714,14,19070,3.0,4 years,train
2,67265.0,2100,2100.0,14.33%,36 months,major_purchase,3.50%,LA,OWN,1000.0,690-694,13,893,1.0,< 1 year,train
3,80167.0,28000,28000.0,16.29%,36 months,credit_card,19.62%,NV,MORTGAGE,7083.33,710-714,12,38194,1.0,10+ years,train
4,17240.0,24250,17431.82,12.23%,60 months,credit_card,23.79%,OH,MORTGAGE,5833.33,730-734,6,31061,2.0,10+ years,train


In [12]:
Id_all.shape

(2500, 16)

In [13]:
Id_all=Id_all.drop('ID',axis=1)

In [14]:
Id_all.shape

(2500, 15)

# Cleaning Data

In [15]:
Id_all['Interest.Rate'] = Id_all['Interest.Rate'].str.replace("%",'')
Id_all['Interest.Rate'] = Id_all['Interest.Rate'].apply(pd.to_numeric)
Id_all['Debt.To.Income.Ratio'] = Id_all['Debt.To.Income.Ratio'].str.replace("%",'')
Id_all['Debt.To.Income.Ratio'] = Id_all['Debt.To.Income.Ratio'].apply(pd.to_numeric)

In [16]:
Id_all.dtypes

Amount.Requested                   object
Amount.Funded.By.Investors         object
Interest.Rate                     float64
Loan.Length                        object
Loan.Purpose                       object
Debt.To.Income.Ratio              float64
State                              object
Home.Ownership                     object
Monthly.Income                    float64
FICO.Range                         object
Open.CREDIT.Lines                  object
Revolving.CREDIT.Balance           object
Inquiries.in.the.Last.6.Months    float64
Employment.Length                  object
data                               object
dtype: object

In [17]:
# Id_all['Employment.Length']=Id_all['Employment.Length'].str.replace('< 1','0')
# Id_all['Employment.Length']=Id_all['Employment.Length'].str.replace(r'\D',"")
# Id_all['Employment.Length'].apply(pd.to_numeric).head()
# Id_all['Employment.Length'].head()

In [18]:
Id_all['Employment.Length']=Id_all['Employment.Length'].str.replace('years',"")
Id_all['Employment.Length']=Id_all['Employment.Length'].str.replace('year',"")
Id_all['Employment.Length']=np.where(Id_all['Employment.Length'].str[:2]=="10",10,Id_all['Employment.Length'])
Id_all['Employment.Length']=np.where(Id_all['Employment.Length'].str[0]=="<",0,Id_all['Employment.Length'])
Id_all['Employment.Length']=pd.to_numeric(Id_all['Employment.Length'],errors='coerce')
Id_all['Employment.Length'].head()

0     5.0
1     4.0
2     0.0
3    10.0
4    10.0
Name: Employment.Length, dtype: float64

In [19]:
Id_all.dtypes

Amount.Requested                   object
Amount.Funded.By.Investors         object
Interest.Rate                     float64
Loan.Length                        object
Loan.Purpose                       object
Debt.To.Income.Ratio              float64
State                              object
Home.Ownership                     object
Monthly.Income                    float64
FICO.Range                         object
Open.CREDIT.Lines                  object
Revolving.CREDIT.Balance           object
Inquiries.in.the.Last.6.Months    float64
Employment.Length                 float64
data                               object
dtype: object

In [20]:
# Id_all['Employment.Length']=Id_all['Employment.Length'].fillna(np.mean(Id_all['Employment.Length']))

In [21]:
# new data frame with split value columns 
k= Id_all["FICO.Range"].str.split("-", expand = True).astype(float)
Id_all['fico']=0.5*(k[0]+k[1])
del Id_all['FICO.Range']
Id_all['fico'].head()

0    722.0
1    712.0
2    692.0
3    712.0
4    732.0
Name: fico, dtype: float64

In [22]:
Id_all.dtypes

Amount.Requested                   object
Amount.Funded.By.Investors         object
Interest.Rate                     float64
Loan.Length                        object
Loan.Purpose                       object
Debt.To.Income.Ratio              float64
State                              object
Home.Ownership                     object
Monthly.Income                    float64
Open.CREDIT.Lines                  object
Revolving.CREDIT.Balance           object
Inquiries.in.the.Last.6.Months    float64
Employment.Length                 float64
data                               object
fico                              float64
dtype: object

In [23]:
# Id_all['Loan.Length']=Id_all['Loan.Length'].str.replace(r'\D',"")
# Id_all['Loan.Length'].head()

In [24]:
Id_all['Loan.Length']=Id_all['Loan.Length'].str.replace(' months',"")
Id_all['Loan.Length']=pd.to_numeric(Id_all['Loan.Length'],errors='coerce')

In [25]:
Id_all['Loan.Length'].head()

0    60.0
1    60.0
2    36.0
3    36.0
4    60.0
Name: Loan.Length, dtype: float64

In [26]:
for col in ['Amount.Requested','Open.CREDIT.Lines','Revolving.CREDIT.Balance']:
    Id_all[col]=pd.to_numeric(Id_all[col],errors='coerce') #because of . it shows error so ignore that error

In [27]:
Id_all.isnull().sum()

Amount.Requested                    5
Amount.Funded.By.Investors          1
Interest.Rate                     300
Loan.Length                         2
Loan.Purpose                        1
Debt.To.Income.Ratio                1
State                               1
Home.Ownership                      1
Monthly.Income                      3
Open.CREDIT.Lines                   9
Revolving.CREDIT.Balance            5
Inquiries.in.the.Last.6.Months      3
Employment.Length                  80
data                                0
fico                                0
dtype: int64

In [28]:
Id_all.dtypes

Amount.Requested                  float64
Amount.Funded.By.Investors         object
Interest.Rate                     float64
Loan.Length                       float64
Loan.Purpose                       object
Debt.To.Income.Ratio              float64
State                              object
Home.Ownership                     object
Monthly.Income                    float64
Open.CREDIT.Lines                 float64
Revolving.CREDIT.Balance          float64
Inquiries.in.the.Last.6.Months    float64
Employment.Length                 float64
data                               object
fico                              float64
dtype: object

In [29]:
Id_all['Loan.Purpose'].value_counts()

debt_consolidation    1307
credit_card            444
other                  200
home_improvement       152
major_purchase         101
small_business          87
car                     50
wedding                 39
medical                 30
moving                  29
vacation                21
house                   20
educational             15
renewable_energy         4
Name: Loan.Purpose, dtype: int64

In [30]:
for col in ['Amount.Requested','Open.CREDIT.Lines','Revolving.CREDIT.Balance']:
    Id_all[col]=pd.to_numeric(Id_all[col],errors='coerce')

In [31]:
del Id_all['Amount.Funded.By.Investors'] #as it is future value so can delete

In [32]:
cat_cols=Id_all.select_dtypes(['object']).columns
cat_cols

Index(['Loan.Purpose', 'State', 'Home.Ownership', 'data'], dtype='object')

In [33]:
cat_cols=cat_cols[:-1]
cat_cols

Index(['Loan.Purpose', 'State', 'Home.Ownership'], dtype='object')

In [37]:
for col in cat_cols:
    freqs=Id_all[col].value_counts()
    k=freqs.index[freqs>20][:-1]
    for cat in k:
        name=col+'_'+cat
        Id_all[name]=(Id_all[col]==cat).astype(int)
    del Id_all[col]
    print(col)

Loan.Purpose
State
Home.Ownership


In [43]:
Id_all.head()

Unnamed: 0,Amount.Requested,Interest.Rate,Loan.Length,Debt.To.Income.Ratio,Monthly.Income,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length,data,...,State_MO,State_NV,State_OR,State_SC,State_WI,State_KY,State_LA,State_OK,Home.Ownership_MORTGAGE,Home.Ownership_RENT
0,25000.0,18.49,60.0,27.56,8606.56,11.0,15210.0,3.0,5.0,train,...,0,0,0,0,0,0,0,0,1,0
1,19750.0,17.27,60.0,13.39,6737.5,14.0,19070.0,3.0,4.0,train,...,0,0,0,0,0,0,0,0,1,0
2,2100.0,14.33,36.0,3.5,1000.0,13.0,893.0,1.0,0.0,train,...,0,0,0,0,0,0,1,0,0,0
3,28000.0,16.29,36.0,19.62,7083.33,12.0,38194.0,1.0,10.0,train,...,0,1,0,0,0,0,0,0,1,0
4,24250.0,12.23,60.0,23.79,5833.33,6.0,31061.0,2.0,10.0,train,...,0,0,0,0,0,0,0,0,1,0


In [45]:
Id_all.dtypes

Amount.Requested                   float64
Interest.Rate                      float64
Loan.Length                        float64
Debt.To.Income.Ratio               float64
Monthly.Income                     float64
Open.CREDIT.Lines                  float64
Revolving.CREDIT.Balance           float64
Inquiries.in.the.Last.6.Months     float64
Employment.Length                  float64
data                                object
fico                               float64
Loan.Purpose_debt_consolidation      int32
Loan.Purpose_credit_card             int32
Loan.Purpose_other                   int32
Loan.Purpose_home_improvement        int32
Loan.Purpose_major_purchase          int32
Loan.Purpose_small_business          int32
Loan.Purpose_car                     int32
Loan.Purpose_wedding                 int32
Loan.Purpose_medical                 int32
Loan.Purpose_moving                  int32
State_CA                             int32
State_NY                             int32
State_TX   

In [49]:
Id_all.isnull().sum()

Amount.Requested                     5
Interest.Rate                      300
Loan.Length                          2
Debt.To.Income.Ratio                 1
Monthly.Income                       3
Open.CREDIT.Lines                    9
Revolving.CREDIT.Balance             5
Inquiries.in.the.Last.6.Months       3
Employment.Length                   80
data                                 0
fico                                 0
Loan.Purpose_debt_consolidation      0
Loan.Purpose_credit_card             0
Loan.Purpose_other                   0
Loan.Purpose_home_improvement        0
Loan.Purpose_major_purchase          0
Loan.Purpose_small_business          0
Loan.Purpose_car                     0
Loan.Purpose_wedding                 0
Loan.Purpose_medical                 0
Loan.Purpose_moving                  0
State_CA                             0
State_NY                             0
State_TX                             0
State_FL                             0
State_IL                 

In [68]:
Id_all['Amount.Requested']=Id_all['Amount.Requested'].fillna(Id_all['Amount.Requested'].mean())

In [69]:
Id_all['Loan.Length']=Id_all['Loan.Length'].fillna(Id_all['Loan.Length'].mode())

In [70]:
Id_all['Debt.To.Income.Ratio']=Id_all['Debt.To.Income.Ratio'].fillna(Id_all['Debt.To.Income.Ratio'].mean())

In [71]:
Id_all['Monthly.Income']=Id_all['Monthly.Income'].fillna(Id_all['Monthly.Income'].mean())

In [72]:
Id_all['Open.CREDIT.Lines']=Id_all['Open.CREDIT.Lines'].fillna(Id_all['Open.CREDIT.Lines'].mean())

In [73]:
Id_all['Revolving.CREDIT.Balance']=Id_all['Revolving.CREDIT.Balance'].fillna(Id_all['Revolving.CREDIT.Balance'].mean())

In [74]:
Id_all['Inquiries.in.the.Last.6.Months']=Id_all['Inquiries.in.the.Last.6.Months'].fillna(Id_all['Inquiries.in.the.Last.6.Months'].mean())

In [75]:
Id_all['Employment.Length']=Id_all['Employment.Length'].fillna(Id_all['Employment.Length'].mean())

In [76]:
Id_all.isnull().sum()

Amount.Requested                     0
Interest.Rate                      300
Loan.Length                          0
Debt.To.Income.Ratio                 0
Monthly.Income                       0
Open.CREDIT.Lines                    0
Revolving.CREDIT.Balance             0
Inquiries.in.the.Last.6.Months       0
Employment.Length                    0
data                                 0
fico                                 0
Loan.Purpose_debt_consolidation      0
Loan.Purpose_credit_card             0
Loan.Purpose_other                   0
Loan.Purpose_home_improvement        0
Loan.Purpose_major_purchase          0
Loan.Purpose_small_business          0
Loan.Purpose_car                     0
Loan.Purpose_wedding                 0
Loan.Purpose_medical                 0
Loan.Purpose_moving                  0
State_CA                             0
State_NY                             0
State_TX                             0
State_FL                             0
State_IL                 

In [None]:
# #Sir's code 
# for col in data.columns:
#     if (col not in ['Interest.Rate','data']) & (data[col].isnull().sum()>0):
#         data.loc[data[col].isnull(),col]=data.loc[data['data']=='train',col].mean()

# Model Building

In [97]:
Id_train=Id_all[Id_all['data']=='train']
Id_train=Id_train.drop('data',axis=1)
Id_train.head()

Unnamed: 0,Amount.Requested,Interest.Rate,Loan.Length,Debt.To.Income.Ratio,Monthly.Income,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length,fico,...,State_MO,State_NV,State_OR,State_SC,State_WI,State_KY,State_LA,State_OK,Home.Ownership_MORTGAGE,Home.Ownership_RENT
0,25000.0,18.49,60.0,27.56,8606.56,11.0,15210.0,3.0,5.0,722.0,...,0,0,0,0,0,0,0,0,1,0
1,19750.0,17.27,60.0,13.39,6737.5,14.0,19070.0,3.0,4.0,712.0,...,0,0,0,0,0,0,0,0,1,0
2,2100.0,14.33,36.0,3.5,1000.0,13.0,893.0,1.0,0.0,692.0,...,0,0,0,0,0,0,1,0,0,0
3,28000.0,16.29,36.0,19.62,7083.33,12.0,38194.0,1.0,10.0,712.0,...,0,1,0,0,0,0,0,0,1,0
4,24250.0,12.23,60.0,23.79,5833.33,6.0,31061.0,2.0,10.0,732.0,...,0,0,0,0,0,0,0,0,1,0


In [114]:
Id_test=Id_all[Id_all['data']=='test']
Id_test=Id_test.drop('data',axis=1)
x_test=Id_test.drop('Interest.Rate',axis=1)
y_test=Id_test['Interest.Rate']
x_test.head()

Unnamed: 0,Amount.Requested,Loan.Length,Debt.To.Income.Ratio,Monthly.Income,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length,fico,Loan.Purpose_debt_consolidation,...,State_MO,State_NV,State_OR,State_SC,State_WI,State_KY,State_LA,State_OK,Home.Ownership_MORTGAGE,Home.Ownership_RENT
0,5000.0,60.0,12.59,4416.67,13.0,7686.0,0.0,0.0,692.0,0,...,0,0,0,0,0,0,0,0,0,1
1,18000.0,60.0,4.93,5258.5,6.0,11596.0,0.0,10.0,712.0,1,...,0,0,0,0,0,0,0,0,0,1
2,7200.0,60.0,25.16,3750.0,13.0,7283.0,0.0,6.0,752.0,1,...,0,0,0,0,0,0,1,0,1,0
3,7200.0,36.0,17.27,3416.67,14.0,4838.0,0.0,10.0,792.0,1,...,0,0,0,0,0,0,0,0,1,0
4,22000.0,60.0,18.28,6083.33,9.0,20181.0,0.0,8.0,722.0,1,...,0,0,0,0,0,0,0,0,1,0


In [187]:
x_train=Id_train.drop('Interest.Rate',axis=1)
y_train=Id_train['Interest.Rate']
x_train.head(2)

Unnamed: 0,Amount.Requested,Loan.Length,Debt.To.Income.Ratio,Monthly.Income,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length,fico,Loan.Purpose_debt_consolidation,...,State_MO,State_NV,State_OR,State_SC,State_WI,State_KY,State_LA,State_OK,Home.Ownership_MORTGAGE,Home.Ownership_RENT
0,25000.0,60.0,27.56,8606.56,11.0,15210.0,3.0,5.0,722.0,1,...,0,0,0,0,0,0,0,0,1,0
1,19750.0,60.0,13.39,6737.5,14.0,19070.0,3.0,4.0,712.0,1,...,0,0,0,0,0,0,0,0,1,0


In [100]:
y_train.head()

0    18.49
1    17.27
2    14.33
3    16.29
4    12.23
Name: Interest.Rate, dtype: float64

In [101]:
x_train.shape

(2200, 49)

In [102]:
x_train.dtypes

Amount.Requested                   float64
Loan.Length                        float64
Debt.To.Income.Ratio               float64
Monthly.Income                     float64
Open.CREDIT.Lines                  float64
Revolving.CREDIT.Balance           float64
Inquiries.in.the.Last.6.Months     float64
Employment.Length                  float64
fico                               float64
Loan.Purpose_debt_consolidation      int32
Loan.Purpose_credit_card             int32
Loan.Purpose_other                   int32
Loan.Purpose_home_improvement        int32
Loan.Purpose_major_purchase          int32
Loan.Purpose_small_business          int32
Loan.Purpose_car                     int32
Loan.Purpose_wedding                 int32
Loan.Purpose_medical                 int32
Loan.Purpose_moving                  int32
State_CA                             int32
State_NY                             int32
State_TX                             int32
State_FL                             int32
State_IL   

In [119]:
y_train.dtypes

dtype('float64')

In [120]:
y_test.dtypes

dtype('float64')

In [103]:
y_train.shape

(2200,)

In [104]:
from sklearn import linear_model
from sklearn.linear_model import LinearRegression

In [105]:
lr = LinearRegression()
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [108]:
# higher the alpha value, more restriction on the coefficients; 
# low alpha > more generalization, coefficients are barely
rr = Ridge(alpha=0.01) 
# restricted and in this case linear and ridge regression resembles
rr.fit(x_train, y_train)

Ridge(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [188]:
from sklearn.model_selection import train_test_split
x_train_train,x_train_test,y_train_train,y_train_test=train_test_split(x_train,y_train,test_size=0.3,random_state=3)

print(len(x_train_train))
print(len(y_train_train))
print(len(x_train_test))
print(len(y_train_test))

1540
1540
660
660


In [135]:
train_score=lr.score(x_train_train, y_train_train)
test_score=lr.score(x_train_test, y_train_test)
print(train_score)
print(test_score)

0.7709673640878852
0.7499700770438501


In [136]:
Ridge_train_score=rr.score(x_train_train, y_train_train)
Ridge_test_score=rr.score(x_train_test, y_train_test)
print(Ridge_train_score)
print(Ridge_test_score)

0.770967560715764
0.7499695963242432


In [137]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression

In [168]:
lasso = Lasso(alpha=.007)
lasso.fit(x_train_train,y_train_train)
train_score=lasso.score(x_train_train,y_train_train)
test_score=lasso.score(x_train_test,y_train_test)
coeff_used = np.sum(lasso.coef_!=0)
print("training score:", train_score)
print("test score: ", test_score)

training score: 0.7696347625240441
test score:  0.7429693755412867


training score: 0.6784804932466961


test score:  0.6425978274075081


In [146]:
print("number of features used: ", coeff_used)

number of features used:  5


In [184]:
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm

x_constant = sm.add_constant(x_train_train)
lin_reg=sm.OLS(y_train_train,x_constant).fit()
lin_reg.summary()

0,1,2,3
Dep. Variable:,Interest.Rate,R-squared:,0.773
Model:,OLS,Adj. R-squared:,0.765
Method:,Least Squares,F-statistic:,103.3
Date:,"Thu, 17 Oct 2019",Prob (F-statistic):,0.0
Time:,14:44:53,Log-Likelihood:,-3252.4
No. Observations:,1540,AIC:,6605.0
Df Residuals:,1490,BIC:,6872.0
Df Model:,49,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,67.7286,1.273,53.206,0.000,65.232,70.226
Amount.Requested,0.0002,8.4e-06,19.506,0.000,0.000,0.000
Loan.Length,0.1334,0.006,22.457,0.000,0.122,0.145
Debt.To.Income.Ratio,5.285e-05,0.008,0.006,0.995,-0.016,0.016
Monthly.Income,-2.445e-05,1.42e-05,-1.726,0.085,-5.22e-05,3.34e-06
Open.CREDIT.Lines,-0.0330,0.013,-2.501,0.013,-0.059,-0.007
Revolving.CREDIT.Balance,-3.707e-06,3.06e-06,-1.211,0.226,-9.71e-06,2.3e-06
Inquiries.in.the.Last.6.Months,0.3504,0.044,7.878,0.000,0.263,0.438
Employment.Length,0.0175,0.015,1.137,0.256,-0.013,0.048

0,1,2,3
Omnibus:,92.738,Durbin-Watson:,2.005
Prob(Omnibus):,0.0,Jarque-Bera (JB):,142.449
Skew:,0.49,Prob(JB):,1.17e-31
Kurtosis:,4.123,Cond. No.,727000.0


In [189]:
predicted_ir=lr.predict(x_train_test)

In [190]:
from sklearn.metrics import mean_absolute_error

In [191]:
mean_absolute_error(y_train_test,predicted_ir)

1.6315269944969077

In [193]:
from sklearn.linear_model import Ridge,Lasso
from sklearn.model_selection import GridSearchCV

In [196]:
lambdas=np.linspace(1,100,100)
params={'alpha':lambdas}
model=Ridge(fit_intercept=True)
grid_search=GridSearchCV(model,param_grid=params,cv=10,scoring='neg_mean_absolute_error')
grid_search.fit(x_train_train,y_train_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=None, normalize=False, random_state=None,
                             solver='auto', tol=0.001),
             iid='warn', n_jobs=None,
             param_grid={'alpha': array([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
        12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,
        23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,...
        34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,
        45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,
        56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,
        67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,
        78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.,
        89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,  99.,
       100.]

In [197]:
grid_search.best_estimator_

Ridge(alpha=100.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [198]:
grid_search.cv_results_

{'mean_fit_time': array([0.00654626, 0.00576146, 0.0047895 , 0.00460916, 0.00594423,
        0.00535777, 0.00492554, 0.00656214, 0.00522509, 0.00490563,
        0.00711641, 0.00430651, 0.00601461, 0.00648949, 0.00597813,
        0.00592029, 0.00554214, 0.00767515, 0.00493453, 0.00510094,
        0.00473509, 0.00430281, 0.0040134 , 0.00372157, 0.00456398,
        0.00630376, 0.00537193, 0.00568445, 0.0057734 , 0.00601923,
        0.00492983, 0.00507741, 0.00978835, 0.00603404, 0.00682693,
        0.00507019, 0.00449126, 0.00546274, 0.00622315, 0.0050957 ,
        0.00598683, 0.00588024, 0.00482428, 0.00571661, 0.00592988,
        0.00452638, 0.00547168, 0.00553081, 0.00672255, 0.00512791,
        0.00502298, 0.0057791 , 0.00581117, 0.00663245, 0.00647783,
        0.00552437, 0.00602117, 0.00646076, 0.00532749, 0.00419605,
        0.0041543 , 0.00521865, 0.00454865, 0.00639613, 0.00615575,
        0.00421946, 0.00588832, 0.00719328, 0.00445833, 0.00602427,
        0.00535166, 0.00543442,