# Problem Statement<a href="#Problem-Statement" class="anchor-link">¶</a>

A company wants to automate its loan eligibility process based on the
customer details provided when filling in the online application form.
The details supplied by the customer are Gender, Marital Status,
Education, Number of Dependents, Income of self and co-applicant,
Required Loan Amount, Required Loan Term, Credit History and others. The
requirements are as follows:

1. Check the eligibility of the customer given the inputs described
   above (classification).

# Read Data<a href="#Read-Data" class="anchor-link">¶</a>

In \[1\]:

    from pandas import read_csv
    # Both paths are relative, so they resolve against the kernel's working directory.
    # The training file carries the Loan_Status label; the testing file does not.
    trd = read_csv("Desktop/Projects/Loan/training_set.csv")
    tsd = read_csv("Desktop/Projects/Loan/testing_set.csv")

# Profile<a href="#Profile" class="anchor-link">¶</a>

In \[2\]:

    trd.head(2)

Out\[2\]:

|     | Loan_ID  | Gender | Married | Dependents | Education | Self_Employed | ApplicantIncome | CoapplicantIncome | LoanAmount | Loan_Amount_Term | Credit_History | Property_Area | Loan_Status |
|-----|----------|--------|---------|------------|-----------|---------------|-----------------|-------------------|------------|------------------|----------------|---------------|-------------|
| 0   | LP001002 | Male   | No      | 0          | Graduate  | No            | 5849.0          | 0.0               | NaN        | 360.0            | 1.0            | Urban         | Y           |
| 1   | LP001003 | Male   | Yes     | 1          | Graduate  | No            | NaN             | 1508.0            | 128.0      | 360.0            | 1.0            | Rural         | N           |

In \[3\]:

    tsd.head(2)

Out\[3\]:

|     | Loan_ID  | Gender | Married | Dependents | Education | Self_Employed | ApplicantIncome | CoapplicantIncome | LoanAmount | Loan_Amount_Term | Credit_History | Property_Area |
|-----|----------|--------|---------|------------|-----------|---------------|-----------------|-------------------|------------|------------------|----------------|---------------|
| 0   | LP001015 | Male   | Yes     | 0          | Graduate  | No            | 5720            | 0                 | 110.0      | 360.0            | 1.0            | Urban         |
| 1   | LP001022 | Male   | Yes     | 1          | Graduate  | No            | 3076            | 1500              | 126.0      | 360.0            | 1.0            | Urban         |

In \[4\]:

    trd.isna().sum()

Out\[4\]:

    Loan_ID               0
    Gender               15
    Married               3
    Dependents           15
    Education             1
    Self_Employed        32
    ApplicantIncome       2
    CoapplicantIncome     1
    LoanAmount           22
    Loan_Amount_Term     14
    Credit_History       50
    Property_Area         0
    Loan_Status           0
    dtype: int64

In \[5\]:

    tsd.isna().sum()

Out\[5\]:

    Loan_ID               0
    Gender               11
    Married               0
    Dependents           10
    Education             0
    Self_Employed        23
    ApplicantIncome       0
    CoapplicantIncome     0
    LoanAmount            5
    Loan_Amount_Term      6
    Credit_History       29
    Property_Area         0
    dtype: int64

# Missing Data Treatment<a href="#Missing-Data-Treatment" class="anchor-link">¶</a>

In \[6\]:

    # Impute every feature column (the columns of tsd, i.e. everything but the label).
    # The fill value always comes from the training set: the mode for text
    # columns, the mean for numeric ones, and is applied to both frames.
    for col_name in tsd.columns:
        is_text = trd[col_name].dtypes == "object"
        fill_value = trd[col_name].mode()[0] if is_text else trd[col_name].mean()
        trd[col_name] = trd[col_name].fillna(fill_value)
        tsd[col_name] = tsd[col_name].fillna(fill_value)

# Outliers in Data<a href="#Outliers-in-Data" class="anchor-link">¶</a>

In \[7\]:

    # Partition the training columns by dtype: text ("object") vs everything else.
    cat = [name for name in trd.columns if trd[name].dtypes == "object"]
    con = [name for name in trd.columns if trd[name].dtypes != "object"]

In \[8\]:

    cat

Out\[8\]:

    ['Loan_ID',
     'Gender',
     'Married',
     'Dependents',
     'Education',
     'Self_Employed',
     'Property_Area',
     'Loan_Status']

In \[9\]:

    con

Out\[9\]:

    ['ApplicantIncome',
     'CoapplicantIncome',
     'LoanAmount',
     'Loan_Amount_Term',
     'Credit_History']

In \[10\]:

    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    # Standardise the numeric columns so outliers can be flagged on a common scale.
    ss = StandardScaler()
    z_scores = ss.fit_transform(trd[con])
    trdconss = pd.DataFrame(z_scores, columns=con)

In \[ \]:

     

In \[11\]:

    from numpy import unique

    # A row is flagged when any standardised column lies outside [-3, 3].
    flagged = [
        row_label
        for name in trdconss.columns
        for row_label in trdconss.index[(trdconss[name] < -3) | (trdconss[name] > 3)]
    ]
    # unique() de-duplicates rows flagged by more than one column.
    outliers = unique(flagged)

In \[12\]:

    # Remove the flagged rows; the index keeps its gaps until it is reset below.
    trd = trd.drop(index=outliers)

In \[13\]:

    trd.shape

Out\[13\]:

    (577, 13)

In \[14\]:

    # Renumber rows 0..n-1 so later index-based joins line up.
    trd = trd.reset_index(drop=True)

# EDA<a href="#EDA" class="anchor-link">¶</a>

In \[15\]:

    trd[con].skew()

Out\[15\]:

    ApplicantIncome      2.145688
    CoapplicantIncome    1.351359
    LoanAmount           1.113132
    Loan_Amount_Term    -2.077031
    Credit_History      -1.921860
    dtype: float64

In \[16\]:

    def ANOVA(df,cat,con):
        """Return the ANOVA p-value for `con ~ cat`, rounded to 4 decimals.

        df  : DataFrame containing both columns.
        cat : name of the grouping (right-hand side) column.
        con : name of the response (left-hand side) column.

        Fits an OLS model from the formula and reads the p-value from the
        first row of the resulting ANOVA table.
        """
        from statsmodels.formula.api import ols
        from statsmodels.stats.anova import anova_lm
        rel = con + " ~ " + cat
        model = ols(rel,df).fit()
        table = anova_lm(model)
        # Purely positional lookup (first row, fifth column). The table's rows
        # are labelled by term name, so an integer key on the sliced Series
        # would rely on pandas' deprecated positional fallback.
        pval = round(table.iloc[0, 4], 4)
        return pval

In \[17\]:

    # Run the ANOVA helper for each numeric column against the target.
    for feature in con:
        print("Loan_Status vs",feature," Pval ---->",ANOVA(trd,"Loan_Status",feature))

    Loan_Status vs ApplicantIncome  Pval ----> 0.8943
    Loan_Status vs CoapplicantIncome  Pval ----> 0.2821
    Loan_Status vs LoanAmount  Pval ----> 0.3975
    Loan_Status vs Loan_Amount_Term  Pval ----> 0.6636
    Loan_Status vs Credit_History  Pval ----> 0.0

# Preprocessing<a href="#Preprocessing" class="anchor-link">¶</a>

In \[18\]:

    # Features are every column except the row identifier and the target.
    X_trd = trd.drop(columns=["Loan_ID","Loan_Status"])
    Y_trd = trd[["Loan_Status"]]

    # Split the feature names by dtype: text columns vs numeric columns.
    cat = [name for name in X_trd.columns if X_trd[name].dtypes == "object"]
    con = [name for name in X_trd.columns if X_trd[name].dtypes != "object"]

    from sklearn.preprocessing import MinMaxScaler
    mm = MinMaxScaler()
    # Numeric features are min-max scaled; text features are one-hot encoded.
    scaled_numeric = mm.fit_transform(X_trd[con])
    x1_trd = pd.DataFrame(scaled_numeric,columns=con)
    x2_trd = pd.get_dummies(X_trd[cat])
    # join() aligns on the row index, which was renumbered after outlier removal.
    Xnew_trd = x1_trd.join(x2_trd)

In \[19\]:

    from sklearn.model_selection import train_test_split
    xtrain,xtest,ytrain,ytest=train_test_split(Xnew_trd,Y_trd,test_size=0.2,random_state=21)

# Logistic<a href="#Logistic" class="anchor-link">¶</a>

In \[20\]:

    from warnings import filterwarnings
    # Suppresses every Python warning from this cell onward, not only those
    # raised by the model-fitting cells that follow.
    filterwarnings("ignore")

In \[21\]:

    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score

    # Baseline classifier with default settings, scored on both splits.
    lr = LogisticRegression()
    model = lr.fit(xtrain,ytrain)
    tr_pred, ts_pred = model.predict(xtrain), model.predict(xtest)
    tr_acc, ts_acc = accuracy_score(ytrain,tr_pred), accuracy_score(ytest,ts_pred)

In \[22\]:

    tr_acc

Out\[22\]:

    0.8156182212581344

In \[23\]:

    ts_acc

Out\[23\]:

    0.8448275862068966

# Tree<a href="#Tree" class="anchor-link">¶</a>

In \[24\]:

    def modeller(mo):
        """Fit `mo` on the notebook's train split and score it on both splits.

        Reads the globals xtrain, ytrain, xtest and ytest. Returns
        (train_accuracy, test_accuracy), each rounded to 2 decimals.
        Note: a later cell redefines `modeller` to report squared error instead.
        """
        from sklearn.metrics import accuracy_score
        fitted = mo.fit(xtrain,ytrain)
        scores = [
            round(accuracy_score(actual,fitted.predict(features)),2)
            for features, actual in ((xtrain,ytrain),(xtest,ytest))
        ]
        return scores[0],scores[1]

In \[25\]:

    from sklearn.tree import DecisionTreeClassifier
    dtc = DecisionTreeClassifier(random_state=21)
    modeller(dtc)

Out\[25\]:

    (1.0, 0.68)

In \[26\]:

    # Sweep the tree depth and print (train, test) accuracy for each setting.
    for depth in range(2,20):
        dtc = DecisionTreeClassifier(max_depth=depth,random_state=21)
        print(depth,modeller(dtc))

    2 (0.81, 0.84)
    3 (0.83, 0.83)
    4 (0.84, 0.81)
    5 (0.85, 0.8)
    6 (0.87, 0.8)
    7 (0.9, 0.77)
    8 (0.92, 0.73)
    9 (0.95, 0.71)
    10 (0.97, 0.69)
    11 (0.98, 0.7)
    12 (0.99, 0.7)
    13 (1.0, 0.7)
    14 (1.0, 0.68)
    15 (1.0, 0.68)
    16 (1.0, 0.68)
    17 (1.0, 0.68)
    18 (1.0, 0.68)
    19 (1.0, 0.68)

In \[27\]:

    from sklearn.neighbors import KNeighborsClassifier
    knc = KNeighborsClassifier()
    modeller(knc)

Out\[27\]:

    (0.81, 0.75)

In \[28\]:

    # Sweep the neighbour count and print (train, test) accuracy for each setting.
    for k in range(2,20):
        knc = KNeighborsClassifier(n_neighbors=k)
        print(k,modeller(knc))

    2 (0.9, 0.69)
    3 (0.85, 0.75)
    4 (0.86, 0.72)
    5 (0.81, 0.75)
    6 (0.8, 0.72)
    7 (0.77, 0.7)
    8 (0.79, 0.71)
    9 (0.76, 0.73)
    10 (0.76, 0.72)
    11 (0.75, 0.74)
    12 (0.76, 0.74)
    13 (0.74, 0.75)
    14 (0.74, 0.76)
    15 (0.74, 0.77)
    16 (0.74, 0.76)
    17 (0.72, 0.77)
    18 (0.73, 0.76)
    19 (0.72, 0.77)

In \[ \]:

     

# Train the model with entire data in training set<a href="#Train-the-model-with-entire-data-in-training-set"
class="anchor-link">¶</a>

In \[29\]:

    # Refit the chosen shallow tree on all labelled rows (no hold-out this time).
    dtc = DecisionTreeClassifier(max_depth=3,random_state=21)
    finmodel = dtc.fit(Xnew_trd,Y_trd)

# Data preparation for testing set<a href="#Data-preparation-for-testing-set" class="anchor-link">¶</a>

In \[30\]:

    xtrain.columns

Out\[30\]:

    Index(['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
           'Loan_Amount_Term', 'Credit_History', 'Gender_Female', 'Gender_Male',
           'Married_No', 'Married_Yes', 'Dependents_0', 'Dependents_1',
           'Dependents_2', 'Dependents_3+', 'Education_Graduate',
           'Education_Not Graduate', 'Self_Employed_No', 'Self_Employed_Yes',
           'Property_Area_Rural', 'Property_Area_Semiurban',
           'Property_Area_Urban'],
          dtype='object')

In \[31\]:

    # The testing set has no label, so only the identifier is removed.
    X_tsd = tsd.drop(columns=["Loan_ID"])

    cat = [name for name in X_tsd.columns if X_tsd[name].dtypes == "object"]
    con = [name for name in X_tsd.columns if X_tsd[name].dtypes != "object"]

In \[32\]:

    # Scale with the scaler fitted on the training features so both sets share one scale.
    X1_tsd = pd.DataFrame(mm.transform(X_tsd[con]),columns=con,index=X_tsd.index)
    X2_tsd = pd.get_dummies(X_tsd[cat])
    # The model identifies features by position, so force the exact training
    # columns in the training order. A dummy level missing from the test data
    # becomes an all-zero column; a level unseen in training is dropped.
    Xnew_tsd = X1_tsd.join(X2_tsd).reindex(columns=Xnew_trd.columns,fill_value=0)

In \[33\]:

    #Xnew_tsd[Xnew_trd.columns]

In \[34\]:

    pred_LS = finmodel.predict(Xnew_tsd)

# Final DF<a href="#Final-DF" class="anchor-link">¶</a>

In \[35\]:

    # Take an explicit copy so that adding a column does not write into a
    # slice of tsd (which triggers pandas' SettingWithCopyWarning).
    Q = tsd[["Loan_ID"]].copy()
    Q["Loan_Status"]=pred_LS

In \[36\]:

    # Writes the predictions to a path relative to the kernel's working directory.
    # NOTE(review): the row index is written as an unnamed first column; pass
    # index=False if the consumer expects only Loan_ID and Loan_Status - confirm.
    Q.to_csv("Desktop/vaibhav_loan.csv")

In \[37\]:

    Q.head()

Out\[37\]:

|     | Loan_ID  | Loan_Status |
|-----|----------|-------------|
| 0   | LP001015 | Y           |
| 1   | LP001022 | Y           |
| 2   | LP001031 | Y           |
| 3   | LP001035 | Y           |
| 4   | LP001051 | Y           |

# For applicants who were refused a loan of amount X, how large a loan could they get?<a
href="#For-applicants-who-were-refused-a-loan-of-amount-X,-how-large-a-loan-could-they-get?"
class="anchor-link">¶</a>

In \[38\]:

    Q['Amt']=tsd.LoanAmount

In \[39\]:

    pred_set = Q[Q.Loan_Status == "N"]

In \[40\]:

    pred_set

Out\[40\]:

|     | Loan_ID  | Loan_Status | Amt        |
|-----|----------|-------------|------------|
| 7   | LP001056 | N           | 147.000000 |
| 13  | LP001094 | N           | 166.000000 |
| 35  | LP001203 | N           | 176.000000 |
| 55  | LP001313 | N           | 130.000000 |
| 58  | LP001323 | N           | 176.000000 |
| 63  | LP001347 | N           | 108.000000 |
| 66  | LP001352 | N           | 135.000000 |
| 67  | LP001358 | N           | 130.000000 |
| 69  | LP001361 | N           | 188.000000 |
| 80  | LP001420 | N           | 163.000000 |
| 82  | LP001445 | N           | 149.000000 |
| 84  | LP001450 | N           | 131.000000 |
| 94  | LP001496 | N           | 123.000000 |
| 101 | LP001542 | N           | 146.412162 |
| 106 | LP001563 | N           | 119.000000 |
| 117 | LP001611 | N           | 80.000000  |
| 118 | LP001613 | N           | 104.000000 |
| 119 | LP001622 | N           | 213.000000 |
| 123 | LP001652 | N           | 187.000000 |
| 124 | LP001655 | N           | 300.000000 |
| 126 | LP001662 | N           | 71.000000  |
| 140 | LP001785 | N           | 150.000000 |
| 142 | LP001789 | N           | 139.000000 |
| 147 | LP001817 | N           | 199.000000 |
| 153 | LP001853 | N           | 117.000000 |
| 161 | LP001906 | N           | 84.000000  |
| 165 | LP001923 | N           | 170.000000 |
| 166 | LP001933 | N           | 120.000000 |
| 168 | LP001950 | N           | 94.000000  |
| 173 | LP001979 | N           | 159.000000 |
| ... | ...      | ...         | ...        |
| 192 | LP002069 | N           | 180.000000 |
| 193 | LP002070 | N           | 128.000000 |
| 196 | LP002090 | N           | 114.000000 |
| 198 | LP002099 | N           | 104.000000 |
| 200 | LP002105 | N           | 108.000000 |
| 211 | LP002168 | N           | 200.000000 |
| 222 | LP002245 | N           | 80.000000  |
| 224 | LP002256 | N           | 187.000000 |
| 229 | LP002286 | N           | 125.000000 |
| 235 | LP002316 | N           | 176.000000 |
| 236 | LP002321 | N           | 117.000000 |
| 239 | LP002329 | N           | 66.000000  |
| 241 | LP002339 | N           | 105.000000 |
| 243 | LP002346 | N           | 125.000000 |
| 245 | LP002355 | N           | 150.000000 |
| 250 | LP002383 | N           | 142.000000 |
| 255 | LP002399 | N           | 123.000000 |
| 266 | LP002442 | N           | 112.000000 |
| 268 | LP002450 | N           | 49.000000  |
| 273 | LP002495 | N           | 130.000000 |
| 274 | LP002496 | N           | 94.000000  |
| 278 | LP002551 | N           | 176.000000 |
| 293 | LP002609 | N           | 88.000000  |
| 301 | LP002651 | N           | 125.000000 |
| 311 | LP002747 | N           | 153.000000 |
| 317 | LP002774 | N           | 67.000000  |
| 325 | LP002802 | N           | 95.000000  |
| 339 | LP002858 | N           | 162.000000 |
| 346 | LP002879 | N           | 133.000000 |
| 354 | LP002921 | N           | 158.000000 |

61 rows × 3 columns

# Data Prep<a href="#Data-Prep" class="anchor-link">¶</a>

In \[41\]:

    # Second task: LoanAmount becomes the target; Loan_Status stays in as a predictor.
    X_trd = trd.drop(columns=["Loan_ID","LoanAmount"])
    Y_trd = trd[["LoanAmount"]]

    cat = [name for name in X_trd.columns if X_trd[name].dtypes == "object"]
    con = [name for name in X_trd.columns if X_trd[name].dtypes != "object"]

    from sklearn.preprocessing import MinMaxScaler
    # A fresh scaler replaces the one used for the classifier's features.
    mm = MinMaxScaler()
    scaled_numeric = mm.fit_transform(X_trd[con])
    x1_trd = pd.DataFrame(scaled_numeric,columns=con)
    x2_trd = pd.get_dummies(X_trd[cat])
    Xnew_trd = x1_trd.join(x2_trd)

In \[42\]:

    Xnew_trd.head()

Out\[42\]:

|     | ApplicantIncome | CoapplicantIncome | Loan_Amount_Term | Credit_History | Gender_Female | Gender_Male | Married_No | Married_Yes | Dependents_0 | Dependents_1 | ... | Dependents_3+ | Education_Graduate | Education_Not Graduate | Self_Employed_No | Self_Employed_Yes | Property_Area_Rural | Property_Area_Semiurban | Property_Area_Urban | Loan_Status_N | Loan_Status_Y |
|-----|-----------------|-------------------|------------------|----------------|---------------|-------------|------------|-------------|--------------|--------------|-----|---------------|--------------------|------------------------|------------------|-------------------|---------------------|-------------------------|---------------------|---------------|---------------|
| 0   | 0.277770        | 0.000000          | 0.6              | 1.0            | 0             | 1           | 1          | 0           | 1            | 0            | ... | 0             | 1                  | 0                      | 1                | 0                 | 0                   | 0                       | 1                   | 0             | 1             |
| 1   | 0.256155        | 0.167929          | 0.6              | 1.0            | 0             | 1           | 0          | 1           | 0            | 1            | ... | 0             | 1                  | 0                      | 1                | 0                 | 1                   | 0                       | 0                   | 1             | 0             |
| 2   | 0.138909        | 0.000000          | 0.6              | 1.0            | 0             | 1           | 0          | 1           | 1            | 0            | ... | 0             | 1                  | 0                      | 0                | 1                 | 0                   | 0                       | 1                   | 0             | 1             |
| 3   | 0.118585        | 0.262584          | 0.6              | 1.0            | 0             | 1           | 0          | 1           | 1            | 0            | ... | 0             | 0                  | 1                      | 1                | 0                 | 0                   | 0                       | 1                   | 0             | 1             |
| 4   | 0.285129        | 0.000000          | 0.6              | 1.0            | 0             | 1           | 1          | 0           | 1            | 0            | ... | 0             | 1                  | 0                      | 1                | 0                 | 0                   | 0                       | 1                   | 0             | 1             |

5 rows × 21 columns

In \[43\]:

    Y_trd.head()

Out\[43\]:

|     | LoanAmount |
|-----|------------|
| 0   | 146.412162 |
| 1   | 128.000000 |
| 2   | 66.000000  |
| 3   | 120.000000 |
| 4   | 141.000000 |

In \[44\]:

    from sklearn.model_selection import train_test_split
    xtrain,xtest,ytrain,ytest=train_test_split(Xnew_trd,Y_trd,test_size=0.2,random_state=21)

# Model<a href="#Model" class="anchor-link">¶</a>

In \[45\]:

    def modeller(mo):
        """Fit regressor `mo` on the notebook's train split and score both splits.

        Reads the globals xtrain, ytrain, xtest and ytest. Returns
        (train_mse, test_mse), each rounded to 2 decimals. This definition
        replaces the earlier accuracy-based `modeller` for the rest of the notebook.
        """
        from sklearn.metrics import mean_squared_error
        fitted = mo.fit(xtrain,ytrain)
        tr_err = round(mean_squared_error(ytrain,fitted.predict(xtrain)),2)
        ts_err = round(mean_squared_error(ytest,fitted.predict(xtest)),2)
        return tr_err,ts_err

# OLS<a href="#OLS" class="anchor-link">¶</a>

In \[46\]:

    from statsmodels.api import add_constant,OLS
    # Add the constant column explicitly before fitting, then show the full
    # regression summary as the cell output.
    xconst = add_constant(xtrain)
    ols = OLS(endog=ytrain,exog=xconst).fit()
    ols.summary()

Out\[46\]:

|                   |                  |                     |          |
|-------------------|------------------|---------------------|----------|
| Dep. Variable:    | LoanAmount       | R-squared:          | 0.417    |
| Model:            | OLS              | Adj. R-squared:     | 0.398    |
| Method:           | Least Squares    | F-statistic:        | 22.76    |
| Date:             | Sat, 25 Mar 2023 | Prob (F-statistic): | 6.42e-44 |
| Time:             | 10:28:13         | Log-Likelihood:     | -2392.4  |
| No. Observations: | 461              | AIC:                | 4815\.   |
| Df Residuals:     | 446              | BIC:                | 4877\.   |
| Df Model:         | 14               |                     |          |
| Covariance Type:  | nonrobust        |                     |          |

OLS Regression Results

|                         |          |         |        |          |         |         |
|-------------------------|----------|---------|--------|----------|---------|---------|
|                         | coef     | std err | t      | P\>\|t\| | \[0.025 | 0.975\] |
| const                   | 13.9879  | 2.534   | 5.520  | 0.000    | 9.008   | 18.968  |
| ApplicantIncome         | 200.1983 | 14.762  | 13.561 | 0.000    | 171.186 | 229.211 |
| CoapplicantIncome       | 132.7207 | 12.243  | 10.841 | 0.000    | 108.660 | 156.781 |
| Loan_Amount_Term        | 19.8254  | 12.588  | 1.575  | 0.116    | -4.913  | 44.564  |
| Credit_History          | 7.3222   | 6.990   | 1.048  | 0.295    | -6.415  | 21.060  |
| Gender_Female           | 3.1186   | 3.360   | 0.928  | 0.354    | -3.485  | 9.722   |
| Gender_Male             | 10.8692  | 2.987   | 3.639  | 0.000    | 5.000   | 16.739  |
| Married_No              | 5.7381   | 2.912   | 1.970  | 0.049    | 0.014   | 11.462  |
| Married_Yes             | 8.2498   | 2.740   | 3.010  | 0.003    | 2.864   | 13.636  |
| Dependents_0            | -3.9844  | 3.483   | -1.144 | 0.253    | -10.829 | 2.860   |
| Dependents_1            | 3.8337   | 4.570   | 0.839  | 0.402    | -5.148  | 12.815  |
| Dependents_2            | 1.2483   | 4.572   | 0.273  | 0.785    | -7.736  | 10.233  |
| Dependents_3+           | 12.8903  | 5.764   | 2.236  | 0.026    | 1.562   | 24.218  |
| Education_Graduate      | 10.9738  | 2.902   | 3.781  | 0.000    | 5.270   | 16.677  |
| Education_Not Graduate  | 3.0141   | 2.811   | 1.072  | 0.284    | -2.510  | 8.538   |
| Self_Employed_No        | 4.8488   | 3.099   | 1.565  | 0.118    | -1.241  | 10.938  |
| Self_Employed_Yes       | 9.1390   | 3.736   | 2.446  | 0.015    | 1.796   | 16.482  |
| Property_Area_Rural     | 3.5874   | 3.278   | 1.094  | 0.274    | -2.856  | 10.030  |
| Property_Area_Semiurban | 8.5047   | 2.987   | 2.847  | 0.005    | 2.634   | 14.375  |
| Property_Area_Urban     | 1.8958   | 3.073   | 0.617  | 0.538    | -4.145  | 7.936   |
| Loan_Status_N           | 12.6528  | 2.777   | 4.556  | 0.000    | 7.195   | 18.110  |
| Loan_Status_Y           | 1.3351   | 3.211   | 0.416  | 0.678    | -4.975  | 7.645   |

|                |        |                   |          |
|----------------|--------|-------------------|----------|
| Omnibus:       | 30.410 | Durbin-Watson:    | 1.948    |
| Prob(Omnibus): | 0.000  | Jarque-Bera (JB): | 106.592  |
| Skew:          | 0.111  | Prob(JB):         | 7.14e-24 |
| Kurtosis:      | 5.345  | Cond. No.         | 2.31e+17 |

  
  
Notes:  
\[1\] Standard Errors assume that the covariance matrix of the errors is
correctly specified.  
\[2\] The smallest eigenvalue is 5.29e-32. This might indicate that
there are  
strong multicollinearity problems or that the design matrix is singular.

In \[47\]:

    ols.rsquared_adj

Out\[47\]:

    0.39835688550313697

In \[48\]:

    col = ols.pvalues.sort_values().index[-1]

In \[49\]:

    col

Out\[49\]:

    'Dependents_2'

In \[50\]:

    from sklearn.model_selection import train_test_split
    from statsmodels.api import add_constant,OLS

    # One backward-elimination step: drop the predictor with the largest
    # p-value, re-split with the same seed, refit, and report the next candidate.
    Xnew_trd = Xnew_trd.drop(columns=[col])
    xtrain,xtest,ytrain,ytest=train_test_split(Xnew_trd,Y_trd,test_size=0.2,random_state=21)
    xconst = add_constant(xtrain)
    ols = OLS(ytrain,xconst).fit()
    col = ols.pvalues.sort_values().index[-1]
    print(ols.rsquared_adj,col)

    0.39835688550313675 Dependents_1

In \[51\]:

    from sklearn.linear_model import LinearRegression
    lm = LinearRegression()
    modeller(lm)

Out\[51\]:

    (1884.62, 1942.26)

In \[52\]:

    from sklearn.tree import DecisionTreeRegressor
    dtr = DecisionTreeRegressor(random_state=21,max_depth=2)
    modeller(dtr)

Out\[52\]:

    (1891.73, 2555.54)

In \[53\]:

    # Collect train/test MSE for tree depths 2..19; position k holds depth k+2.
    tr, ts = [], []
    for depth in range(2,20):
        dtr = DecisionTreeRegressor(max_depth=depth,random_state=21)
        train_mse, test_mse = modeller(dtr)
        tr.append(train_mse)
        ts.append(test_mse)

In \[54\]:

    import matplotlib.pyplot as plt
    # tr/ts hold one MSE per depth tried, starting at max_depth=2, so plot
    # against the real depth rather than the list position.
    depths = range(2, 2 + len(tr))
    plt.plot(depths, tr, label="train MSE")
    plt.plot(depths, ts, label="test MSE")
    plt.xlabel("max_depth")
    plt.ylabel("mean squared error")
    plt.title("Decision tree regressor: error vs depth")
    plt.legend();

Out\[54\]:

    [<matplotlib.lines.Line2D at 0x11ccc2c50>]

In \[55\]:

    dtr = DecisionTreeRegressor(random_state=21,max_depth=2)
    modeller(dtr)

Out\[55\]:

    (1891.73, 2555.54)

In \[56\]:

    from sklearn.ensemble import RandomForestRegressor
    rfr = RandomForestRegressor(random_state=21,max_depth=2)
    modeller(rfr)

Out\[56\]:

    (1722.79, 2220.47)

# Final Pred<a href="#Final-Pred" class="anchor-link">¶</a>

In \[57\]:

    tsd['Loan_Status']=pred_LS

In \[58\]:

    tsdnew = tsd.drop(labels=["Loan_ID","LoanAmount"],axis=1)

In \[59\]:

    # Rebuild the full training matrix from X_trd, which restores any column
    # dropped from Xnew_trd during the OLS elimination step.
    cat = [name for name in X_trd.columns if X_trd[name].dtypes == "object"]
    con = [name for name in X_trd.columns if X_trd[name].dtypes != "object"]

    from sklearn.preprocessing import MinMaxScaler
    mm = MinMaxScaler()
    scaled_numeric = mm.fit_transform(X_trd[con])
    x1_trd = pd.DataFrame(scaled_numeric,columns=con)
    x2_trd = pd.get_dummies(X_trd[cat])
    Xnew_trd = x1_trd.join(x2_trd)

In \[60\]:

    from sklearn.linear_model import LinearRegression
    lm = LinearRegression()
    model = lm.fit(Xnew_trd,Y_trd)

In \[61\]:

    cat = [name for name in tsdnew.columns if tsdnew[name].dtypes == "object"]
    con = [name for name in tsdnew.columns if tsdnew[name].dtypes != "object"]

    # Reuse `mm`, the scaler already fitted on the training features. Fitting a
    # new scaler on the test set would map its own min/max to 0/1, leaving the
    # test features on a different scale from the one the model was trained on.
    # (x1_trd / x2_trd are the notebook's existing names; here they hold test data.)
    x1_trd = pd.DataFrame(mm.transform(tsdnew[con]),columns=con,index=tsdnew.index)
    x2_trd = pd.get_dummies(tsdnew[cat])
    # Force the training columns in the training order; a dummy level absent
    # from the test data becomes an all-zero column.
    Xnew_tsd = x1_trd.join(x2_trd).reindex(columns=Xnew_trd.columns,fill_value=0)

In \[62\]:

    len(Xnew_tsd.columns)

Out\[62\]:

    21

In \[63\]:

    len(Xnew_trd.columns)

Out\[63\]:

    21

In \[64\]:

    predcited_loanAmt = model.predict(Xnew_tsd)

In \[65\]:

    Q['predicted_LoanAmt']= predcited_loanAmt

In \[66\]:

    len(Q[(Q.Loan_Status == "N")&(Q.Amt>Q.predicted_LoanAmt)])

Out\[66\]:

    52

In \[67\]:

    len(Q[(Q.Loan_Status == "N")&(Q.Amt<=Q.predicted_LoanAmt)])

Out\[67\]:

    9

# If a loan was rejected and its term is under 20 years, would a loan be granted for a somewhat longer term, and how much?<a
href="#If-a-loan-was-rejected-and-its-term-is-under-20-years,-would-a-loan-be-granted-for-a-somewhat-longer-term,-and-how-much?"
class="anchor-link">¶</a>

In \[68\]:

    Q.shape

Out\[68\]:

    (367, 4)

In \[69\]:

    # Rejected applications whose requested term is at most 240 (the largest
    # term the filter admits), renumbered from 0.
    Q = tsd[(tsd.Loan_Status == "N")&(tsd.Loan_Amount_Term<=240)].reset_index(drop=True)
    Q = Q.drop(labels=["Loan_Status","Loan_ID","Loan_Amount_Term"],axis=1)
    cat = [name for name in Q.columns if Q[name].dtypes == "object"]
    con = [name for name in Q.columns if Q[name].dtypes != "object"]

    from sklearn.preprocessing import MinMaxScaler
    # Fit the scaler on the training rows for the same numeric columns. Fitting
    # it on this small filtered subset would stretch the subset's own min/max
    # to 0/1, so the values would not be comparable with the training features.
    mm = MinMaxScaler()
    mm.fit(trd[con])
    x1_trd = pd.DataFrame(mm.transform(Q[con]),columns=con)
    x2_trd = pd.get_dummies(Q[cat])
    Xnew_tsd = x1_trd.join(x2_trd)

In \[ \]:

     

In \[ \]:

     

In \[ \]:

     

In \[ \]:

     

In \[70\]:

    # Third task: Loan_Amount_Term becomes the target; the identifier and the
    # approval label are left out of the features.
    X_trd = trd.drop(columns=["Loan_Status","Loan_ID","Loan_Amount_Term"])
    Y_trd = trd[["Loan_Amount_Term"]]
    cat = [name for name in X_trd.columns if X_trd[name].dtypes == "object"]
    con = [name for name in X_trd.columns if X_trd[name].dtypes != "object"]

    from sklearn.preprocessing import MinMaxScaler
    mm = MinMaxScaler()
    scaled_numeric = mm.fit_transform(X_trd[con])
    x1_trd = pd.DataFrame(scaled_numeric,columns=con)
    x2_trd = pd.get_dummies(X_trd[cat])
    Xnew_trd = x1_trd.join(x2_trd)

    from sklearn.model_selection import train_test_split
    xtrain,xtest,ytrain,ytest=train_test_split(Xnew_trd,Y_trd,test_size=0.2,random_state=21)

In \[71\]:

    from sklearn.linear_model import LinearRegression
    lm = LinearRegression()
    modeller(lm)

Out\[71\]:

    (2398.94, 2934.12)

In \[72\]:

    from sklearn.tree import DecisionTreeRegressor
    dtr = DecisionTreeRegressor(random_state=21,max_depth=2)
    modeller(dtr)

Out\[72\]:

    (2428.25, 3503.81)

In \[73\]:

    from sklearn.tree import DecisionTreeRegressor
    dtr = DecisionTreeRegressor(random_state=21,max_depth=3)
    modeller(dtr)

Out\[73\]:

    (2267.38, 3649.06)

In \[74\]:

    from sklearn.tree import DecisionTreeRegressor
    dtr = DecisionTreeRegressor(random_state=21,max_depth=4)
    modeller(dtr)

Out\[74\]:

    (2105.68, 4161.1)

In \[75\]:

    from sklearn.tree import DecisionTreeRegressor
    dtr = DecisionTreeRegressor(random_state=21,max_depth=5)
    modeller(dtr)

Out\[75\]:

    (1861.66, 4086.04)

In \[76\]:

    from sklearn.tree import DecisionTreeRegressor
    dtr = DecisionTreeRegressor(random_state=21,max_depth=9)
    modeller(dtr)

Out\[76\]:

    (502.45, 4967.66)

In \[77\]:

    from sklearn.neighbors import KNeighborsRegressor
    knr = KNeighborsRegressor(n_neighbors=3)
    modeller(knr)

Out\[77\]:

    (1590.91, 3734.69)

In \[78\]:

    from sklearn.neighbors import KNeighborsRegressor
    knr = KNeighborsRegressor(n_neighbors=39)
    modeller(knr)

Out\[78\]:

    (2472.69, 3016.97)

In \[79\]:

    from sklearn.ensemble import AdaBoostRegressor
    abr = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2,random_state=21),n_estimators=6)
    modeller(abr)

Out\[79\]:

    (2441.39, 3449.98)

In \[80\]:

    # The subset yields fewer dummy columns than the training matrix, so align
    # it to the training columns (missing levels become 0) before predicting.
    lm.predict(Xnew_tsd.reindex(columns=Xnew_trd.columns,fill_value=0))

    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-80-21b6fade62f0> in <module>
    ----> 1 lm.predict(Xnew_tsd)

    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/_base.py in predict(self, X)
        236             Returns predicted values.
        237         """
    --> 238         return self._decision_function(X)
        239 
        240     _preprocess_data = staticmethod(_preprocess_data)

    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/_base.py in _decision_function(self, X)
        220         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
        221         return safe_sparse_dot(X, self.coef_.T,
    --> 222                                dense_output=True) + self.intercept_
        223 
        224     def predict(self, X):

    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
         61             extra_args = len(args) - len(all_args)
         62             if extra_args <= 0:
    ---> 63                 return f(*args, **kwargs)
         64 
         65             # extra_args > 0

    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/extmath.py in safe_sparse_dot(a, b, dense_output)
        150             ret = np.dot(a, b)
        151     else:
    --> 152         ret = a @ b
        153 
        154     if (sparse.issparse(a) and sparse.issparse(b)

    ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 19 is different from 14)

In \[81\]:

    len(Xnew_tsd.columns)

Out\[81\]:

    14

In \[82\]:

    len(Xnew_trd.columns)

Out\[82\]:

    19

In \[ \]:

     

In \[83\]:

    # Give the test frame exactly the training columns, in the training order.
    # Appending the missing dummies at the end would leave the columns in a
    # different order from the one the model was fitted on. Dummy levels absent
    # from this subset are filled with 0, and re-running the cell is harmless.
    Xnew_tsd = Xnew_tsd.reindex(columns=Xnew_trd.columns,fill_value=0)

In \[91\]:

    # Fit on all training rows, then print one suggested term per rejected applicant.
    knr = KNeighborsRegressor(n_neighbors=3)
    model = knr.fit(Xnew_trd,Y_trd)

    # The target is a one-column frame, so each prediction is a one-element row.
    for term in model.predict(Xnew_tsd).ravel():
        print(term)

    360.0
    240.0
    300.0
    340.0
    400.0

In \[ \]:

     

In \[87\]:

    Y_trd.Loan_Amount_Term.value_counts()

Out\[87\]:

    360.0    492
    180.0     41
    342.0     14
    480.0     14
    300.0     12
    240.0      4
    Name: Loan_Amount_Term, dtype: int64

In \[ \]:

     

In \[ \]: