##Importing libraries

In [169]:
import pandas as pd
import numpy as np

##Loading dataset

In [170]:
df = pd.read_csv('/content/train.csv')
df.drop(columns='Loan_ID',  inplace=True)

##Taking care of object type missing data & selecting target attribute

In [171]:
dfo = df.select_dtypes(include=object).dropna(axis=0)

y = df.Loan_Status.iloc[dfo.index].values

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

dfo.drop(columns='Loan_Status', axis=1)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,Property_Area
0,Male,No,0,Graduate,No,Urban
1,Male,Yes,1,Graduate,No,Rural
2,Male,Yes,0,Graduate,Yes,Urban
3,Male,Yes,0,Not Graduate,No,Urban
4,Male,No,0,Graduate,No,Urban
...,...,...,...,...,...,...
609,Female,No,0,Graduate,No,Rural
610,Male,Yes,3+,Graduate,No,Rural
611,Male,Yes,1,Graduate,No,Urban
612,Male,Yes,2,Graduate,No,Urban


##Selecting numeric columns

In [172]:
dfn = df.select_dtypes(include=['int16', 'int32', 'int64', 'float16', 'float32', 'float64']).iloc[dfo.index, :]

##Final dataset

In [173]:
final_df = pd.concat([dfo, dfn], axis=1)

##Importing libraries

In [174]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

##Pipeline

In [175]:
numeric_transformer = Pipeline(
    steps=[('imputer', SimpleImputer(strategy='mean')),('scaler', StandardScaler())]
)

categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers = [
                    ('num', numeric_transformer, dfn.columns),
                    ('cat',categorical_transformer, dfo.columns)
    ]
)

In [176]:
clf = Pipeline(
    steps=[("preprocessor", preprocessor), ("classifier", SVC())]
)

X_train, X_test, y_train, y_test = train_test_split(final_df, y, test_size=0.2, random_state=0)

clf.fit(X_train, y_train)

##Confusion matrix & score

In [177]:
from sklearn.metrics import confusion_matrix
print('Confusion matrix :')
print(confusion_matrix(y_test, clf.predict(X_test)))
print('Accuracy :', clf.score(X_test, y_test))

Confusion matrix :
[[31  0]
 [ 0 80]]
Accuracy : 1.0


##Cross validation score & standard deviation

In [178]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = clf, X = X_train, y = y_train, cv = 3)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 97.97 %
Standard Deviation: 0.55 %


##Prediction

In [179]:
if clf.predict(final_df.iloc[4:5, :])[0] == 1:
    print('You are eligible for the loan')
else:
    print('You are not eligible for the loan')

You are eligible for the loan
