## Ans(6):

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,StandardScaler

#### Step 1: Load and Preprocess the Dataset

In [2]:
# Load the dataset
data = pd.read_csv('LoanApprovalPrediction.csv')
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0.0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1.0,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0.0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0.0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0.0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 598 entries, 0 to 597
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            598 non-null    object 
 1   Gender             598 non-null    object 
 2   Married            598 non-null    object 
 3   Dependents         586 non-null    float64
 4   Education          598 non-null    object 
 5   Self_Employed      598 non-null    object 
 6   ApplicantIncome    598 non-null    int64  
 7   CoapplicantIncome  598 non-null    float64
 8   LoanAmount         577 non-null    float64
 9   Loan_Amount_Term   584 non-null    float64
 10  Credit_History     549 non-null    float64
 11  Property_Area      598 non-null    object 
 12  Loan_Status        598 non-null    object 
dtypes: float64(5), int64(1), object(7)
memory usage: 60.9+ KB


In [4]:
data.shape

(598, 13)

In [5]:
data.isnull().sum()


Loan_ID               0
Gender                0
Married               0
Dependents           12
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           21
Loan_Amount_Term     14
Credit_History       49
Property_Area         0
Loan_Status           0
dtype: int64

In [6]:
data.isnull().sum().sum()

96

In [7]:
data['Dependents'] = data['Dependents'].fillna(data['Dependents'].mean())
data['LoanAmount'] = data['LoanAmount'].fillna(data['LoanAmount'].mean())
data['Loan_Amount_Term'] = data['Loan_Amount_Term'].fillna(data['Loan_Amount_Term'].mean())
data['Credit_History'] = data['Credit_History'].fillna(data['Credit_History'].mean())

In [8]:
data.isnull().sum().sum()

0

In [9]:
data.shape

(598, 13)

In [10]:
data.columns

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

In [11]:
# Dropping Loan_ID column
data.drop(['Loan_ID'],axis=1,inplace=True)

In [12]:
categorical_columns = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area','Credit_History','Loan_Amount_Term']
#categorical_columns = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area','Loan_Amount_Term']

numerical_columns = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']
print(categorical_columns)
print(numerical_columns)

['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area', 'Credit_History', 'Loan_Amount_Term']
['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']


In [13]:
#Convert categorical variables to numerical representations
# In this case, we need to encode the feature column (input variable)

One_Hot_Encoder = OneHotEncoder(drop='first',sparse=False)
X_train_encode = One_Hot_Encoder.fit_transform(data[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area','Credit_History','Loan_Amount_Term']])
X_test_encode = One_Hot_Encoder.transform(data[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area','Credit_History','Loan_Amount_Term']])

In [14]:
X_train_encode.shape

(598, 22)

In [15]:
#Convert categorical variables to numerical representations
# In this case, we need to encode the 'NObeyesdad' column (target variable)
label_encoder = LabelEncoder()
y_train_encode = label_encoder.fit_transform(data['Loan_Status'])
y_test_encode = label_encoder.transform(data['Loan_Status'])

In [16]:
y_train_encode.shape

(598,)

#### Step 2: Split the Dataset into Training and Testing Sets

In [17]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_train_encode, y_train_encode, test_size=0.2, random_state=42)

#### Step 3: Build and Evaluate Classification Models

In [19]:
# Initialize the classification models
models = [
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    SVC()
]

# Train and evaluate each model
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'{model.__class__.__name__} Accuracy: {accuracy}')

LogisticRegression Accuracy: 0.8083333333333333
DecisionTreeClassifier Accuracy: 0.775
RandomForestClassifier Accuracy: 0.825
SVC Accuracy: 0.8166666666666667


### Select the model with the highest accuracy


In [20]:
lst=[]
for model in models:
    lst.append(model)
best_model = max(lst[0], lst[1], lst[2], lst[3], key=lambda x: x.score(X_test, y_test))
print(best_model)

RandomForestClassifier()


#### Save the model

In [34]:
import pickle
for model in models:
    file1=model.__class__.__name__+'.pkl'
    pickle.dump(model,open(file1,'wb'))
    open(file1,'wb').close()

In [21]:
X_train[100]

array([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0.])

In [34]:
import numpy as np
for model in models:
    print(model.predict([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1.]]))

[0]
[0]
[0]
[1]


In [47]:
pd.__version__

'1.4.4'