In [32]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

In [33]:
# Load the loan dataset from its CSV file into a DataFrame.
LOAN_DATA_CSV = '/content/sample_data/loan_data_1.csv'
df = pd.read_csv(LOAN_DATA_CSV)

In [34]:
# Preview the first five rows to sanity-check the columns and their values.
df.head(n=5)

Unnamed: 0,Income,CreditScore,EmploymentType,LoanAmount,LoanApproved
0,35795,558,Salaried,1571091,1
1,20860,678,Business,768234,1
2,96820,685,Self-Employed,478480,1
3,74886,612,Self-Employed,132711,0
4,26265,688,Salaried,891971,1


In [35]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Income,CreditScore,LoanAmount,LoanApproved
count,100.0,100.0,100.0,100.0
mean,72368.89,701.47,1014775.0,0.63
std,28730.338066,84.649182,583829.5,0.485237
min,20769.0,551.0,102869.0,0.0
25%,47934.25,630.0,532037.5,0.0
50%,78018.0,705.0,959160.5,1.0
75%,96619.0,774.5,1547936.0,1.0
max,119299.0,844.0,1979072.0,1.0


In [36]:
# (rows, columns) of the loaded frame.
df.shape

(100, 5)

In [37]:
# Column dtypes and non-null counts: a quick check for missing values and
# for which columns are categorical (object) versus numeric.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Income          100 non-null    int64 
 1   CreditScore     100 non-null    int64 
 2   EmploymentType  100 non-null    object
 3   LoanAmount      100 non-null    int64 
 4   LoanApproved    100 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 4.0+ KB


In [38]:
# Features & target: every column except the label is a predictor.
TARGET = 'LoanApproved'
X = df.drop(columns=[TARGET])
y = df[TARGET]

In [39]:
# Train/test split: hold out 20% of the rows for evaluation.
# The fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [40]:
# Preprocessing
numeric_features = ['Income',	'CreditScore','LoanAmount']
categorical_features = ['EmploymentType']
preprocessor = ColumnTransformer(transformers=[('num', SimpleImputer(strategy='mean'), numeric_features),
                                               ('cat',OneHotEncoder(handle_unknown='ignore'), categorical_features)])

In [41]:
#create the pipeline
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('scaler', StandardScaler()),
                           ('classifier', LogisticRegression())])

In [42]:
# Fit the preprocessing steps and the classifier on the training split only.
pipeline.fit(X=X_train, y=y_train)

In [43]:
# Predict on the held-out rows and print true labels next to predictions.
y_pred = pipeline.predict(X_test)
for caption, labels in (('Actual values', y_test.values), ('Predicted values', y_pred)):
    print(caption, labels)

Actual values [0 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1]
Predicted values [0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1]


In [49]:
# Confusion matrix on the held-out set (rows = actual class, columns = predicted class).
# The result is stored under its own name: assigning it to `confusion_matrix`
# would overwrite the imported function, and re-running this cell would then
# fail with "TypeError: 'numpy.ndarray' object is not callable".
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[ 2  5]
 [ 1 12]]


In [50]:
# Fraction of held-out rows classified correctly.
# The result is stored under its own name: assigning it to `accuracy_score`
# would overwrite the imported function, and re-running this cell would then
# fail with "TypeError: 'numpy.float64' object is not callable".
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)


0.7


In [57]:
#predict for a new customer
# Read the applicant's details interactively. int() raises ValueError on
# non-numeric input, which stops the cell before any prediction is made.
income=int(input('Enter income'))
credit_score=int(input('Enter credit score'))
loan_amount=int(input('Enter loan amount'))
# Strip stray whitespace so that e.g. 'Business ' still matches a trained category.
employment_type=input('Enter employment type').strip()

# The encoder was created with handle_unknown='ignore', so a misspelled or
# unseen employment type is silently encoded as all zeros and the model still
# returns a prediction. Warn so that such a result is not mistaken for a
# prediction that used the employment information.
known_types=list(pipeline.named_steps['preprocessor'].named_transformers_['cat'].categories_[0])
if employment_type not in known_types:
  print('Warning: unknown employment type',repr(employment_type),'- expected one of',known_types)

# Column names must match the training frame; the pipeline selects columns by
# name, so their order here does not matter.
new_customer=pd.DataFrame({'Income':[income],'CreditScore':[credit_score],'LoanAmount':[loan_amount],'EmploymentType':[employment_type]})
prediction=pipeline.predict(new_customer)
print(prediction)
if prediction[0]==1:
  print('Loan Approved')
else:
  print('Loan Not Approved')


Enter income990
Enter credit score789
Enter loan amount4566
Enter employment typeBusiness
[1]
Loan Approved


* Use: employee_promotion_decision_tree.csv from GitHub: https://github.com/salman1256/aiml_dec_2025/blob/master/Day-12/employee_promotion_decision_tree.csv
* Your task is to predict whether an employee will get a promotion, based on the following features, using an ML pipeline:
* Experience_Years
* Performance_Rating
* Training_Hours