In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the raw records into a DataFrame
df = pd.read_csv('phone_buyers.csv')

# Each categorical column gets its own LabelEncoder, created right next to the
# integer-coded companion column it produces. The encoders stay in module
# scope so the fitted label <-> code mapping remains available afterwards.

# Income bracket
le_income = LabelEncoder()
df['Income_encoded'] = le_income.fit_transform(df['Income (in K)'])

# Criminal record flag
le_criminal = LabelEncoder()
df['Criminal_encoded'] = le_criminal.fit_transform(df['Criminal Record'])

# EXP bracket
le_exp = LabelEncoder()
df['EXP_encoded'] = le_exp.fit_transform(df['EXP'])

# Target: loan decision
le_loan = LabelEncoder()
df['Loan_encoded'] = le_loan.fit_transform(df['Loan Approved?'])

def calculate_probability(df, income, criminal, exp, alpha=0.0):
    """Estimate loan approval/rejection posteriors with categorical Naive Bayes.

    The score of each class ('Yes' / 'No' in the 'Loan Approved?' column) is
    the class prior multiplied by the per-feature conditional frequencies of
    the given evidence; the two scores are then normalized to sum to 1.

    Args:
        df: DataFrame with the columns 'Income (in K)', 'Criminal Record',
            'EXP' and 'Loan Approved?'.
        income: Value to match in the 'Income (in K)' column.
        criminal: Value to match in the 'Criminal Record' column.
        exp: Value to match in the 'EXP' column.
        alpha: Additive (Laplace) smoothing strength applied to the
            conditional frequencies. The default of 0 reproduces the plain
            frequency estimates; a positive value keeps a feature value that
            was never seen with a class from zeroing that class's score.
            The number of categories per feature is taken as the number of
            distinct values observed in that column. Priors are not smoothed.

    Returns:
        Tuple ``(p_yes, p_no)`` of normalized posterior probabilities.

    Raises:
        ValueError: If ``alpha`` is negative.
        ZeroDivisionError: If ``df`` is empty, or if both class scores are
            zero so the posterior is undefined (for example, the evidence was
            never observed with either class and ``alpha`` is 0).
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    total_records = len(df)
    if total_records == 0:
        # Same exception type the bare division used to raise, but with a
        # message that explains the cause.
        raise ZeroDivisionError(
            "cannot estimate probabilities from an empty DataFrame"
        )

    labels = df['Loan Approved?']
    evidence = {
        'Income (in K)': income,
        'Criminal Record': criminal,
        'EXP': exp,
    }
    # Build every feature mask once and reuse it for both classes.
    feature_masks = {
        column: df[column] == value for column, value in evidence.items()
    }
    category_counts = {column: df[column].nunique() for column in evidence}

    scores = []
    for outcome in ('Yes', 'No'):
        in_class = labels == outcome
        class_size = int(in_class.sum())
        if class_size == 0:
            # A class with no records has prior 0 and therefore score 0;
            # skipping it avoids dividing by the empty class size.
            scores.append(0.0)
            continue

        score = class_size / total_records  # prior P(outcome)
        for column, mask in feature_masks.items():
            matches = int((mask & in_class).sum())
            score *= (matches + alpha) / (
                class_size + alpha * category_counts[column]
            )
        scores.append(score)

    p_yes_final, p_no_final = scores

    # Normalize so the two posteriors sum to 1.
    total = p_yes_final + p_no_final
    if total == 0:
        raise ZeroDivisionError(
            "posterior is undefined: the evidence has zero likelihood under "
            "both classes; pass alpha > 0 to apply Laplace smoothing"
        )

    return p_yes_final / total, p_no_final / total

# Example query: income bracket 30-70, criminal record present, EXP above 5
income, criminal, exp = '30-70', 'Yes', '>5'

# Posterior probability of each loan outcome for this applicant
p_yes, p_no = calculate_probability(df, income, criminal, exp)

print(f"\nProbabilities for Income: {income}, Criminal Record: {criminal}, EXP: {exp}")
print(f"Probability of Loan Approval (Yes): {p_yes:.4f}")
print(f"Probability of Loan Rejection (No): {p_no:.4f}")

# Approval needs a strictly larger posterior; a tie is reported as a rejection
print(
    "\nPrediction: Loan will be APPROVED"
    if p_yes > p_no
    else "\nPrediction: Loan will be REJECTED"
)


Probabilities for Income: 30-70, Criminal Record: Yes, EXP: >5
Probability of Loan Approval (Yes): 0.7500
Probability of Loan Rejection (No): 0.2500

Prediction: Loan will be APPROVED
