In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.preprocessing import LabelEncoder

class RandomForestTuner:
    """Randomized hyperparameter search for a ``RandomForestClassifier``.

    The constructor stores three values:

    * ``objective_function`` -- a callable kept on the instance for the
      caller's reference. ``tune`` does not call it; candidates are scored by
      cross-validated accuracy instead.
    * ``param_grid`` -- mapping of hyperparameter name to candidate values,
      handed to ``RandomizedSearchCV`` as its parameter distributions.
    * ``cv`` -- cross-validation setting forwarded to ``RandomizedSearchCV``.
    """

    def __init__(self, objective_function, param_grid, cv=5):
        self.objective_function = objective_function
        self.param_grid = param_grid
        self.cv = cv

    def tune(self, X=None, y=None):
        """Run the randomized search and report the best candidate.

        Args:
            X: Training features. When omitted, the module-level ``X_train``
                is used, which keeps existing ``tune()`` callers working.
            y: Training target. When omitted, the module-level ``y_train``
                is used.

        Returns:
            A ``(best_hyperparameters, best_accuracy)`` tuple taken from the
            fitted search's ``best_params_`` and ``best_score_``.

        Raises:
            NameError: If an argument is omitted and the corresponding
                module-level variable has not been defined.
        """
        # Explicit arguments make the tuner reusable; the globals are only a
        # fallback for the original zero-argument call style.
        if X is None:
            X = X_train
        if y is None:
            y = y_train

        search = RandomizedSearchCV(
            RandomForestClassifier(),
            self.param_grid,
            cv=self.cv,
            scoring=make_scorer(accuracy_score),
        )
        search.fit(X, y)
        return search.best_params_, search.best_score_

def preprocess_data(data):
    """Return a copy of the loan table with its categorical columns encoded.

    Each of ``Gender``, ``Married``, ``Education``, ``Self_Employed`` and
    ``Property_Area`` is replaced by integer codes assigned in sorted order of
    the column's distinct values (0 for the smallest label, 1 for the next,
    and so on). ``Dependents`` has its ``'3+'`` label mapped to 3 and is then
    converted to a numeric column.

    Args:
        data: DataFrame containing the six columns named above. It is not
            modified; the encoding is applied to a copy.

    Returns:
        A new DataFrame with the same index and columns as ``data``.

    Raises:
        KeyError: If one of the expected columns is absent.
        ValueError: If ``Dependents`` holds a value other than ``'3+'`` that
            cannot be parsed as a number.
    """
    # Work on a copy so the caller's frame is left untouched.
    encoded = data.copy()

    categorical_cols = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area']
    for col in categorical_cols:
        # sort=True numbers the labels in sorted order. Missing values, if any
        # are still present, receive pandas' sentinel code of -1.
        codes, _ = pd.factorize(encoded[col], sort=True)
        encoded[col] = codes

    # Swap the open-ended label for a digit string first so the column never
    # holds a mix of strings and integers, then convert it in one step.
    encoded['Dependents'] = pd.to_numeric(encoded['Dependents'].replace('3+', '3'))

    return encoded

def objective_function(**hyperparameters):
    """Train a random forest with the given settings and score it.

    The classifier is fitted on the module-level ``X_train``/``y_train`` and
    evaluated on the module-level ``X_test``/``y_test``.

    Args:
        **hyperparameters: Must include ``max_depth``, ``n_estimators`` and
            ``min_samples_split``; any other keys are ignored.

    Returns:
        Accuracy of the fitted classifier on the held-out test split.

    Raises:
        KeyError: If one of the three required settings is missing.
    """
    # Pick out only the settings the forest is configured with, in the same
    # order the lookups have always been made.
    forest_kwargs = {
        name: hyperparameters[name]
        for name in ('max_depth', 'n_estimators', 'min_samples_split')
    }

    forest = RandomForestClassifier(**forest_kwargs)
    forest.fit(X_train, y_train)

    predictions = forest.predict(X_test)
    return accuracy_score(y_test, predictions)

if __name__ == "__main__":
    # Read the loan records, discard incomplete rows, then encode the
    # categorical columns.
    data = preprocess_data(pd.read_csv('loan.csv').dropna())

    # The loan decision is the target; it and the ID column are excluded
    # from the features.
    y = data['Loan_Status']
    X = data.drop(columns=['Loan_Status', 'Loan_ID'])

    # Hold out 30% of the rows; the fixed seed keeps the split repeatable.
    # These four names are read as globals by the tuner and the objective.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Candidate values for the randomized search over the forest settings.
    param_grid = dict(
        n_estimators=[50, 100, 150, 200],
        max_depth=[3, 5, 7, 9, 11],
        min_samples_split=[2, 5, 10],
    )

    # Search for the best-scoring combination and report it.
    tuner = RandomForestTuner(objective_function, param_grid)
    best_hyperparameters, best_accuracy = tuner.tune()

    print("Best Hyperparameters:", best_hyperparameters)
    print("Best Accuracy:", best_accuracy)


In [None]:


def predict_loan_status(model, input_data):
    """Encode one applicant's answers and return the model's prediction.

    Category labels are translated to the integer codes listed in
    ``category_codes`` below; every column is then converted to a numeric
    dtype so that answers collected as text (for example a dependents count
    of ``'2'``) reach the model as numbers rather than strings.

    Args:
        model: Fitted estimator exposing ``predict``. If it also exposes
            ``feature_names_in_`` and those names are exactly the supplied
            columns, the columns are reordered to match it.
        input_data: Mapping of column name to a single scalar answer.

    Returns:
        The first element of ``model.predict(...)`` for the encoded row.

    Raises:
        ValueError: If an answer is neither a recognised category label nor
            parseable as a number.
    """
    category_codes = {
        'Married': {'No': 0, 'Yes': 1},
        'Gender': {'Male': 1, 'Female': 0},
        'Education': {'Graduate': 1, 'Not Graduate': 0},
        'Self_Employed': {'No': 0, 'Yes': 1},
        'Property_Area': {'Rural': 2, 'Semiurban': 1, 'Urban': 0},
        'Dependents': {'3+': 4},
    }

    # Load the input data into a single-row DataFrame.
    input_df = pd.DataFrame(input_data, index=[0])

    # Translate known labels column by column; anything not in the table is
    # passed through unchanged so already-numeric answers keep working.
    for column, codes in category_codes.items():
        if column in input_df.columns:
            input_df[column] = input_df[column].map(
                lambda value, codes=codes: codes.get(value, value)
            )

    # Convert every column to numbers up front so an unrecognised answer is
    # reported here with a clear message instead of deep inside the model.
    try:
        input_df = input_df.apply(pd.to_numeric)
    except (ValueError, TypeError) as err:
        raise ValueError(
            f"Input contains a value that is neither a known category nor a number: {err}"
        ) from err

    # Align column order with the order the model was fitted on, but only
    # when the sets match; otherwise leave it to the model to complain.
    expected = getattr(model, 'feature_names_in_', None)
    if expected is not None and set(expected) == set(input_df.columns):
        input_df = input_df[list(expected)]

    prediction = model.predict(input_df)

    return prediction[0]  # Return the prediction for the first (only) row

# Load dataset
data = pd.read_csv('loan.csv')

# Drop rows with missing values so every remaining cell can be encoded.
data = data.dropna()

# Convert categorical columns to numeric codes in a single pass; the
# open-ended '3+' dependents label becomes 4.
data = data.replace({'Married': {'No': 0, 'Yes': 1},
                     'Gender': {'Male': 1, 'Female': 0},
                     'Education': {'Graduate': 1, 'Not Graduate': 0},
                     'Self_Employed': {'No': 0, 'Yes': 1},
                     'Loan_Status': {'Y': 1, 'N': 0},
                     'Property_Area': {'Rural': 2, 'Semiurban': 1, 'Urban': 0},
                     'Dependents': {'3+': 4}})

# After the replacement the column can still hold digit strings next to the
# integer 4; convert it so the classifier is trained on one numeric type.
data['Dependents'] = pd.to_numeric(data['Dependents'])

# The loan decision is the target; it and the ID column are excluded from
# the features.
X = data.drop(columns=['Loan_Status', 'Loan_ID'])  # Features
y = data['Loan_Status']  # Target

# Split data into train and test sets (30% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train Random Forest classifier
clf = RandomForestClassifier(max_depth=5, n_estimators=100, min_samples_split=2)
clf.fit(X_train, y_train)

# Input data from user. The keys are listed in the same order as the
# feature columns of X.
input_data = {
    'Gender': input("Enter gender (Male/Female): "),
    'Married': input("Are you married? (Yes/No): "),
    'Dependents': input("Enter number of dependents: "),
    'Education': input("Are you a graduate? (Graduate/Not Graduate): "),
    'Self_Employed': input("Are you self-employed? (Yes/No): "),
    'ApplicantIncome': float(input("Enter applicant income: ")),
    'CoapplicantIncome': float(input("Enter coapplicant income: ")),
    'LoanAmount': float(input("Enter loan amount: ")),
    'Loan_Amount_Term': float(input("Enter loan amount term: ")),
    'Credit_History': float(input("Enter credit history (1 for Yes, 0 for No): ")),
    'Property_Area': input("Enter property area (Rural/Semiurban/Urban): ")
}

# Predict loan status (1 = approved, 0 = rejected, per the Loan_Status codes above).
prediction = predict_loan_status(clf, input_data)
print("Predicted Loan Status:", prediction)

if prediction == 1:
    print("Congratulations! Your loan is likely to be approved.")
else:
    print("Sorry, your loan is likely to be rejected.")

Enter gender (Male/Female): 1
Are you married? (Yes/No): 2
Enter number of dependents: 1
Are you a graduate? (Graduate/Not Graduate): 1
Are you self-employed? (Yes/No): 1
Enter applicant income: 1
Enter coapplicant income: 1
Enter loan amount: 1
Enter loan amount term: 1
Enter credit history (1 for Yes, 0 for No): 1
Enter property area (Rural/Semiurban/Urban): 1
Predicted Loan Status: 0
Sorry, your loan is likely to be rejected.
