# Advanced Model - Random Forest
This notebook creates an improved Random Forest model for loan approval prediction.

In [5]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

In [6]:
# Load the cleaned data
import os
# Get the project root directory (parent of notebooks)
project_root = os.path.dirname(os.getcwd())
data_path = os.path.join(project_root, 'data', 'processed', 'cleaned.csv')
df = pd.read_csv(data_path)
X, y = df.drop('loan_approved', axis=1), df['loan_approved']
print(f"Dataset shape: {df.shape}")
print(f"Features: {list(X.columns)}")
print(f"\nCategorical columns: {X.select_dtypes(include=['object']).columns.tolist()}")
print(f"Numerical columns: {X.select_dtypes(include=['int64', 'float64']).columns.tolist()}")

Dataset shape: (2000, 7)
Features: ['city', 'income', 'credit_score', 'loan_amount', 'years_employed', 'points']

Categorical columns: ['city']
Numerical columns: ['income', 'credit_score', 'loan_amount', 'years_employed', 'points']


In [7]:
# Improved model - Random Forest with preprocessing
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Create preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_cols),
        ('cat', OneHotEncoder(drop='first'), categorical_cols)
    ])

# Create pipeline with preprocessing and Random Forest
from sklearn.pipeline import Pipeline
rf_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('clf', RandomForestClassifier(n_estimators=100, random_state=42))
])

rf_pipeline.fit(X, y)
pred = rf_pipeline.predict(X)

print(f"RF Accuracy: {accuracy_score(y, pred)}")
print(f"RF F1: {f1_score(y, pred)}")

RF Accuracy: 1.0
RF F1: 1.0


In [8]:
# Save the Random Forest pipeline
model_path = os.path.join(project_root, 'models', 'randomforest.pkl')
joblib.dump(rf_pipeline, model_path)
print(f"Random Forest pipeline saved to {model_path}")

Random Forest pipeline saved to m:\projects\repos\loan-approval-project-mlfinal\models\randomforest.pkl
