<a href="https://colab.research.google.com/github/sakshishinde075/Predicting-Loan-Approval-with-R-/blob/main/Loan_Approval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1) Install & load packages (installs any that are missing)
packages <- c("caret","class","ggplot2","dplyr","readr","caTools","e1071","recipes")

# requireNamespace() answers "is this package usable?" directly, without
# scanning every installed package the way installed.packages() does.
is_installed <- vapply(packages, requireNamespace, logical(1), quietly = TRUE)
to_install <- packages[!is_installed]
if (length(to_install) > 0) {
  # Default dependency handling (Depends/Imports/LinkingTo) is enough for this
  # script; dependencies = TRUE would also pull in every suggested package.
  install.packages(to_install, repos = "https://cloud.r-project.org")
}

# Attach each package and record whether it worked. require() returns FALSE
# instead of failing, so the result is checked and reported rather than dropped.
attached <- vapply(
  packages,
  function(pkg) suppressWarnings(require(pkg, character.only = TRUE)),
  logical(1)
)
if (!all(attached)) {
  warning(
    "Failed to load package(s): ",
    paste(packages[!attached], collapse = ", "),
    call. = FALSE
  )
}

# 2) Load loan_data.csv from the working directory, or build a small synthetic
#    dataset with the same columns when the file is not present.
if (!file.exists("loan_data.csv")) {
  set.seed(123)
  n <- 600
  data <- data.frame(
    Gender = sample(c("Male","Female"), n, replace = TRUE),
    Married = sample(c("Yes","No"), n, replace = TRUE),
    Education = sample(c("Graduate","Not Graduate"), n, replace = TRUE),
    Self_Employed = sample(c("Yes","No"), n, replace = TRUE),
    # Normal draws can be negative; clamp so monetary amounts stay valid.
    # pmax() is applied after the draw, so the random stream is unchanged.
    ApplicantIncome = pmax(0, round(rnorm(n, 5000, 2000))),
    CoapplicantIncome = pmax(0, round(rnorm(n, 1500, 900))),
    LoanAmount = pmax(1, round(rnorm(n, 140, 50))),
    Loan_Amount_Term = sample(c(360, 120, 180, 240), n, replace = TRUE),
    Credit_History = sample(c(1,0), n, replace = TRUE, prob = c(0.8,0.2)),
    Property_Area = sample(c("Urban","Rural","Semiurban"), n, replace = TRUE),
    Loan_Status = sample(c("Y","N"), n, replace = TRUE, prob = c(0.7,0.3))
  )
} else {
  # Read blank cells as NA so that the imputation step below fills them in;
  # otherwise "" survives as a regular value and becomes its own factor level.
  data <- read.csv(
    "loan_data.csv",
    stringsAsFactors = FALSE,
    na.strings = c("", "NA")
  )
}

# 3) Basic preprocessing / imputation
# Return the most frequent non-missing value of `x`.
#
# x: an atomic vector or factor; NA entries are ignored.
# Returns a length-1 value of the same type as `x`. Ties go to the value that
# appears first in `x`. If `x` has no non-missing values, a single NA of the
# same type is returned, so the result can always be used as a replacement.
get_mode <- function(x){
  observed <- x[!is.na(x)]
  if (length(observed) == 0L) {
    # Indexing with NA yields one NA element that keeps x's type/levels.
    return(x[NA_integer_])
  }
  candidates <- unique(observed)
  # match() maps each value to its candidate index; tabulate() counts them.
  counts <- tabulate(match(observed, candidates))
  candidates[which.max(counts)]
}

# Impute numeric columns: replace each NA with that column's median.
# vapply() always returns a logical vector (even for a zero-column data frame),
# so it is safe to use as a subscript; sapply() is not.
num_cols <- names(data)[vapply(data, is.numeric, logical(1))]
for (col in num_cols) {
  values <- data[[col]]
  values[is.na(values)] <- median(values, na.rm = TRUE)
  data[[col]] <- values
}

# Impute character/factor columns with their most frequent value (get_mode),
# then store every such column as a factor.
is_categorical <- vapply(
  data,
  function(x) is.character(x) || is.factor(x),
  logical(1)
)
cat_cols <- names(data)[is_categorical]
for (col in cat_cols) {
  values <- data[[col]]
  missing <- is.na(values)
  # A column with no observed values has no mode to impute from; leave it as
  # NA instead of failing on the assignment.
  if (any(missing) && !all(missing)) {
    values[missing] <- get_mode(values)
  }
  data[[col]] <- as.factor(values)
}

# The outcome column must be a factor for the classification steps below.
data$Loan_Status <- as.factor(data$Loan_Status)

# Optional derived feature: combined applicant + co-applicant income.
# Only added when both source columns are present in the data.
income_cols <- c("ApplicantIncome", "CoapplicantIncome")
if (length(setdiff(income_cols, names(data))) == 0) {
  data[["Total_Income"]] <- data[["ApplicantIncome"]] + data[["CoapplicantIncome"]]
}

# 4) Prepare predictors for KNN (KNN needs numeric inputs)
predictors <- setdiff(names(data), c("Loan_Status","Loan_ID")) # drop Loan_ID if exists

# A predictor with fewer than two distinct values carries no information and
# breaks the steps below: a one-level factor cannot be given full-rank
# contrasts, and a constant numeric column scales to NaN (sd = 0), which
# class::knn() rejects.
is_constant <- vapply(
  data[, predictors, drop = FALSE],
  function(col) length(unique(col[!is.na(col)])) < 2L,
  logical(1)
)
if (any(is_constant)) {
  message(
    "Dropping constant predictor(s): ",
    paste(predictors[is_constant], collapse = ", ")
  )
  predictors <- predictors[!is_constant]
}

# drop = FALSE keeps a data frame even when only one predictor remains.
predictor_data <- data[, predictors, drop = FALSE]

# Use caret's dummyVars to convert factor -> numeric dummies; fullRank = TRUE
# omits one level per factor.
dmy <- caret::dummyVars(~ ., data = predictor_data, fullRank = TRUE)
X <- as.data.frame(predict(dmy, newdata = predictor_data))

# Center and scale every column (dummies included).
# NOTE: the statistics come from the full dataset, i.e. before the
# train/test split performed in the next step.
X_scaled <- as.data.frame(scale(X))

# 5) Train-test split (80% train / 20% test, stratified on Loan_Status)
set.seed(123)

# Check for caret without assuming it is attached: ls("package:caret") raises
# an error when the package is not on the search path, which would make the
# fallback branch unreachable in exactly the case it exists for.
caret_has_partition <- requireNamespace("caret", quietly = TRUE) &&
  "createDataPartition" %in% getNamespaceExports("caret")

if (caret_has_partition) {
  trainIndex <- caret::createDataPartition(data$Loan_Status, p = 0.8, list = FALSE)
  train_x <- X_scaled[trainIndex, ]
  test_x  <- X_scaled[-trainIndex, ]
  train_y <- data$Loan_Status[trainIndex]
  test_y  <- data$Loan_Status[-trainIndex]
} else {
  # Fallback: caTools::sample.split() returns a logical vector (TRUE = train).
  library(caTools)
  sp <- sample.split(data$Loan_Status, SplitRatio = 0.8)
  train_x <- X_scaled[sp, ]
  test_x  <- X_scaled[!sp, ]
  train_y <- data$Loan_Status[sp]
  test_y  <- data$Loan_Status[!sp]
}

# 6) Fit KNN (k = 5 by default) and evaluate on the held-out test set
library(class)
k_val <- 5
knn_pred <- knn(train = train_x, test = test_x, cl = train_y, k = k_val)

# Check for caret without assuming it is attached: ls("package:caret") raises
# an error when the package is not on the search path, so the plain-table
# fallback could never run.
caret_has_cm <- requireNamespace("caret", quietly = TRUE) &&
  "confusionMatrix" %in% getNamespaceExports("caret")

if (caret_has_cm) {
  print(caret::confusionMatrix(knn_pred, test_y))
} else {
  # Fallback: raw confusion table plus overall accuracy.
  print(table(Predicted = knn_pred, Actual = test_y))
  acc <- mean(knn_pred == test_y)
  cat("Accuracy:", round(acc, 4), "\n")
}

# 7) Tuning k with caret (if available): 5-fold cross-validation on the
#    training set over 10 candidate values of k.
# Check for caret without assuming it is attached: ls("package:caret") raises
# an error when the package is not on the search path, so the "skip" branch
# could never run. Calls are namespace-qualified for the same reason.
caret_has_train <- requireNamespace("caret", quietly = TRUE) &&
  "train" %in% getNamespaceExports("caret")

if (caret_has_train) {
  set.seed(123)
  ctrl <- caret::trainControl(method = "cv", number = 5)
  knn_cv <- caret::train(x = train_x, y = train_y,
                         method = "knn",
                         trControl = ctrl,
                         tuneLength = 10)
  print(knn_cv)
  plot(knn_cv)
} else {
  cat("caret::train not available — skipping caret-based tuning (but main KNN already ran).\n")
}


Installing packages into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘rbibutils’, ‘R.methodsS3’, ‘R.oo’, ‘R.utils’, ‘Rdpack’, ‘shape’, ‘future.apply’, ‘numDeriv’, ‘progressr’, ‘SQUAREM’, ‘R.cache’, ‘TH.data’, ‘abind’, ‘listenv’, ‘parallelly’, ‘profileModel’, ‘minqa’, ‘nloptr’, ‘reformulas’, ‘lazyeval’, ‘plotrix’, ‘diagram’, ‘lava’, ‘styler’, ‘classInt’, ‘labelled’, ‘gplots’, ‘libcoin’, ‘matrixStats’, ‘multcomp’, ‘DEoptimR’, ‘magic’, ‘lpSolve’, ‘linprog’, ‘RcppProgress’, ‘future’, ‘warp’, ‘iterators’, ‘brglm’, ‘gtools’, ‘lme4’, ‘qvcalc’, ‘rex’, ‘Formula’, ‘plotmo’, ‘prodlim’, ‘combinat’, ‘questionr’, ‘ROCR’, ‘mvtnorm’, ‘modeltools’, ‘strucchange’, ‘coin’, ‘zoo’, ‘sandwich’, ‘ROSE’, ‘robustbase’, ‘sfsmisc’, ‘geometry’, ‘BH’, ‘DiceDesign’, ‘sfd’, ‘globals’, ‘furrr’, ‘slider’, ‘RcppEigen’, ‘RcppArmadillo’, ‘modelenv’, ‘foreach’, ‘ModelMetrics’, ‘plyr’, ‘pROC’, ‘reshape2’, ‘BradleyTerry2’, ‘covr’, ‘Cubist’, ‘earth’, ‘ellipse’, ‘fastICA’, ‘gam’