In [1]:
import pandas as pd
import numpy as np

# -----------------------------
# Load raw dataset
# -----------------------------
df = pd.read_csv("Loan_Data.csv")

# -----------------------------
# Drop ID column
# -----------------------------
df.drop(columns=["Loan_ID"], inplace=True)

# -----------------------------
# Handle Missing Values
# -----------------------------
# Numerical columns
num_cols = [
    "ApplicantIncome", "CoapplicantIncome",
    "LoanAmount", "Loan_Amount_Term", "Credit_History"
]

for col in num_cols:
    df[col].fillna(df[col].median(), inplace=True)

# Categorical columns
cat_cols = [
    "Gender", "Married", "Dependents",
    "Education", "Self_Employed", "Property_Area"
]

for col in cat_cols:
    df[col].fillna(df[col].mode()[0], inplace=True)

# -----------------------------
# Encode Binary Columns
# -----------------------------
df["Gender"] = df["Gender"].map({"Male": 1, "Female": 0})
df["Married"] = df["Married"].map({"Yes": 1, "No": 0})
df["Education"] = df["Education"].map({"Graduate": 1, "Not Graduate": 0})
df["Self_Employed"] = df["Self_Employed"].map({"Yes": 1, "No": 0})

# -----------------------------
# Dependents (convert "3+" → 3)
# -----------------------------
df["Dependents"] = df["Dependents"].replace("3+", 3).astype(int)

# -----------------------------
# Property Area Encoding
# -----------------------------
df["Property_Area"] = df["Property_Area"].map({
    "Rural": 0,
    "Semiurban": 1,
    "Urban": 2
})

# -----------------------------
# Target Variable
# -----------------------------
df["Loan_Status"] = df["Loan_Status"].map({"Y": 1, "N": 0})

# -----------------------------
# Save cleaned dataset
# -----------------------------
df.to_csv("converted_credit.csv", index=False)

print("✅ Dataset cleaned and saved as converted_credit.csv")
print(df.head())


✅ Dataset cleaned and saved as converted_credit.csv
   Gender  Married  Dependents  Education  Self_Employed  ApplicantIncome  \
0       1        0           0          1              0             5849   
1       1        1           1          1              0             4583   
2       1        1           0          1              1             3000   
3       1        1           0          0              0             2583   
4       1        0           0          1              0             6000   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0                0.0       128.0             360.0             1.0   
1             1508.0       128.0             360.0             1.0   
2                0.0        66.0             360.0             1.0   
3             2358.0       120.0             360.0             1.0   
4                0.0       141.0             360.0             1.0   

   Property_Area  Loan_Status  
0              2            1  


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values