In [None]:
import pandas as pd

# Read the dataset from the CSV file into the notebook-wide DataFrame `df`.
csv_path = "loaners.csv"
df = pd.read_csv(csv_path)

# Preview the top of the frame (5 rows, the head() default).
df.head(5)

In [None]:
# Print the structural summary of the frame (columns, dtypes, non-null counts).
df.info()

# Descriptive statistics (describe() defaults to the numerical columns);
# the trailing expression is rendered as the cell output.
summary_stats = df.describe()
summary_stats

In [None]:
# Count the null entries in every column ...
missing_values = df.isna().sum()

# ... and display only the columns that actually contain at least one.
missing_values.loc[missing_values.gt(0)]

In [None]:
# Key financial columns to screen for zero or negative entries.
financial_cols = [
    "Loan Amount",
    "Funded Amount",
    "Funded Amount Investor",
    "Total Received Interest",
    "Total Received Late Fee",
    "Recoveries",
    "Collection Recovery Fee",
]

# Per-column count of entries that are <= 0.
df[financial_cols].le(0).sum()

In [None]:
# Categorical columns whose distinct values should be inspected.
categorical_cols = [
    "Employment Duration",
    "Home Ownership",
    "Verification Status",
    "Loan Status",
    "Application Type",
]

# For each column emit three lines: a header, the distinct values, a divider.
divider = "-" * 40
for col in categorical_cols:
    print(f"Unique values in {col}:", df[col].unique(), divider, sep="\n")

In [None]:
# Fix the column mismatch: the contents of "Employment Duration" and
# "Home Ownership" sit under each other's label (presumably established by the
# unique-value inspection in the previous cell).
#
# Exchanging the two labels in a single rename() has the same effect as swapping
# the values, but it:
#   * keeps both columns at their current position in the frame,
#   * never leaves `df` half-modified with temporary "Fixed ..." columns if a
#     step fails, and
#   * avoids inplace=True mutation.
# errors="raise" makes a missing column a KeyError rather than a silent no-op.
#
# NOTE: run this cell once per load of the data; running it a second time
# swaps the labels back again.
df = df.rename(
    columns={
        "Employment Duration": "Home Ownership",
        "Home Ownership": "Employment Duration",
    },
    errors="raise",
)

# Check unique values again
print("Unique values in Home Ownership (after fix):", df["Home Ownership"].unique())
print("Unique values in Employment Duration (after fix):", df["Employment Duration"].unique())


In [None]:
# Collect the distinct Employment Duration values, then show at most the first 20.
employment_values = df["Employment Duration"].unique()
employment_values[:20]

In [None]:
# Walk every object (text) column and print a header, up to 20 of its
# distinct values, and a divider line.
text_columns = df.select_dtypes(include=["object"]).columns
for col in text_columns:
    first_uniques = df[col].unique()[:20]  # cap the preview at 20 values
    print(f"Unique values in {col}:", first_uniques, "-" * 40, sep="\n")
