In [None]:
import pandas as pd
import os

# Input: the previously cleaned dataset. Output: the "double cleaned" copy.
cleaned_file_csv = '/Users/peterscheinsohn/Downloads/loans_club_cleaned.csv'
double_cleaned_file_csv = '/Users/peterscheinsohn/Downloads/loans_double_cleaned.csv'


def fill_missing_values(frame: pd.DataFrame) -> pd.DataFrame:
    """Fill nulls in *frame*: column median for numeric columns, "unknown" for object columns.

    The frame is modified in place and also returned for convenient chaining.
    A numeric column that is entirely null has no median and is left as-is.
    """
    # Assign the filled column back instead of `frame[col].fillna(..., inplace=True)`:
    # the latter is chained assignment on a temporary Series, which pandas warns
    # about and which has no effect on the frame under Copy-on-Write.
    for col in frame.select_dtypes(include=[float, int]).columns:
        frame[col] = frame[col].fillna(frame[col].median())

    for col in frame.select_dtypes(include=[object]).columns:
        frame[col] = frame[col].fillna("unknown")

    return frame


def add_avg_fico_score(frame: pd.DataFrame) -> pd.DataFrame:
    """Add an ``avg_fico_score`` column: the mean of ``fico_range_low`` and ``fico_range_high``.

    The frame is modified in place and also returned.

    Raises:
        KeyError: if either FICO range column is missing from *frame*.
    """
    frame['avg_fico_score'] = (frame['fico_range_low'] + frame['fico_range_high']) / 2
    return frame


# The guard keeps the file I/O from running when the helpers are imported;
# it is still true when this is executed as a script or as a notebook cell.
if __name__ == "__main__":
    # Step 1: Load the previously cleaned dataset (fail fast if it is missing)
    if not os.path.exists(cleaned_file_csv):
        print(f"Error: The file {cleaned_file_csv} does not exist.")
        # SystemExit instead of the interactive-only `exit()` helper; a
        # non-zero status signals the failure to whatever launched the script.
        raise SystemExit(1)

    df = pd.read_csv(cleaned_file_csv, low_memory=False)

    # Steps 2-3: Replace nulls (median for numeric columns, "unknown" for object columns)
    df = fill_missing_values(df)

    # Step 4: Create avg_fico_score from fico_range_low and fico_range_high
    df = add_avg_fico_score(df)

    # Step 5: Save the cleaned dataset to a new CSV file in the Downloads folder
    df.to_csv(double_cleaned_file_csv, index=False)

    print(f"Double cleaned dataset saved to {double_cleaned_file_csv}")