ValueError: could not convert string to float: '3+'

In [2]:
# train_and_save.py
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import pickle

# 1. Load the raw CSV and clean it (change the path if your file is named differently)
# Rows with any missing value are discarded up front; swap in imputation if preferred.
df = pd.read_csv("loan status.csv").dropna()

# 'Dependents' may contain the literal string '3+'; map it to '4' so the
# whole column can be cast to int (same convention as used earlier)
if 'Dependents' in df.columns:
    dependents = df['Dependents'].replace('3+', '4')
    df['Dependents'] = dependents.astype(int)

# force the numeric feature columns to a numeric dtype; values that cannot be
# parsed become NaN instead of raising
numeric_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History']
present_numeric = [c for c in numeric_cols if c in df.columns]
for name in present_numeric:
    df[name] = pd.to_numeric(df[name], errors='coerce')

# discard the rows where the coercion above produced NaN
df = df.dropna()

# 2. Integer-encode the categorical features and remember each mapping
def _class_codes(fitted_encoder):
    """Return {original class label as str: encoded int} for a fitted LabelEncoder."""
    labels = fitted_encoder.classes_
    codes = fitted_encoder.transform(labels)
    return {str(label): int(code) for label, code in zip(labels, codes)}


categorical_cols = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area']
encoders = {}
for feature in categorical_cols:
    # columns missing from the CSV are skipped rather than treated as an error
    if feature not in df.columns:
        continue
    feature_encoder = LabelEncoder()
    df[feature] = feature_encoder.fit_transform(df[feature].astype(str))
    # keep original_value -> encoded_int so the same encoding can be reapplied later
    encoders[feature] = _class_codes(feature_encoder)

# the target column gets its own encoder and its own saved mapping
le_target = LabelEncoder()
target_as_text = df['Loan_Status'].astype(str)
df['Loan_Status'] = le_target.fit_transform(target_as_text)
target_mapping = _class_codes(le_target)

# 3. Prepare X, y and train
# Loan_Status is the target and must exist. Loan_ID (presumably a row
# identifier, not a feature) is dropped only when present, so a CSV without it
# no longer raises KeyError -- consistent with the optional-column checks used
# for the other columns in this script.
X = df.drop(columns=['Loan_Status']).drop(columns=['Loan_ID'], errors='ignore')
y = df['Loan_Status']

# hold out 10% of the rows; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

# max_iter is raised to 1000 to give the solver more iterations to converge
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# 4. Persist the model together with everything needed to rebuild its inputs
artifacts = {
    'logistic_model.pkl': model,
    'model_columns.pkl': list(X.columns),   # ordered list of the training columns
    'encoders.pkl': encoders,
    'target_mapping.pkl': target_mapping,
}
# one pickle file per artifact, written into the current working directory
for filename, payload in artifacts.items():
    with open(filename, 'wb') as out:
        pickle.dump(payload, out)

print("Saved: logistic_model.pkl, model_columns.pkl, encoders.pkl, target_mapping.pkl")


Saved: logistic_model.pkl, model_columns.pkl, encoders.pkl, target_mapping.pkl
