In [5]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# --------------------
# Load data
# --------------------
df = pd.read_csv("../data/data.csv")

# Record which rows have no gender *before* encoding: pd.factorize maps
# missing values to the sentinel code -1, which is an ordinary integer and
# would therefore survive dropna() and reach the model as a bogus extra
# category.
gender_missing = df["gender"].isna()

# Encode categorical column (as in notebook); codes are assigned in order of
# first appearance in the CSV, exactly as before.
df["gender"] = pd.factorize(df["gender"])[0]

# Drop missing values, including rows whose gender was missing
df = df[~gender_missing].dropna()

# --------------------
# Split features and target
# --------------------
# Target is the "target" column; features are everything except the
# "sno" and "target" columns.
y = df["target"]
X = df.drop(["sno", "target"], axis=1)

# Hold out 20% of the rows, stratified on the target, with a fixed seed so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# --------------------
# Train model
# --------------------
# Build and fit in one expression: fit() returns the estimator itself, so
# `model` ends up bound to the trained LogisticRegression.
model = LogisticRegression(
    class_weight="balanced", max_iter=1000, solver="liblinear"
).fit(X_train, y_train)

# --------------------
# Evaluate
# --------------------
# Predict on the held-out split and compare against the true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Validation Accuracy: {accuracy:.4f}")

# --------------------
# Save model
# --------------------
# Persist the fitted estimator under the sibling api/ directory.
model_path = "../api/model.joblib"
joblib.dump(model, model_path)
print("Model saved as model.joblib")


Validation Accuracy: 0.8475
Model saved as model.joblib
