In [None]:
# import os
# import pandas as pd
# import numpy as np
# import psycopg2
# import pickle
# from dotenv import load_dotenv
# from sqlalchemy import create_engine
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import classification_report, accuracy_score
# from imblearn.over_sampling import SMOTE

# # ✅ Load environment variables
# load_dotenv()

# # ✅ Database Connection
# DATABASE_URL = os.getenv("DATABASE_URL")
# engine = create_engine(DATABASE_URL)

# def fetch_data():
#     """Fetch data dynamically from PostgreSQL."""
#     conn = engine.connect()
#     query = """
#         SELECT user_id, user_name, user_email, last_order_date, total_orders, total_spent,
#                avg_order_frequency, last_login_date, total_logins, total_time_spent,
#                avg_time_per_session, abandoned_cart_count, product_id, product_name, 
#                product_price FROM user_full_dataset;
#     """
#     df = pd.read_sql(query, conn)
#     conn.close()
#     return df

# def preprocess_data(df):
#     """Preprocess the dataset dynamically."""
#     df["last_order_date"] = pd.to_datetime(df["last_order_date"])
#     df["last_login_date"] = pd.to_datetime(df["last_login_date"])
#     today = pd.Timestamp.today()
#     df["recency_days"] = (today - df["last_order_date"]).dt.days.fillna(999).astype(int)
#     df["days_since_last_login"] = (today - df["last_login_date"]).dt.days.fillna(999).astype(int)
#     churn_threshold = 90
#     df["churn"] = (df["recency_days"] > churn_threshold).astype(int)
#     df = df.drop(columns=["last_order_date", "last_login_date", "user_name", "user_email", "product_name", "product_id", "product_price"])
#     return df

# def train_model():
#     """Train the model dynamically whenever data updates."""
#     df = fetch_data()
#     df = preprocess_data(df)
#     X = df.drop(columns=["churn", "user_id"])
#     y = df["churn"]
#     X = X.apply(pd.to_numeric, errors='coerce').fillna(0)
#     if len(y.unique()) > 1 and y.value_counts().min() >= 2:
#         smote = SMOTE(random_state=42, k_neighbors=min(2, y.value_counts().min() - 1))
#         X, y = smote.fit_resample(X, y)
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
#     scaler = StandardScaler()
#     X_train = scaler.fit_transform(X_train)
#     X_test = scaler.transform(X_test)
#     model = RandomForestClassifier(random_state=42)
#     model.fit(X_train, y_train)
#     y_pred = model.predict(X_test)
#     print(f"\n✅ Model Accuracy: {accuracy_score(y_test, y_pred):.2f}")
#     print("\nClassification Report:\n", classification_report(y_test, y_pred))
#     return model, scaler
# with open("customer_churn_model.pkl", "wb") as model_file:
#     pickle.dump(model, model_file)

# # Save the scaler (important for preprocessing new data)
# with open("scaler.pkl", "wb") as scaler_file:
#     pickle.dump(scaler, scaler_file)

# print("✅ Model and scaler saved successfully!")
# model, scaler = train_model()
import os
import pickle
import numpy as np
import pandas as pd
import psycopg2
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from flask import Flask, request, jsonify

# ✅ Load environment variables
# Reads a local .env file (if present) into the process environment.
load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL")

# Fail fast with an actionable message: os.getenv returns None when the
# variable is absent, and handing None to create_engine produces a far less
# obvious SQLAlchemy argument error.
if not DATABASE_URL:
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file."
    )

# ✅ Database Connection
# Shared SQLAlchemy engine used by fetch_data().
engine = create_engine(DATABASE_URL)

def fetch_data():
    """Load the churn feature columns from the ``user_full_dataset`` table.

    Runs a fixed SELECT through the module-level ``engine`` and returns the
    result set as a pandas DataFrame (one row per record returned by the
    query, including ``user_id`` and ``recency_days``).
    """
    sql = """
        SELECT user_id, total_orders, total_spent, avg_order_frequency,
               total_logins, total_time_spent, avg_time_per_session,
               abandoned_cart_count, recency_days 
        FROM user_full_dataset;
    """
    return pd.read_sql(sql, engine)

def _timedelta_to_days(series):
    """Return *series* with timedelta values converted to whole days.

    Handles both a native ``timedelta64`` column and an ``object`` column
    holding ``datetime.timedelta`` / ``pd.Timedelta`` values. Missing entries
    come back as NaN; a series that holds no timedeltas is returned unchanged.
    """
    from datetime import timedelta  # local import keeps this block self-contained

    if pd.api.types.is_timedelta64_dtype(series):
        return series.dt.days
    if series.dtype == object and series.map(
        lambda value: isinstance(value, timedelta)
    ).any():
        return pd.to_timedelta(series, errors="coerce").dt.days
    return series


def preprocess_data(df):
    """Build the feature matrix and churn labels used for model training.

    ``df`` is modified in place: ``recency_days`` is normalised to a number of
    days, missing values are replaced with 0, and a ``churn`` column is added.

    Args:
        df: DataFrame containing at least the feature columns listed below.

    Returns:
        Tuple ``(X, y)`` where ``X`` holds the feature columns and ``y`` is the
        0/1 ``churn`` label (1 when ``recency_days`` is greater than 60).

    Raises:
        KeyError: if a required column is absent from ``df``.
    """
    features = ["total_orders", "total_spent", "avg_order_frequency",
                "total_logins", "total_time_spent", "avg_time_per_session",
                "abandoned_cart_count", "recency_days"]

    # recency_days can arrive as a Timedelta (e.g. a date difference computed
    # in the database). Filling such a column with 0 and comparing it to the
    # integer threshold both fail with a TypeError, so convert it first.
    df["recency_days"] = _timedelta_to_days(df["recency_days"])

    # Handle missing values.
    # NOTE(review): a missing recency becomes 0 days and is therefore labelled
    # as not churned -- confirm this is the intended default.
    df.fillna(0, inplace=True)

    # Label churn: customers whose recency is more than 60 days.
    df["churn"] = (df["recency_days"] > 60).astype(int)

    X = df[features]
    y = df["churn"]

    return X, y

# ✅ Fetch & Preprocess Data
df = fetch_data()
X, y = preprocess_data(df)

# NOTE(review): the churn label is derived directly from recency_days, which is
# also a model feature, so the classifier can reproduce the label from that one
# column. Confirm this is intended before trusting the reported metrics.

# ✅ Hold out the test set FIRST.
# Resampling and scaling are fitted on the training split only; fitting them on
# the full dataset lets synthetic/test rows leak into preprocessing and
# inflates the evaluation scores.
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ✅ Handle Class Imbalance (training split only)
# SMOTE needs two classes and more minority samples than k_neighbors, so shrink
# k for small minority classes and skip resampling when it cannot be applied.
train_class_counts = y_train_raw.value_counts()
minority_count = int(train_class_counts.min())
if len(train_class_counts) > 1 and minority_count >= 2:
    smote = SMOTE(random_state=42, k_neighbors=min(5, minority_count - 1))
    X_resampled, y_resampled = smote.fit_resample(X_train_raw, y_train_raw)
else:
    smote = None
    X_resampled, y_resampled = X_train_raw, y_train_raw

# ✅ Standardize Features (statistics come from the training data only)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_resampled)
X_train, y_train = X_scaled, y_resampled
X_test = scaler.transform(X_test_raw)

# ✅ Train Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# ✅ Evaluate Model on the untouched, un-resampled hold-out rows
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ✅ Save Model & Scaler
# Both artifacts are needed at prediction time: inputs must be scaled with the
# same scaler the model was trained with.
with open("churn_model.pkl", "wb") as f:
    pickle.dump(model, f)
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("✅ Model and scaler saved successfully!")

# ✅ Create Flask API for Predictions
# Application object on which the /predict-churn route below is registered.
app = Flask(__name__)

@app.route("/predict-churn", methods=["POST"])
def predict_churn():
    """Predict churn labels for the customer records in the JSON request body.

    The body may be a list of record objects, a column-oriented object, or a
    single record object. Every record must supply all feature columns listed
    below; missing (null) values are replaced with 0, mirroring how the
    training data is prepared.

    Returns:
        200 with ``{"predictions": [...]}`` on success,
        400 with ``{"error": ...}`` when the request itself is invalid,
        500 with ``{"error": ...}`` when loading the artifacts or predicting fails.
    """
    features = ["total_orders", "total_spent", "avg_order_frequency",
                "total_logins", "total_time_spent", "avg_time_per_session",
                "abandoned_cart_count", "recency_days"]

    # silent=True returns None for a missing/malformed JSON body instead of
    # raising, so the problem can be reported as a client error (400).
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({"error": "Request body must be valid JSON."}), 400

    # A single record is a flat object of scalars, which pandas cannot turn
    # into a frame without an index; wrap it so it becomes a one-row frame.
    if isinstance(data, dict) and not any(
        isinstance(value, (list, dict)) for value in data.values()
    ):
        data = [data]

    try:
        df = pd.DataFrame(data)
    except (ValueError, TypeError) as e:
        return jsonify({"error": f"Could not parse records: {e}"}), 400

    missing = [name for name in features if name not in df.columns]
    if missing:
        return jsonify({"error": f"Missing required features: {missing}"}), 400
    if df.empty:
        return jsonify({"predictions": []})

    # Select required features, require numeric values, and fill gaps with 0
    # to stay consistent with the training-time preprocessing.
    try:
        df = df[features].apply(pd.to_numeric).fillna(0)
    except (ValueError, TypeError) as e:
        return jsonify({"error": f"Feature values must be numeric: {e}"}), 400

    try:
        # NOTE: pickle executes code on load; these files must only ever be the
        # artifacts written by this project's own training step.
        with open("scaler.pkl", "rb") as f:
            scaler = pickle.load(f)
        with open("churn_model.pkl", "rb") as f:
            model = pickle.load(f)

        # Standardize data with the training-time scaler, then predict churn.
        df_scaled = scaler.transform(df)
        predictions = model.predict(df_scaled)
        return jsonify({"predictions": predictions.tolist()})
    except Exception as e:
        # Request boundary: record the traceback and report a server error.
        app.logger.exception("Churn prediction failed")
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    # Debug mode turns on Werkzeug's interactive debugger (arbitrary code
    # execution for anyone who can reach the server) and the auto-reloader,
    # which re-executes this module and therefore re-runs the database fetch
    # and model training. Keep it off unless explicitly requested.
    debug_enabled = os.getenv("FLASK_DEBUG", "0").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, port=5000)


  df.fillna(0, inplace=True)


TypeError: float() argument must be a string or a real number, not 'Timedelta'