In [6]:
# ✅ STEP 1: Install & Import Required Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# ✅ STEP 2: Upload the dataset in Google Colab
from google.colab import files
uploaded = files.upload()

# Load the uploaded CSV
df = pd.read_csv("/content/ecommerce_returns_synthetic_data.csv")

# ✅ STEP 3: Data Cleaning & Feature Engineering
df["Order_Date"] = pd.to_datetime(df["Order_Date"])
df["Return_Date"] = pd.to_datetime(df["Return_Date"])
df["Is_Returned"] = df["Return_Status"].apply(lambda x: 1 if x == "Returned" else 0)
df["Days_to_Return"] = df["Days_to_Return"].fillna(0)

# Drop unnecessary columns
df_model = df.drop(columns=[
    "Order_ID", "Product_ID", "User_ID", "Order_Date", "Return_Date", "Return_Status", "Return_Reason"
])

# ✅ STEP 4: Prepare data for model
X = df_model.drop(columns=["Is_Returned"])
y = df_model["Is_Returned"]

# Identify feature types
categorical_cols = X.select_dtypes(include="object").columns.tolist()
numeric_cols = X.select_dtypes(include=["int64", "float64"]).columns.tolist()

# Preprocessing pipeline
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numeric_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
])

pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter=1000))
])

# ✅ STEP 5: Train/test split and model training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)

# ✅ STEP 6: Predict return probabilities
return_probs = pipeline.predict_proba(X)[:, 1]
df["Return_Probability"] = return_probs

# (DO NOT export yet – see next script)
print("✅ Prediction complete. 'Return_Probability' column added.")


Saving ecommerce_returns_synthetic_data.csv to ecommerce_returns_synthetic_data (1).csv
✅ Prediction complete. 'Return_Probability' column added.


In [7]:
# ✅ Export products with high return risk (e.g., probability > 0.7)
high_risk_df = df[df["Return_Probability"] > 0.7]

# Save to CSV
high_risk_df.to_csv("high_risk_products.csv", index=False)

# Download the file (Colab)
from google.colab import files
files.download("high_risk_products.csv")

print("✅ High-risk products exported and ready to download.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ High-risk products exported and ready to download.
