# E-commerce Return Rate Reduction - Python Analysis
This notebook cleans e-commerce return data, performs exploratory analysis on it, and builds a logistic regression model that predicts whether an order is returned.

In [None]:
# 📦 Required Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# 📥 Step 1: Load Dataset
# The path is relative, so the CSV must sit in the current working directory.
csv_path = "ecommerce_returns_500rows.csv"
df = pd.read_csv(csv_path)

In [None]:
# 🧹 Step 2: Data Cleaning
# Parse both date columns to datetimes (in place on df), then do all further
# work on an independent copy so the loaded frame is not modified again.
for date_col in ('order_date', 'return_date'):
    df[date_col] = pd.to_datetime(df[date_col])
data = df.copy()

In [None]:
# 📊 Step 3: Exploratory Data Analysis (EDA)
# The mean of is_returned per group is the share of orders returned, highest first.
# NOTE(review): assumes is_returned is a 0/1 (or boolean) flag, so the mean is a
# 0-1 proportion — confirm against the dataset.
category_returns = data.groupby('category')['is_returned'].mean().sort_values(ascending=False)
print("Category-wise return rate:\n", category_returns)

supplier_returns = data.groupby('supplier')['is_returned'].mean().sort_values(ascending=False)
print("\nSupplier-wise return rate:\n", supplier_returns)

# The y-axis is labelled "Return %", so convert the proportions to percentages
# for plotting only; the printed series above stay as proportions.
sns.barplot(x=category_returns.index, y=category_returns.values * 100)
plt.title("Return Rate by Category")
plt.ylabel("Return %")
plt.xticks(rotation=45)
plt.show()

In [None]:
# 🧠 Step 4: Encode Categorical Variables
# Replace each categorical column with integer codes. A fresh encoder is fitted
# per column and kept in `encoders`, so every column's code -> label mapping
# stays available (e.g. via encoders[col].inverse_transform). Refitting one
# shared encoder would keep only the last column's mapping.
label_cols = ['category', 'supplier', 'region', 'channel']
encoders = {}
for col in label_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    encoders[col] = le
# After the loop `le` is the encoder fitted on the last column in label_cols.

In [None]:
# 🎯 Step 5: Features & Target
# Target: the is_returned column. Features: the four encoded categoricals
# plus price and quantity.
y = data['is_returned']
X = data[[
    'category',
    'supplier',
    'price',
    'quantity',
    'region',
    'channel',
]]

In [None]:
# 🪜 Step 6: Train/Test Split
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so the
# split is reproducible. stratify=y keeps the class proportions of y the same in
# both parts, so the small test fold is not left with a skewed or single-class
# sample (which would make ROC AUC undefined).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# 🧪 Step 7: Logistic Regression Model
# Fit a logistic regression on the training split, allowing the solver up to
# 1000 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)

In [None]:
# 📈 Step 8: Prediction & Evaluation
# Hard class predictions feed the confusion matrix and classification report;
# the second predict_proba column is used as the score for ROC AUC.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# sep="\n" puts each heading on its own line directly above its table.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

auc = roc_auc_score(y_test, y_proba)
print("ROC AUC Score:", auc)