# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Generate synthetic data (1000 users)
# Seed the global NumPy RNG so every run draws the same synthetic sessions.
np.random.seed(42)
n = 1000

# One row per feature: (column name, sampler, low, high).
# The rows are consumed top to bottom, so their order fixes both the column
# order of the frame and the sequence of draws taken from the seeded RNG.
_feature_specs = [
    ('Administrative', np.random.randint, 0, 5),
    ('Administrative_Duration', np.random.uniform, 0, 200),
    ('Informational', np.random.randint, 0, 5),
    ('Informational_Duration', np.random.uniform, 0, 100),
    ('ProductRelated', np.random.randint, 1, 50),
    ('ProductRelated_Duration', np.random.uniform, 10, 3000),
    ('BounceRates', np.random.uniform, 0.01, 0.2),
    ('ExitRates', np.random.uniform, 0.01, 0.3),
    ('PageValues', np.random.uniform, 0, 50),
    ('SpecialDay', np.random.uniform, 0, 1),
]
data = {
    column: sampler(low, high, n)
    for column, sampler, low, high in _feature_specs
}

# Step 2: Create a target variable (simulate logic)
df = pd.DataFrame(data)
# A session is labelled as a purchase (1) only when both conditions hold:
# page value above 20 AND more than 500 spent in ProductRelated_Duration.
high_page_value = df['PageValues'] > 20
long_product_visit = df['ProductRelated_Duration'] > 500
df['Revenue'] = (high_page_value & long_product_visit).astype(int)

# Step 3: Split features and target
X = df.drop('Revenue', axis=1)
y = df['Revenue']

# Step 4: Split data
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/std used for training (data leakage) and
# make the reported test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Step 5: Preprocessing
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix under the train-fitted scaling. Kept so the `X_scaled`
# name stays defined for any later code that reads it.
X_scaled = scaler.transform(X)

# Step 6: Train model
# Random forest with 100 trees; random_state is fixed so repeated runs build
# the same forest. `fit` returns the estimator itself, so it can be chained.
model = RandomForestClassifier(n_estimators=100, random_state=1).fit(X_train, y_train)

# Step 7: Evaluate
# Score the model on the held-out test rows only.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

# Step 8: Predict on new user behavior
# Spell out each feature by name. The scaler was fitted on a DataFrame, so it
# validates column names at transform time; a bare positional ndarray triggers
# sklearn's "X does not have valid feature names" warning and would silently
# accept values supplied in the wrong order.
new_user = {
    'Administrative': 1,
    'Administrative_Duration': 50,
    'Informational': 0,
    'Informational_Duration': 10,
    'ProductRelated': 30,
    'ProductRelated_Duration': 1800,
    'BounceRates': 0.05,
    'ExitRates': 0.1,
    'PageValues': 25,
    'SpecialDay': 0.3,
}
# columns=X.columns lays the values out in exactly the training column order.
sample = pd.DataFrame([new_user], columns=X.columns)
sample_scaled = scaler.transform(sample)
print("New Customer Purchase Prediction:", model.predict(sample_scaled)[0])