# E-commerce Behavior Analysis: Main Analysis

This notebook runs the core components of our e-commerce analysis system (implemented as modules in `../src`) on the October 2019 event log (`../data/raw/2019-Oct.csv`), including:
1. Recommendation System
2. User Segmentation
3. A/B Testing
4. Advanced Analytics

In [4]:
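# Optional (Google Colab only): uncomment the two lines below to mount Google Drive
# before running the rest of the notebook. Leave them commented out when running locally.
# from google.colab import drive
# drive.mount('/content/drive')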

In [5]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go

# Import custom modules
import sys
# Make the project's modules under ../src importable. The entry is relative, so it
# is resolved against the kernel's current working directory at import time.
# The membership guard keeps this cell idempotent: re-running it no longer stacks
# duplicate '../src' entries onto sys.path.
if '../src' not in sys.path:
    sys.path.append('../src')
from recommendation_system import RecommendationSystem
from user_segmentation import UserSegmentation
from ab_testing import ABTesting

## 1. Data Loading and Preparation

In [7]:
# Read the raw October 2019 event log into a single DataFrame.
# The path is relative to the notebook's working directory.
raw_events_path = '../data/raw/2019-Oct.csv'
df = pd.read_csv(raw_events_path)

# Report the (rows, columns) dimensions, then let the cell's final expression
# render a preview of the first five rows.
print("Dataset Shape:", df.shape)
df.head()

Dataset Shape: (42448764, 9)


Unnamed: 0,event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session
0,2019-10-01 00:00:00 UTC,view,44600062,2103807459595387724,,shiseido,35.79,541312140,72d76fde-8bb3-4e00-8c23-a032dfed738c
1,2019-10-01 00:00:00 UTC,view,3900821,2053013552326770905,appliances.environment.water_heater,aqua,33.2,554748717,9333dfbd-b87a-4708-9857-6336556b0fcc
2,2019-10-01 00:00:01 UTC,view,17200506,2053013559792632471,furniture.living_room.sofa,,543.1,519107250,566511c2-e2e3-422b-b695-cf8e6e792ca8
3,2019-10-01 00:00:01 UTC,view,1307067,2053013558920217191,computers.notebook,lenovo,251.74,550050854,7c90fc70-0e80-4590-96f3-13c02c18c713
4,2019-10-01 00:00:04 UTC,view,1004237,2053013555631882655,electronics.smartphone,apple,1081.98,535871217,c6bd7419-2748-4c56-95b4-8cec9ff8b80d


## 2. Recommendation System Implementation

In [8]:
# Build the recommender (project class from recommendation_system) and fit it on
# the full event log loaded above.
# NOTE(review): the saved output of this cell ends with
# "NameError: name 'start_time' is not defined". No such name is used in this cell,
# so the fault is presumably inside RecommendationSystem — fix it there.
rec_system = RecommendationSystem(
    use_gpu=True,
    batch_size=50000,  # Adjust batch_size based on your memory
    timeout=7200,      # presumably seconds — TODO confirm against RecommendationSystem
)
rec_system.fit(df)

# Demo subject: the user_id found in the first row of the event log.
sample_user = df['user_id'].iloc[0]

# Ask for the top five hybrid recommendations for that user and print them.
recommendations = rec_system.get_hybrid_recommendations(
    sample_user,
    n_recommendations=5,
)
print(f"Recommendations for user {sample_user}:")
print(recommendations)

Initial memory usage: 0.36 GB
Creating mappings...
Creating user mappings...
Creating item mappings...
Processing data in batches...


Processing batches:   0%|          | 0/849 [00:00<?, ?it/s]

Memory usage after batch 0: 0.75 GB
Memory usage after batch 10: 0.74 GB
Memory usage after batch 20: 0.76 GB
Memory usage after batch 30: 0.71 GB
Memory usage after batch 40: 0.64 GB
Memory usage after batch 50: 0.57 GB
Memory usage after batch 60: 0.52 GB
Memory usage after batch 70: 0.52 GB
Memory usage after batch 80: 0.61 GB
Memory usage after batch 90: 0.62 GB
Memory usage after batch 100: 0.65 GB
Memory usage after batch 110: 0.66 GB
Memory usage after batch 120: 0.58 GB
Memory usage after batch 130: 0.66 GB
Memory usage after batch 140: 0.67 GB
Memory usage after batch 150: 0.64 GB
Memory usage after batch 160: 0.62 GB
Memory usage after batch 170: 0.66 GB
Memory usage after batch 180: 0.70 GB
Memory usage after batch 190: 0.62 GB
Memory usage after batch 200: 0.68 GB
Memory usage after batch 210: 0.62 GB
Memory usage after batch 220: 0.79 GB
Memory usage after batch 230: 0.72 GB
Memory usage after batch 240: 0.72 GB
Memory usage after batch 250: 0.74 GB
Memory usage after batc

Processing item chunks:   0%|          | 0/334 [00:00<?, ?it/s]

NameError: name 'start_time' is not defined

## 3. User Segmentation Analysis

In [None]:
# Initialize and fit user segmentation
# UserSegmentation is the project class imported from user_segmentation; it is fitted
# on the same event-log DataFrame (df) used by the recommendation section.
# NOTE(review): n_clusters=4 is hard-coded and no justification for the value is shown
# in this notebook — confirm or document how it was chosen.
# NOTE(review): this cell has no recorded execution (In [None]) in the saved notebook.
user_seg = UserSegmentation(n_clusters=4)
user_seg.fit(df)

# Get cluster characteristics
# The bare .head() on the last line is the cell's displayed output; this assumes
# get_cluster_characteristics() returns a DataFrame-like object — TODO confirm.
cluster_chars = user_seg.get_cluster_characteristics()
print("Cluster Characteristics:")
cluster_chars.head()

## 4. A/B Testing Analysis

In [None]:
# Example A/B test setup
# ABTesting is the project class imported from ab_testing. Only the construction on
# the next line actually runs; the calls below it are a commented-out usage template.
ab_test = ABTesting()
# NOTE(review): control_data and treatment_data are not defined anywhere in the
# visible notebook — they must be created before these lines can be enabled.
# The same three metric names are passed to both calculate_metrics and generate_report.
# ab_test.load_data(control_data, treatment_data)
# ab_test.calculate_metrics(['conversion_rate', 'avg_order_value', 'session_duration'])
# report = ab_test.generate_report(['conversion_rate', 'avg_order_value', 'session_duration'])
# print(report)

## 5. Conclusions and Next Steps

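**TODO:** Summarize findings and outline next steps here.

Current status of the saved notebook: the recommendation cell (section 2) stopped with `NameError: name 'start_time' is not defined` before any recommendations were printed, and the user segmentation and A/B testing cells (sections 3 and 4) have no recorded execution (`In [None]`). No findings are available yet.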