In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Read the customer CSV into `data` and preview the first rows.
csv_path = '/content/drive/MyDrive/DATASETS/ecommerce_customer_data.csv'
data = pd.read_csv(filepath_or_buffer=csv_path)
data.head()

Unnamed: 0,User_ID,Gender,Age,Location,Device_Type,Product_Browsing_Time,Total_Pages_Viewed,Items_Added_to_Cart,Total_Purchases
0,1,Female,23,Ahmedabad,Mobile,60,30,1,0
1,2,Male,25,Kolkata,Tablet,30,38,9,4
2,3,Male,32,Bangalore,Desktop,37,13,5,0
3,4,Male,35,Delhi,Mobile,7,20,10,3
4,5,Male,27,Bangalore,Tablet,35,20,8,2


In [3]:
# Descriptive statistics (count, mean, std, min, quartiles, max) restricted
# to the numeric columns of the frame.
numeric_columns = data.select_dtypes(include='number')
numeric_summary = numeric_columns.describe()
numeric_summary

Unnamed: 0,User_ID,Age,Product_Browsing_Time,Total_Pages_Viewed,Items_Added_to_Cart,Total_Purchases
count,500.0,500.0,500.0,500.0,500.0,500.0
mean,250.5,26.276,30.74,27.182,5.15,2.464
std,144.481833,5.114699,15.934246,13.071596,3.203127,1.740909
min,1.0,18.0,5.0,5.0,0.0,0.0
25%,125.75,22.0,16.0,16.0,2.0,1.0
50%,250.5,26.0,31.0,27.0,5.0,2.0
75%,375.25,31.0,44.0,38.0,8.0,4.0
max,500.0,35.0,60.0,50.0,10.0,5.0


In [4]:
# Count / unique / top / freq summary for the object (string) columns.
object_columns = data.select_dtypes(include='object')
categorical_summary = object_columns.describe()
categorical_summary

Unnamed: 0,Gender,Location,Device_Type
count,500,500,500
unique,2,8,3
top,Male,Kolkata,Mobile
freq,261,71,178


In [5]:
# Distribution of customer ages as a histogram.
fig = px.histogram(
    data_frame=data,
    x='Age',
    title='Distribution of Age',
)
fig.show()

In [6]:
# Number of customers per gender, shown as a bar chart.
# rename_axis + reset_index(name=...) yields the ['Gender', 'Count'] columns
# directly instead of overwriting the column labels afterwards.
gender_counts = (
    data['Gender']
    .value_counts()
    .rename_axis('Gender')
    .reset_index(name='Count')
)
fig = px.bar(
    data_frame=gender_counts,
    x='Gender',
    y='Count',
    title='Gender Distribution',
)
fig.show()

In [7]:
# Relationship between time spent browsing and pages viewed, with an
# ordinary-least-squares trendline overlaid on the scatter.
scatter_options = dict(
    x='Product_Browsing_Time',
    y='Total_Pages_Viewed',
    title='Product Browsing Time vs Total Pages Viewed',
    trendline='ols',
)
fig = px.scatter(data, **scatter_options)
fig.show()

In [8]:
# Grouped analysis: mean pages viewed for each gender.
gender_grouped = (
    data.groupby('Gender')['Total_Pages_Viewed']
    .mean()
    .rename('Average_Total_Pages_Viewed')
    .reset_index()
)
fig = px.bar(
    data_frame=gender_grouped,
    x='Gender',
    y='Average_Total_Pages_Viewed',
    title='Average Total Pages Viewed by Gender',
)
fig.show()

In [9]:
# Grouped analysis: mean pages viewed for each device type.
devices_grouped = (
    data.groupby('Device_Type')['Total_Pages_Viewed']
    .mean()
    .rename('Average_Total_Pages_Viewed')
    .reset_index()
)
fig = px.bar(
    data_frame=devices_grouped,
    x='Device_Type',
    y='Average_Total_Pages_Viewed',
    title='Average Total Pages Viewed by Devices',
)
fig.show()

In [10]:
# CLV proxy used in this notebook: purchases weighted by pages viewed,
# divided by the customer's age.
data['CLV'] = (data['Total_Purchases'] * data['Total_Pages_Viewed']) / data['Age']

# Bucket customers by CLV.
# The lowest bin edge is 0 with include_lowest=True so that customers whose CLV
# lies in [0, 1] -- including everyone with zero purchases (CLV == 0) -- are
# labelled 'Low Value'. With the previous lower edge of 1 (right-closed bins),
# those rows fell outside every bin, received NaN, and silently disappeared
# from the segment counts and the chart.
segment_labels = ['Low Value', 'Medium Value', 'High Value']
data['Segment'] = pd.cut(data['CLV'], bins=[0, 2.5, 5, float('inf')],
                         labels=segment_labels, include_lowest=True)

# Count customers per segment. sort_index() orders the rows by category
# (Low -> Medium -> High) rather than by frequency, so the bars read naturally.
segment_counts = (
    data['Segment']
    .value_counts()
    .sort_index()
    .rename_axis('Segment')
    .reset_index(name='Count')
)

# Create a bar chart to visualize the customer segments
fig = px.bar(segment_counts, x='Segment', y='Count',
             title='Customer Segmentation by CLV')
fig.update_xaxes(title='Segment')
fig.update_yaxes(title='Number of Customers')
fig.show()

In [11]:
# Funnel analysis: sum Total_Purchases for every
# (Product_Browsing_Time, Items_Added_to_Cart) combination.
# NOTE(review): the chart below plots browsing time against cart items; the
# summed Total_Purchases column is computed but never plotted -- confirm this
# is the intended "conversion funnel".
funnel_columns = ['Product_Browsing_Time', 'Items_Added_to_Cart', 'Total_Purchases']
funnel_keys = ['Product_Browsing_Time', 'Items_Added_to_Cart']
funnel_data = (
    data[funnel_columns]
    .groupby(funnel_keys)
    .sum()
    .reset_index()
)

fig = px.funnel(
    data_frame=funnel_data,
    x='Product_Browsing_Time',
    y='Items_Added_to_Cart',
    title='Conversion Funnel',
)
fig.show()

In [12]:
# Churn rate: a customer is flagged as churned when they made no purchases;
# the mean of that boolean flag is the churned share of all customers.
data['Churned'] = data['Total_Purchases'].eq(0)

churn_rate = data['Churned'].mean()
print(churn_rate)

0.198
