In [1]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from datetime import datetime


In [10]:
def load_and_process_data(file_path):
    """Load the customer CSV and add the date-derived columns used by the plots.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``. The
            data must contain a 'date' column parseable by ``pd.to_datetime``.

    Returns:
        DataFrame sorted by 'date' with three extra columns: 'year' and
        'month' (integers) and 'month_name' (abbreviated month, e.g. 'Jan').
    """
    # Read the CSV file
    df = pd.read_csv(file_path)

    # Convert date column to datetime
    df['date'] = pd.to_datetime(df['date'])

    # Sort the frame that was just loaded. The previous code sorted an
    # unrelated global (`df1`), which discarded this file's data or raised
    # NameError on a fresh kernel.
    df = df.sort_values('date')

    # Create additional time-based features
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['month_name'] = df['date'].dt.strftime('%b')

    return df

def calculate_kpis(df):
    """Summarise the 'Customers' column into headline figures.

    Args:
        df: DataFrame with a numeric 'Customers' column, one row per day.

    Returns:
        dict with keys 'total_days' (row count), 'avg_customers' (mean rounded
        to 2 decimals), 'max_customers' and 'min_customers'.
    """
    customers = df['Customers']
    return dict(
        total_days=len(df),
        avg_customers=round(customers.mean(), 2),
        max_customers=customers.max(),
        min_customers=customers.min(),
    )

def create_time_series(df):
    """Build the daily customer line chart.

    Args:
        df: DataFrame supplying 'date' (x values) and 'Customers' (y values).

    Returns:
        plotly Figure with a single 'Daily Customers' line trace, unified
        x-hover and a range slider on the x axis.
    """
    daily_trace = go.Scatter(
        x=df['date'],
        y=df['Customers'],
        mode='lines',
        name='Daily Customers',
        line={'color': '#1f77b4'},
    )

    layout_options = {
        'title': 'Daily Customer Trend',
        'xaxis_title': 'Date',
        'yaxis_title': 'Number of Customers',
        'height': 400,
        'showlegend': True,
        'hovermode': 'x unified',
    }

    fig = go.Figure()
    fig.add_trace(daily_trace)
    fig.update_layout(**layout_options)

    # Range slider under the plot for zooming along the date axis
    fig.update_xaxes(rangeslider_visible=True)

    return fig

def create_monthly_pattern(df):
    """Create the monthly seasonality bar chart.

    Args:
        df: DataFrame with 'month_name', 'month' and 'Customers' columns
            (as produced by ``load_and_process_data``).

    Returns:
        plotly Figure with one bar per month name showing the mean of
        'Customers' (rounded to 2 decimals), ordered by month number.
    """
    # Aggregate the frame passed in; the previous code read the unrelated
    # global `df1` here. Mean and month number are computed in one groupby so
    # they stay aligned by label rather than by position.
    monthly_avg = (
        df.groupby('month_name')
        .agg(Customers=('Customers', 'mean'), month_num=('month', 'first'))
        .round({'Customers': 2})
        .sort_values('month_num')  # calendar order instead of alphabetical
        .reset_index()
    )

    fig = go.Figure()

    fig.add_trace(
        go.Bar(
            x=monthly_avg['month_name'],
            y=monthly_avg['Customers'],
            name='Average Customers',
            marker_color='#2ca02c'
        )
    )

    fig.update_layout(
        title='Monthly Seasonality Pattern',
        xaxis_title='Month',
        yaxis_title='Average Number of Customers',
        height=400,
        showlegend=True
    )

    return fig

def create_yearly_averages(df):
    """Build a bar chart of the mean customer count for each year.

    Args:
        df: DataFrame with 'year' and 'Customers' columns.

    Returns:
        plotly Figure with one bar per year; heights are the yearly mean of
        'Customers' rounded to 2 decimals.
    """
    mean_by_year = df.groupby('year')['Customers'].mean()
    yearly_avg = mean_by_year.round(2).reset_index()

    yearly_bars = go.Bar(
        x=yearly_avg['year'],
        y=yearly_avg['Customers'],
        name='Average Customers',
        marker_color='#9467bd',
    )

    layout_options = {
        'title': 'Yearly Average Customers',
        'xaxis_title': 'Year',
        'yaxis_title': 'Average Number of Customers',
        'height': 400,
        'showlegend': True,
    }

    fig = go.Figure()
    fig.add_trace(yearly_bars)
    fig.update_layout(**layout_options)

    return fig

def create_dashboard(df):
    """Create the complete three-row dashboard.

    Rows, top to bottom: daily customer line (with range slider), mean
    customers per month name, mean customers per year.

    Args:
        df: DataFrame with 'date', 'Customers', 'year', 'month' and
            'month_name' columns (as produced by ``load_and_process_data``).

    Returns:
        plotly Figure built with ``make_subplots`` (3 rows x 1 column).
    """
    # Create subplot figure; the time series gets half of the vertical space
    fig = make_subplots(
        rows=3, cols=1,
        subplot_titles=('Daily Customer Trend', 'Monthly Seasonality Pattern', 'Yearly Average Customers'),
        vertical_spacing=0.1,
        row_heights=[0.5, 0.25, 0.25]
    )

    # Add time series trace
    fig.add_trace(
        go.Scatter(
            x=df['date'],
            y=df['Customers'],
            mode='lines',
            name='Daily Customers',
            line=dict(color='#1f77b4')
        ),
        row=1, col=1
    )

    # Add monthly pattern. Mean and month number come from one groupby so
    # they are aligned by label; sorting by month number gives calendar order.
    monthly_avg = (
        df.groupby('month_name')
        .agg(Customers=('Customers', 'mean'), month_num=('month', 'first'))
        .round({'Customers': 2})
        .sort_values('month_num')
        .reset_index()
    )

    fig.add_trace(
        go.Bar(
            x=monthly_avg['month_name'],
            y=monthly_avg['Customers'],
            name='Monthly Average',
            marker_color='#2ca02c'
        ),
        row=2, col=1
    )

    # Add yearly averages
    yearly_avg = df.groupby('year')['Customers'].mean().round(2).reset_index()

    fig.add_trace(
        go.Bar(
            x=yearly_avg['year'],
            y=yearly_avg['Customers'],
            name='Yearly Average',
            marker_color='#9467bd'
        ),
        row=3, col=1
    )

    # Update layout
    fig.update_layout(
        height=1200,
        showlegend=True,
        title_text="Customer Analytics Dashboard",
        hovermode='x unified'
    )

    # Add range slider to time series (row 1 only)
    fig.update_xaxes(rangeslider_visible=True, row=1, col=1)

    # Update axes labels
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Month", row=2, col=1)
    fig.update_xaxes(title_text="Year", row=3, col=1)

    fig.update_yaxes(title_text="Number of Customers", row=1, col=1)
    fig.update_yaxes(title_text="Average Customers", row=2, col=1)
    fig.update_yaxes(title_text="Average Customers", row=3, col=1)

    return fig



In [3]:
# Read the customer CSV and add the year / month / month_name columns used by the plots below.
df = load_and_process_data('Customers_Parlour.csv')

In [4]:
# Compute the headline figures, then print them one per line
kpis = calculate_kpis(df)

kpi_labels = (
    ('Total Days', 'total_days'),
    ('Average Customers', 'avg_customers'),
    ('Maximum Customers', 'max_customers'),
    ('Minimum Customers', 'min_customers'),
)

print("\nKey Performance Indicators:")
for label, key in kpi_labels:
    print(f"{label}: {kpis[key]}")


Key Performance Indicators:
Total Days: 730
Average Customers: 34.51
Maximum Customers: 49
Minimum Customers: 20


In [6]:
# Build the daily customer line chart from the loaded data and display it.
time_series_fig=create_time_series(df)
time_series_fig.show()

In [7]:
# Build the per-month average bar chart and display it.
monthly_pattern_fig = create_monthly_pattern(df)
monthly_pattern_fig.show()

In [8]:
# Build the per-year average bar chart and display it.
yearly_averages_fig = create_yearly_averages(df)
yearly_averages_fig.show()

In [None]:
# Build the combined three-row dashboard (daily, monthly, yearly) and display it.
dashboard_fig = create_dashboard(df)
dashboard_fig.show()

In [None]:
   # Create individual plots
    monthly_pattern_fig = create_monthly_pattern(df)
    yearly_averages_fig = create_yearly_averages(df)
    
    # Create dashboard
    dashboard_fig = create_dashboard(df)
    
    # Save plots to HTML files
    time_series_fig.write_html("time_series.html")
    monthly_pattern_fig.write_html("monthly_pattern.html")
    yearly_averages_fig.write_html("yearly_averages.html")
    dashboard_fig.write_html("dashboard.html")

if __name__ == "__main__":
    main()

In [15]:
# Load the salon bookings here so this cell runs top-to-bottom on a fresh
# kernel; previously it relied on `df1` created by a cell further down.
df1 = pd.read_csv('hair_salon_no_show_wrangled_df.csv')

# Per booking day-of-week: number of bookings ('count') and number of
# no-shows ('sum' of the noshow flag).
dow_stats = df1.groupby('book_dow').agg({
    'noshow': ['count', 'sum']
}).reset_index()
# Flatten the two-level column index produced by the list-style agg
dow_stats.columns = ['day', 'total_bookings', 'no_shows']
dow_stats['no_show_rate'] = (dow_stats['no_shows'] / dow_stats['total_bookings']) * 100

dow_stats

Unnamed: 0,day,total_bookings,no_shows,no_show_rate
0,Friday,405,46,11.358025
1,Monday,1,0,0.0
2,Saturday,393,40,10.178117
3,Sunday,54,14,25.925926
4,Thursday,417,46,11.031175
5,Tuesday,391,41,10.485934
6,Wednesday,291,37,12.714777


In [14]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
from datetime import datetime

# Read the dataset
df1 = pd.read_csv('hair_salon_no_show_wrangled_df.csv')


def _no_show_rates(frame, by, key_name):
    """Return bookings, no-shows and no-show rate (%) for each value of `by`.

    Args:
        frame: DataFrame with a 'noshow' column and the grouping column `by`.
        by: Name of the column to group on.
        key_name: Name given to the grouping column in the result.

    Returns:
        DataFrame with columns [key_name, 'total_bookings', 'no_shows',
        'no_show_rate'], one row per group, sorted by the group key.
    """
    stats = (
        frame.groupby(by)['noshow']
        .agg(total_bookings='count', no_shows='sum')
        .reset_index()
        .rename(columns={by: key_name})
    )
    stats['no_show_rate'] = (stats['no_shows'] / stats['total_bookings']) * 100
    return stats


# Create a 3x2 grid of subplots, one panel per analysis
fig = make_subplots(
    rows=3, cols=2,
    subplot_titles=('No-show Rates by Day of Week',
                    'No-show Rates by Time of Day',
                    'No-show Rates by Service Category',
                    'Customer History Impact on No-shows',
                    'Booking Patterns Over Time',
                    'Revenue Impact of No-shows'),
    vertical_spacing=0.15,
    horizontal_spacing=0.12
)

# 1. No-show rates by day of week
dow_stats = _no_show_rates(df1, 'book_dow', 'day')

# Sort days in calendar order (groupby returns them alphabetically)
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
dow_stats['day'] = pd.Categorical(dow_stats['day'], categories=day_order, ordered=True)
dow_stats = dow_stats.sort_values('day')

fig.add_trace(
    go.Bar(x=dow_stats['day'],
           y=dow_stats['no_show_rate'],
           name='No-show Rate',
           marker_color='#1f77b4'),
    row=1, col=1
)

# 2. No-show rates by time of day
tod_stats = _no_show_rates(df1, 'book_tod', 'time')

fig.add_trace(
    go.Bar(x=tod_stats['time'],
           y=tod_stats['no_show_rate'],
           name='Time of Day',
           marker_color='#2ca02c'),
    row=1, col=2
)

# 3. No-show rates by service category
category_stats = _no_show_rates(df1, 'book_category', 'category')

fig.add_trace(
    go.Bar(x=category_stats['category'],
           y=category_stats['no_show_rate'],
           name='Service Category',
           marker_color='#ff7f0e'),
    row=2, col=1
)

# 4. Impact of customer history on no-shows
fig.add_trace(
    go.Scatter(x=df1['last_cumbook'],
               y=df1['noshow'],
               mode='markers',
               name='Previous Bookings',
               marker=dict(
                   size=8,
                   color='#9467bd',
                   opacity=0.6
               )),
    row=2, col=2
)

# 5. No-show rate by recency.
# Each distinct recency value is mapped to its position in sorted order
# (0, 1, 2, ...). An earlier pd.qcut decile assignment was overwritten by this
# mapping before being used, so it has been removed as dead code.
# NOTE(review): the x-axis label below still says "Percentile", but the values
# plotted are these ordinal positions - confirm which one is intended.
grouplist = sorted(df1['recency'].unique().tolist())
group_mapping = {value: position for position, value in enumerate(grouplist)}
df1['recency_bucket'] = df1['recency'].map(group_mapping)

recency_stats = _no_show_rates(df1, 'recency_bucket', 'recency')

fig.add_trace(
    go.Scatter(x=recency_stats['recency'],
               y=recency_stats['no_show_rate'],
               mode='lines+markers',
               name='Recency Impact',
               line=dict(color='#d62728', width=2)),
    row=3, col=1
)

# 6. Revenue impact: mean of the last receipt total, split by show / no-show
revenue_impact = df1.groupby('noshow').agg({
    'last_receipt_tot': ['mean', 'count']
}).reset_index()
revenue_impact.columns = ['noshow', 'avg_revenue', 'count']

# Derive the bar labels from the grouped flag instead of hard-coding their
# order, so labels stay correct even if only one group is present.
status_labels = ['No-show' if flag else 'Show' for flag in revenue_impact['noshow']]

fig.add_trace(
    go.Bar(x=status_labels,
           y=revenue_impact['avg_revenue'],
           name='Average Revenue',
           marker_color='#8c564b'),
    row=3, col=2
)

# Update layout
fig.update_layout(
    height=1200,
    width=1000,
    showlegend=False,
    title_text="Salon Booking No-Show Analysis Dashboard",
    title_x=0.5,
    title_font=dict(size=24)
)

# Update axes labels
fig.update_xaxes(title_text="Day of Week", row=1, col=1)
fig.update_xaxes(title_text="Time of Day", row=1, col=2)
fig.update_xaxes(title_text="Service Category", row=2, col=1)
fig.update_xaxes(title_text="Previous Bookings", row=2, col=2)
fig.update_xaxes(title_text="Customer Recency Percentile", row=3, col=1)
fig.update_xaxes(title_text="Booking Status", row=3, col=2)

fig.update_yaxes(title_text="No-show Rate (%)", row=1, col=1)
fig.update_yaxes(title_text="No-show Rate (%)", row=1, col=2)
fig.update_yaxes(title_text="No-show Rate (%)", row=2, col=1)
fig.update_yaxes(title_text="No-show (1=Yes, 0=No)", row=2, col=2)
fig.update_yaxes(title_text="No-show Rate (%)", row=3, col=1)
fig.update_yaxes(title_text="Average Revenue ($)", row=3, col=2)

# Generate summary statistics
summary_stats = {
    'Overall No-show Rate': f"{(df1['noshow'].mean() * 100):.1f}%",
    'Total Bookings': len(df1),
    'Total No-shows': df1['noshow'].sum(),
    'Highest Risk Day': dow_stats.loc[dow_stats['no_show_rate'].idxmax(), 'day'],
    'Highest Risk Time': tod_stats.loc[tod_stats['no_show_rate'].idxmax(), 'time'],
    # Mean last_receipt_tot among no-show rows
    'Average Revenue Loss per No-show': f"${revenue_impact.loc[revenue_impact['noshow'] == 1, 'avg_revenue'].iloc[0]:.2f}"
}

print("\nSummary Statistics:")
for key, value in summary_stats.items():
    print(f"{key}: {value}")

# Save the figure
fig.write_html("salon_analysis_dashboard.html")


Summary Statistics:
Overall No-show Rate: 11.5%
Total Bookings: 1952
Total No-shows: 224
Highest Risk Day: Sunday
Highest Risk Time: afternoon
Average Revenue Loss per No-show: $39.58
