In [1]:
import matplotlib.pyplot as plt 
import pandas as pd 
import seaborn as sns

In [2]:
# Plot appearance: matplotlib's "ggplot" style sheet first, then seaborn's
# "pastel" palette (order kept: the palette is applied after the style sheet).
plt.style.use("ggplot")
sns.set_palette(palette="pastel")

In [3]:
# Load the survey responses from CSV and preview the first rows.
# The path is relative, so the file must sit in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Beauty Service Booking Survey.csv")
df.head(n=5)

FileNotFoundError: [Errno 2] No such file or directory: 'Beauty Service Booking Survey.csv'

In [None]:
# Summarise the dataset: column names, dtypes and non-null counts.
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line after the summary.
df.info()

In [None]:
# Bar chart: number of respondents in each age group
plt.figure(figsize=(10, 6))
age_ax = sns.countplot(x="Age group?", data=df)
age_ax.set_title("Distribution of Age Groups")
age_ax.set_xlabel("Age Group")
age_ax.set_ylabel("Count")
plt.xticks(rotation=45)
plt.show()

In [41]:
# Normalise region names in one pass: trim surrounding whitespace, fold to
# lowercase so case variants collapse together, then title-case for display.
df["Region of Residence"] = (
    df["Region of Residence"]
    .str.strip()
    .str.lower()
    .str.title()
)

In [43]:
# Known misspellings of region names, keyed by the wrong spelling
corrections = dict(Alexnadria="Alexandria")  # typo fix

# Swap every misspelled region for its corrected form
df["Region of Residence"] = df["Region of Residence"].replace(to_replace=corrections)

In [None]:
# Bar order: regions sorted by number of respondents (largest to smallest)
region_order = df["Region of Residence"].value_counts().index

plt.figure(figsize=(10, 6))
region_ax = sns.countplot(x="Region of Residence", order=region_order, data=df)

region_ax.set_title("Distribution of Regions of Residence")
region_ax.set_xlabel("Region of Residence")
region_ax.set_ylabel("Count")
plt.xticks(rotation=45)  # slanted labels are easier to read
plt.show()

In [None]:
# --- PRICING & SUBSCRIPTION INTEREST ---
# Horizontal bar chart of typical spend per visit, most common range first
spend_per_visit = df["On average, how much do you typically spend per visit on beauty services"]

plt.figure(figsize=(8, 5))
spend_ax = sns.countplot(y=spend_per_visit, order=spend_per_visit.value_counts().index)
spend_ax.set_xlabel("Count")
spend_ax.set_ylabel("Spending Range (EGP)")
spend_ax.set_title("Average Spending per Beauty Service Visit")
plt.show()


In [None]:
# Bar chart: number of respondents per gender
plt.figure(figsize=(10, 6))
gender_ax = sns.countplot(x="Gender", data=df)
gender_ax.set_title("Distribution of Gender")
gender_ax.set_xlabel("Gender")
gender_ax.set_ylabel("Count")
plt.show()


In [None]:
# Bar chart of answers to the discounted-bundle question, most common answer first
bundle_interest = df["Would you be interested in purchasing discounted bundles (e.g., 5 visits for 20% less)?"]

plt.figure(figsize=(6, 4))
bundle_ax = sns.countplot(x=bundle_interest, order=bundle_interest.value_counts().index)
bundle_ax.set_xlabel("Interest in Subscription Bundles")
bundle_ax.set_ylabel("Count")
bundle_ax.set_title("Would Customers Buy Discounted Bundles?")
plt.show()


In [None]:
# --- PRICING & SUBSCRIPTION INTEREST ---
# Spending per visit, bars ordered by how often each range was chosen.
# NOTE(review): an earlier cell in this notebook draws this same chart with the
# same column, ordering and labels — one of the two can probably be removed.
visit_spend = df["On average, how much do you typically spend per visit on beauty services"]
visit_spend_order = visit_spend.value_counts().index

plt.figure(figsize=(8, 5))
visit_spend_ax = sns.countplot(y=visit_spend, order=visit_spend_order)
visit_spend_ax.set(
    xlabel="Count",
    ylabel="Spending Range (EGP)",
    title="Average Spending per Beauty Service Visit",
)
plt.show()

In [None]:
# --- FEATURE PREFERENCES FOR ONLINE PLATFORM ---
# Most important features in a booking platform
def extract_features(df, column_name):
    """Count how often each option was picked in a multi-select survey column.

    Each non-missing cell of ``df[column_name]`` is split on ``';'`` into
    individual options. Surrounding whitespace is stripped from every option
    and empty fragments (produced by a trailing or doubled ``';'``) are
    discarded, so ``"A; B;"`` contributes one ``"A"`` and one ``"B"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the survey responses.
    column_name : str
        Name of the column with ``';'``-separated selections.

    Returns
    -------
    pandas.Series
        Option counts as returned by ``value_counts()`` (most frequent first).
    """
    options = (
        df[column_name]
        .dropna()
        .str.split(';')
        .explode()
        .str.strip()  # "A; B" must count towards "B", not " B"
    )
    # Drop empty fragments so a trailing ';' is not tallied as a feature.
    return options[options != ""].value_counts()

# Tally the selected platform features, then chart them as horizontal bars
feature_question = "What features would you find most valuable in a beauty service platform?  (Select the most important 3 in your opinion)"
features_importance = extract_features(df, feature_question)

plt.figure(figsize=(12, 6))
features_ax = sns.barplot(x=features_importance.values, y=features_importance.index)
features_ax.set_xlabel("Count")
features_ax.set_ylabel("Feature")
features_ax.set_title("Most Desired Features in a Beauty Service Platform")
plt.show()

In [None]:
# --- ACTIONABLE INSIGHTS ---
# Headline figures: the single most frequent answer for each question of interest.
# NOTE(review): the booking-method answers are only standardised in a later
# cell, so on a top-to-bottom run "Most Common Booking Method" is taken from
# the raw spellings — confirm that is intended.
def _top_answer(column_name):
    """Return the most frequent value found in ``df[column_name]``."""
    return df[column_name].value_counts().idxmax()


insights = {
    "Most Popular Regions": _top_answer("Region of Residence"),
    "Most Common Booking Method": _top_answer("How do you usually book your appointments for barbershops/salons?"),
    "Biggest Customer Pain Points": extract_features(
        df,
        "What are the main challenges you face when booking beauty services? \n(Check all that apply)",
    ).idxmax(),
    "Spending Range with Highest Responses": _top_answer(
        "On average, how much do you typically spend per visit on beauty services"
    ),
    "Top Feature Requested in a Beauty Platform": features_importance.idxmax(),
}

# Show the summary
display(insights)

In [None]:
# Standardise the free-text booking-method answers so spelling variants of the
# same method collapse into one label.
booking_col = "How do you usually book your appointments for barbershops/salons?"

# Keys are lower-case because they are matched after the strip/lower step below.
# "direct message" is listed as well so that re-running this cell maps an
# already-cleaned "Direct Message" (lower-cased again by the first step) back to
# the same label instead of leaving it lower-case.
booking_labels = {
    # Variations of "Walk-in"
    "walk in": "Walk-in",
    "walk-in": "Walk-in",
    "walk in: el zayak yasta": "Walk-in",
    # Other booking methods
    "message the stylist directly": "Direct Message",
    "direct message": "Direct Message",
    "phone call": "Phone Call",
}

df[booking_col] = (
    df[booking_col]
    .str.strip()
    .str.lower()
    .replace(booking_labels)
)

# Chart the booking methods as horizontal bars, most common method first.
# NOTE(review): recent seaborn releases are reported to warn when `palette` is
# given without `hue` — confirm against the installed version.
booking_answers = df["How do you usually book your appointments for barbershops/salons?"]

plt.figure(figsize=(12, 6))
booking_ax = sns.countplot(
    y="How do you usually book your appointments for barbershops/salons?",
    order=booking_answers.value_counts().index,
    palette="magma",
    data=df,
)

booking_ax.set_title("How Customers Book Their Beauty Appointments")
booking_ax.set_xlabel("Count")
booking_ax.set_ylabel("Booking Method")
plt.show()
