# In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statistics import mean, median, mode
from dateutil.parser import parse
import re

# 1. Descriptive Statistics

def descriptive_sales_stats(df):
    """Summarize the central tendency of the ``sales`` column.

    Missing entries are dropped first; the remaining values are handed to
    ``statistics.mean``, ``statistics.median`` and ``statistics.mode``.

    Args:
        df: Table indexable by ``'sales'`` whose result supports ``dropna()``.

    Returns:
        dict: ``{'mean': ..., 'median': ..., 'mode': ...}``.

    Raises:
        statistics.StatisticsError: If no values remain after dropping
            missing entries.
    """
    observed = df['sales'].dropna()
    estimators = (('mean', mean), ('median', median), ('mode', mode))
    return {label: estimate(observed) for label, estimate in estimators}

def customer_age_stats(df):
    """Summarize the central tendency of the ``age`` column.

    Missing entries are dropped first; the remaining values are handed to
    ``statistics.mean``, ``statistics.median`` and ``statistics.mode``.

    Args:
        df: Table indexable by ``'age'`` whose result supports ``dropna()``.

    Returns:
        dict: ``{'mean': ..., 'median': ..., 'mode': ...}``.

    Raises:
        statistics.StatisticsError: If no values remain after dropping
            missing entries.
    """
    known_ages = df['age'].dropna()
    summary = {}
    summary['mean'] = mean(known_ages)
    summary['median'] = median(known_ages)
    summary['mode'] = mode(known_ages)
    return summary

def website_visits_stats(df):
    """Summarize the central tendency of the ``daily_visits`` column.

    Missing entries are dropped first; the remaining values are handed to
    ``statistics.mean``, ``statistics.median`` and ``statistics.mode``.

    Args:
        df: Table indexable by ``'daily_visits'`` whose result supports
            ``dropna()``.

    Returns:
        dict: ``{'mean': ..., 'median': ..., 'mode': ...}``.

    Raises:
        statistics.StatisticsError: If no values remain after dropping
            missing entries.
    """
    recorded = df['daily_visits'].dropna()
    labels = ('mean', 'median', 'mode')
    estimators = (mean, median, mode)
    return {
        label: estimate(recorded)
        for label, estimate in zip(labels, estimators)
    }


# 2. Distribution Analysis

def plot_price_distribution(df):
    """Display a histogram of the non-missing ``price`` values.

    Draws a 10-bin histogram with ``matplotlib.pyplot``, labels it, and
    calls ``plt.show()``. Nothing is returned.

    Args:
        df: Table indexable by ``'price'`` whose result supports ``dropna()``.
    """
    prices = df['price'].dropna()
    bar_style = {'bins': 10, 'color': 'skyblue', 'edgecolor': 'black'}
    plt.hist(prices, **bar_style)

    # Title first, then the x and y axis labels.
    annotations = (
        (plt.title, 'Product Price Distribution'),
        (plt.xlabel, 'Price'),
        (plt.ylabel, 'Frequency'),
    )
    for annotate, text in annotations:
        annotate(text)

    plt.show()

def plot_exam_scores(df):
    """Display a histogram of the non-missing ``exam_score`` values.

    Draws a 10-bin histogram with ``matplotlib.pyplot``, labels it, and
    calls ``plt.show()``. Nothing is returned.

    Args:
        df: Table indexable by ``'exam_score'`` whose result supports
            ``dropna()``.
    """
    scores = df['exam_score'].dropna()
    bar_style = {'bins': 10, 'color': 'lightgreen', 'edgecolor': 'black'}
    plt.hist(scores, **bar_style)

    # Title first, then the x and y axis labels.
    annotations = (
        (plt.title, 'Exam Score Distribution'),
        (plt.xlabel, 'Score'),
        (plt.ylabel, 'Count'),
    )
    for annotate, text in annotations:
        annotate(text)

    plt.show()

def plot_order_quantities(df):
    """Display a histogram of the non-missing ``order_qty`` values.

    Draws a 10-bin histogram with ``matplotlib.pyplot``, labels it, and
    calls ``plt.show()``. Nothing is returned.

    Args:
        df: Table indexable by ``'order_qty'`` whose result supports
            ``dropna()``.
    """
    quantities = df['order_qty'].dropna()
    bar_style = {'bins': 10, 'color': 'lightcoral', 'edgecolor': 'black'}
    plt.hist(quantities, **bar_style)

    # Title first, then the x and y axis labels.
    annotations = (
        (plt.title, 'Order Quantity Distribution'),
        (plt.xlabel, 'Quantity'),
        (plt.ylabel, 'Frequency'),
    )
    for annotate, text in annotations:
        annotate(text)

    plt.show()


# 3. Data Type Validation

def validate_numeric_column(df, column):
    """Return True when every value in ``df[column]`` is numeric or missing.

    A value passes if it is a Python ``int`` or ``float`` (``bool`` included,
    because it subclasses ``int``), a NumPy integer or floating scalar, or a
    scalar that ``pd.isna`` reports as missing.

    Args:
        df: DataFrame holding the column.
        column: Label of the column to check.

    Returns:
        bool: True if all values pass (vacuously True for an empty column),
        otherwise False.
    """
    # NumPy integer scalars stored in object columns are not ``int``
    # instances, so they are listed explicitly.
    numeric_types = (int, float, np.integer, np.floating)

    def is_numeric_or_missing(value):
        if isinstance(value, numeric_types):
            return True
        # pd.isna() returns an array for list-like cells; only consult it for
        # scalars so a nested list/array counts as invalid instead of making
        # the final .all() fail on an ambiguous truth value.
        return pd.api.types.is_scalar(value) and bool(pd.isna(value))

    return bool(df[column].apply(is_numeric_or_missing).all())

def validate_date_column(df, column):
    """Return True when every value in ``df[column]`` parses as a date.

    Each value is converted with ``str()`` and passed to
    ``dateutil.parser.parse``; the column is valid only if every value
    parses.

    Missing values are not skipped: they are stringified like any other
    value. NOTE(review): strings such as ``'nan'`` or ``'None'`` presumably
    fail to parse, which would make a column with gaps invalid -- confirm
    that this is intended.

    Args:
        df: DataFrame holding the column.
        column: Label of the column to check.

    Returns:
        bool: True if all values parse (vacuously True for an empty column),
        otherwise False.
    """
    def is_valid_date(value):
        try:
            parse(str(value))
        except (ValueError, OverflowError, TypeError):
            # Only parse failures mean "not a date". A bare ``except`` would
            # also swallow KeyboardInterrupt/SystemExit and hide programming
            # errors by reporting them as invalid data.
            return False
        return True

    return bool(df[column].apply(is_valid_date).all())

def validate_email_format(df, column):
    """Return True when every non-missing value in ``df[column]`` looks like an email.

    Missing values are dropped before checking. Each remaining value is
    converted with ``str()`` and must consist, in full, of one or more word
    characters, dots or hyphens, an ``@``, one or more of the same
    characters, and a final ``.`` followed by word characters.

    Args:
        df: DataFrame holding the column.
        column: Label of the column to check.

    Returns:
        bool: True if all non-missing values match (vacuously True when none
        remain), otherwise False.
    """
    email_pattern = re.compile(r'^[\w\.-]+@[\w\.-]+\.\w+$')

    def looks_like_email(value):
        # fullmatch, not match: with match(), ``$`` also succeeds just before
        # a trailing newline, so 'user@example.com\n' would be accepted.
        return email_pattern.fullmatch(str(value)) is not None

    return bool(df[column].dropna().apply(looks_like_email).all())
