In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.dates import DateFormatter

# Enable interactive plot window
%matplotlib qt

# Function to load and preprocess Shopee sales data
def load_and_preprocess_shopee_data(file_path='shopee_sales_report_2022_to_2024.csv'):
    """Load a Shopee sales CSV and return a cleaned DataFrame.

    Cleaning steps, applied in this order:

    1. Drop the 'Order Status' column if present.
    2. Fill missing 'Revenue (MYR)' values with the column mean.
    3. Drop exact duplicate rows.
    4. Drop rows whose 'Revenue (MYR)' lies outside the 1.5 * IQR fences.
    5. Parse 'DateTime' (unparseable values become NaT) and derive a
       monthly ``Period`` column named 'MonthYear' from it.

    Steps 2 and 4 are skipped when 'Revenue (MYR)' is absent; step 5 is
    skipped when 'DateTime' is absent, in which case no 'MonthYear' column
    is created.

    Args:
        file_path: Path of the CSV file to read. Defaults to the report
            file this script was written for.

    Returns:
        The cleaned DataFrame. If the file is missing, empty, or any step
        raises, an error message is printed and an empty DataFrame is
        returned instead.
    """
    revenue_col = 'Revenue (MYR)'
    try:
        data = pd.read_csv(file_path)

        # errors='ignore' makes the drop a no-op when the column is absent.
        data_cleaned = data.drop(columns=['Order Status'], errors='ignore')
        has_revenue = revenue_col in data_cleaned.columns

        if has_revenue:
            data_cleaned[revenue_col] = data_cleaned[revenue_col].fillna(
                data_cleaned[revenue_col].mean()
            )

        # Remove duplicates to ensure data integrity
        data_cleaned = data_cleaned.drop_duplicates()

        # Handle outliers in revenue using the IQR method
        if has_revenue:
            revenue = data_cleaned[revenue_col]
            q1 = revenue.quantile(0.25)
            q3 = revenue.quantile(0.75)
            iqr = q3 - q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr
            # between() is inclusive on both ends, matching >= / <=.
            data_cleaned = data_cleaned[revenue.between(lower_bound, upper_bound)]

        if 'DateTime' in data_cleaned.columns:
            parsed = pd.to_datetime(data_cleaned['DateTime'], errors='coerce')
            # assign() returns a new frame, so we never write into the
            # filtered slice above (which would raise SettingWithCopyWarning
            # on pandas versions without copy-on-write).
            data_cleaned = data_cleaned.assign(
                DateTime=parsed,
                MonthYear=parsed.dt.to_period('M'),  # e.g. 2023-01
            )

        return data_cleaned

    except FileNotFoundError:
        print("Error: The specified file could not be found.")
        return pd.DataFrame()
    except pd.errors.EmptyDataError:
        print("Error: The file is empty.")
        return pd.DataFrame()
    except Exception as e:
        # Top-level boundary for the script: report and fall back to "no data".
        print(f"An error occurred during data loading or preprocessing: {e}")
        return pd.DataFrame()

# Load and preprocess the data
data_cleaned = load_and_preprocess_shopee_data()

# Columns the monthly revenue chart cannot be built without. 'MonthYear' is
# only added by the loader when the CSV has a 'DateTime' column, so it may
# legitimately be absent even when data was loaded.
required_columns = ['MonthYear', 'Product Category', 'Revenue (MYR)']
missing_columns = [col for col in required_columns if col not in data_cleaned.columns]

if data_cleaned.empty:
    print("No data available to display the graph.")
elif missing_columns:
    print(f"Cannot display the graph; missing column(s): {', '.join(missing_columns)}")
else:
    # Monthly revenue per product category: one row per month, one column
    # per category, 0 where a category had no sales in that month.
    grouped_data = (
        data_cleaned.groupby(['MonthYear', 'Product Category'])['Revenue (MYR)']
        .sum()
        .unstack()
        .fillna(0)
    )

    # Create a figure and axis object for animated line plot of revenue over time
    fig, ax = plt.subplots(figsize=(10, 6))

    # One colour per category; wrap around the palette so that more
    # categories than palette entries do not all share the last colour.
    colormap = plt.get_cmap('tab10')
    colors = [colormap(i % colormap.N) for i in range(len(grouped_data.columns))]

    def animate(i):
        """Redraw the chart showing the first ``i + 1`` months of revenue."""
        ax.cla()  # Clear previous lines

        # Frames are numbered from 0, so row i itself must be included;
        # slicing with [:i] would leave the first frame empty and never
        # draw the final month.
        shown = grouped_data.iloc[:i + 1]
        x = shown.index.to_timestamp()  # PeriodIndex -> Timestamp for plotting

        for j, category in enumerate(shown.columns):
            ax.plot(x, shown[category], label=category, color=colors[j],
                    marker='o', linestyle='-', alpha=0.8)

        # cla() wipes labels and formatting, so they are re-applied each frame.
        ax.set_ylabel('Revenue (MYR)', fontsize=14)
        ax.set_xlabel('Year-Month', fontsize=14)
        ax.set_title('Revenue Over Time by Product Category', fontsize=16)

        # Customize x-axis for readability (on this axes explicitly, not on
        # whatever pyplot considers the current axes).
        ax.xaxis.set_major_formatter(DateFormatter('%b %Y'))
        ax.tick_params(axis='x', labelrotation=45, labelsize=12)

        # Add a legend to identify product categories
        ax.legend(loc='upper left')

    # Keep a module-level reference: the animation stops if the
    # FuncAnimation object is garbage-collected.
    anim = FuncAnimation(fig, animate, frames=len(grouped_data), interval=500, repeat=False)

    # Adjust layout to ensure labels are visible
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15, left=0.08)  # Add padding to prevent cutoff
    plt.show()