## Compare Data Completeness Over Time

**Description**: Analyze how the rate of missing data in `sales_data.csv` changes over several months, using the dates stored in its `date` column. Visualize the missing-data rate for each month.

In [1]:
# Write your code from here
import pandas as pd
import matplotlib.pyplot as plt
import os

def _monthly_missing_rate(df, date_column):
    """
    Computes the average missing-data rate for each calendar month.

    For every year-month found in the date column, the fraction of null
    cells is computed per column and then averaged across all columns
    other than the date column itself.

    Parameters:
        df (pandas.DataFrame): Data that contains `date_column`.
        date_column (str): Name of the column containing dates.

    Returns:
        pandas.Series: Average missing rate (0.0 - 1.0) indexed by
        monthly period.

    Raises:
        ValueError: If no date can be parsed, or if there is no column
            other than the date column to check.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    # Parse into a separate Series so the caller's frame is not modified.
    dates = pd.to_datetime(df[date_column], errors='coerce')
    if dates.isnull().all():
        raise ValueError(f"All values in '{date_column}' could not be parsed as dates.")

    unparsed = int(dates.isnull().sum())
    if unparsed:
        # groupby drops NaT keys by default, so these rows would otherwise
        # vanish from the trend without any indication.
        warnings.warn(
            f"{unparsed} row(s) with missing or unparseable '{date_column}' "
            "values are excluded from the monthly trend.",
            stacklevel=3,
        )

    value_columns = [col for col in df.columns if col != date_column]
    if not value_columns:
        raise ValueError(
            f"No columns other than '{date_column}' to check for missing data."
        )

    # Group by an external key instead of adding a 'year_month' column, so a
    # real column with that name in the CSV is still analysed.
    periods = dates.dt.to_period('M').rename('year_month')
    per_column_rate = df[value_columns].isnull().groupby(periods).mean()
    return per_column_rate.mean(axis=1)


def plot_missing_data_trend(file_path, date_column):
    """
    Calculates and plots monthly missing data rates in a CSV file.

    Parameters:
        file_path (str): Path to the CSV file.
        date_column (str): Name of the column containing dates.

    Returns:
        None (displays plot)

    Raises:
        FileNotFoundError: If `file_path` does not exist.
        ValueError: If the file is empty (zero bytes or header only), the
            date column is absent, no date can be parsed, or there are no
            columns besides the date column to check.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError as exc:
        # A zero-byte file fails inside read_csv; report it the same way as
        # a header-only file.
        raise ValueError("The CSV file is empty.") from exc

    if df.empty:
        raise ValueError("The CSV file is empty.")

    if date_column not in df.columns:
        raise ValueError(f"Date column '{date_column}' not found in data.")

    monthly_missing_rate = _monthly_missing_rate(df, date_column)

    # Plot the missing data trend
    monthly_missing_rate.plot(kind='line', marker='o', figsize=(10, 6))
    plt.title('Monthly Missing Data Rate Trend')
    plt.xlabel('Month')
    plt.ylabel('Average Missing Data Rate')
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Example usage: run the analysis on the sample file and report expected
# failures as a one-line message.
if __name__ == "__main__":
    try:
        plot_missing_data_trend('sales_data.csv', 'date')
    except (OSError, ValueError) as e:
        # OSError covers a missing or unreadable file; ValueError covers the
        # data-validation failures. Anything else is a programming error and
        # is left to surface with a full traceback.
        print(f"Error: {e}")


Error: File not found: sales_data.csv
