In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Task 1: Import data and set index
df = pd.read_csv('fcc-forum-pageviews.csv', parse_dates=['date'], index_col='date')

# Task 2: Clean the data
def clean_data(df):
    # Calculate the 2.5% and 97.5% quantiles
    quantile_low = df['value'].quantile(0.025)
    quantile_high = df['value'].quantile(0.975)
    
    # Filter out the outliers
    cleaned_df = df[(df['value'] >= quantile_low) & (df['value'] <= quantile_high)]
    return cleaned_df

cleaned_df = clean_data(df.copy())

# Task 3: Draw line plot
def draw_line_plot():
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(cleaned_df.index, cleaned_df['value'], color='red')
    ax.set_title('Daily freeCodeCamp Forum Page Views 5/2016-12/2019')
    ax.set_xlabel('Date')
    ax.set_ylabel('Page Views')
    plt.close()
    return fig

# Task 4: Draw bar plot
def draw_bar_plot():
    # Prepare data for bar plot
    df_bar = cleaned_df.copy()
    df_bar['year'] = df_bar.index.year
    df_bar['month'] = df_bar.index.month_name()
    
    # Group by year and month, calculate mean
    df_bar = df_bar.groupby(['year', 'month'])['value'].mean().unstack()
    
    # Ensure months are in order
    months_order = ['January', 'February', 'March', 'April', 'May', 'June', 
                    'July', 'August', 'September', 'October', 'November', 'December']
    df_bar = df_bar[months_order]
    
    # Plot
    fig = df_bar.plot(kind='bar', figsize=(10, 8)).figure
    plt.xlabel('Years')
    plt.ylabel('Average Page Views')
    plt.legend(title='Months')
    plt.close()
    return fig

# Task 5: Draw box plots
def draw_box_plot():
    # Prepare data for box plots
    df_box = cleaned_df.copy()
    df_box.reset_index(inplace=True)
    df_box['year'] = [d.year for d in df_box['date']]
    df_box['month'] = [d.strftime('%b') for d in df_box['date']]
    
    # Order months correctly
    month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    
    # Create figure with 2 subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))
    
    # Year-wise box plot
    sns.boxplot(x='year', y='value', data=df_box, ax=ax1)
    ax1.set_title('Year-wise Box Plot (Trend)')
    ax1.set_xlabel('Year')
    ax1.set_ylabel('Page Views')
    
    # Month-wise box plot
    sns.boxplot(x='month', y='value', data=df_box, order=month_order, ax=ax2)
    ax2.set_title('Month-wise Box Plot (Seasonality)')
    ax2.set_xlabel('Month')
    ax2.set_ylabel('Page Views')
    
    plt.close()
    return fig
print(pd.read csv)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (1929624068.py, line 83)