In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
def analyze_quiz_performance(df: pd.DataFrame, top_n: int = 10, bins: int = 20) -> None:
    """Print summary statistics and draw diagnostic plots for quiz attempts.

    The report consists of: a describe()/dtypes/null-count overview, the mean
    accuracy overall and grouped by topic and by difficulty, a histogram of
    scores, a scatter of accuracy against duration, the quizzes with the
    highest mean accuracy, and (when the columns exist) a mistake-correction
    analysis.

    Side effects on ``df`` (the frame is modified in place):
      * ``submitted_at``, ``started_at`` and ``ended_at`` are converted with
        ``pd.to_datetime``.
      * ``mistake_correction_rate`` is added when both
        ``initial_mistake_count`` and ``mistakes_corrected`` are present.

    Args:
        df: Frame with the columns ``submitted_at``, ``started_at``,
            ``ended_at``, ``accuracy``, ``quiz_topic``,
            ``quiz_difficulty_level``, ``score``, ``duration`` and
            ``quiz_name``. ``initial_mistake_count`` and
            ``mistakes_corrected`` are optional.
        top_n: How many quizzes to list in the "top performing" table.
        bins: Number of bins for the score histogram.

    Returns:
        None. Results are printed and plotted.

    Raises:
        KeyError: If one of the required columns is missing.
    """

    # Data Cleaning and Preprocessing
    for timestamp_column in ('submitted_at', 'started_at', 'ended_at'):
        df[timestamp_column] = pd.to_datetime(df[timestamp_column])

    # Basic Data Exploration
    print("Data Summary:")
    print(df.describe(include='all'))
    print("\nData Types:")
    print(df.dtypes)
    print("\nMissing Values:")
    print(df.isnull().sum())

    # Performance Analysis
    # 1. Overall Accuracy
    overall_accuracy = df['accuracy'].mean()
    print(f"\nOverall Accuracy: {overall_accuracy:.2%}")

    # 2. Performance by Quiz Topic
    topic_performance = df.groupby('quiz_topic')['accuracy'].mean()
    print("\nPerformance by Quiz Topic:")
    print(topic_performance)

    # 3. Performance by Quiz Difficulty
    difficulty_performance = df.groupby('quiz_difficulty_level')['accuracy'].mean()
    print("\nPerformance by Quiz Difficulty:")
    print(difficulty_performance)

    # 4. Score Distribution
    plt.figure(figsize=(10, 6))
    plt.hist(df['score'], bins=bins, edgecolor='black')
    plt.title('Score Distribution')
    plt.xlabel('Score')
    plt.ylabel('Frequency')
    plt.show()

    # 5. Accuracy vs. Speed (Duration)
    plt.figure(figsize=(10, 6))
    plt.scatter(df['duration'], df['accuracy'], alpha=0.5)  # alpha for transparency
    plt.title('Accuracy vs. Quiz Duration')
    plt.xlabel('Quiz Duration (seconds)')
    plt.ylabel('Accuracy')
    plt.show()

    # 6. Top Performing Quizzes
    top_quizzes = (
        df.groupby('quiz_name')['accuracy']
        .mean()
        .sort_values(ascending=False)
        .head(top_n)
    )
    print(f"\nTop {top_n} Performing Quizzes:")
    print(top_quizzes)

    # 7. Mistakes Analysis (only if both mistake columns are available)
    if {'initial_mistake_count', 'mistakes_corrected'}.issubset(df.columns):
        initial_mistakes = df['initial_mistake_count']
        # Dividing by a zero mistake count yields inf (n/0), and a single inf
        # turns the mean into inf. Mask zero denominators as NaN so those rows
        # are left out of the average instead.
        nonzero_initial_mistakes = initial_mistakes.where(initial_mistakes != 0)
        df['mistake_correction_rate'] = df['mistakes_corrected'] / nonzero_initial_mistakes
        average_correction_rate = df['mistake_correction_rate'].mean()
        print(f"\nAverage Mistake Correction Rate: {average_correction_rate:.2%}")

        plt.figure(figsize=(10, 6))
        plt.scatter(initial_mistakes, df['mistakes_corrected'], alpha=0.5)
        plt.title('Mistakes Corrected vs Initial Mistakes')
        plt.xlabel('Initial Mistake Count')
        plt.ylabel('Mistakes Corrected')
        plt.show()


# Performance Analysis

In [5]:
# Load the quiz export and run the analysis. Expected failure modes are
# reported as one-line messages instead of tracebacks.
DATA_PATH = 'api_Endpoin.csv'  # single source of truth for the read and the error message

try:
    df = pd.read_csv(DATA_PATH)
    analyze_quiz_performance(df)
except FileNotFoundError:
    # Name the file that was actually requested so the message is actionable.
    print(f"Error: '{DATA_PATH}' not found. Please provide the correct file path.")
except pd.errors.EmptyDataError:
    print("Error: The CSV file is empty.")
except pd.errors.ParserError:
    print("Error: Failed to parse the CSV file. Check its format.")
except KeyError as e:
    # A KeyError's first argument is the missing column label.
    print(f"Error: Column '{e.args[0]}' not found in the CSV. Check your column names.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Data Summary:
                   id    quiz_id                       user_id  \
count       14.000000  14.000000                            14   
unique            NaN        NaN                             1   
top               NaN        NaN  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
freq              NaN        NaN                            14   
mean    312368.428571  31.500000                           NaN   
min     195808.000000   6.000000                           NaN   
25%     316613.250000  18.000000                           NaN   
50%     324964.000000  24.500000                           NaN   
75%     332307.500000  51.000000                           NaN   
max     336497.000000  58.000000                           NaN   
std      38889.139814  20.346045                           NaN   

                               submitted_at                     created_at  \
count                                    14                             14   
unique                               