In [None]:
!pip install plotly

In [None]:
# Required Libraries
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from matplotlib.ticker import MaxNLocator
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from collections import defaultdict

# Inline plotting for Jupyter Notebook
%matplotlib inline

In [None]:
# 1. Fetch Data
def fetch_data(url, timeout=10):
    """Fetch and decode a JSON payload from an HTTP endpoint.

    Parameters
    ----------
    url : str
        Endpoint to send the GET request to.
    timeout : float or tuple, optional
        Seconds to wait for the server; forwarded to ``requests.get``.
        Defaults to 10 so an unresponsive endpoint cannot hang the notebook.

    Returns
    -------
    object or None
        The decoded JSON body when the server answers with status 200.
        ``None`` when the request fails, the status is not 200, or the body
        is not valid JSON; a message describing the failure is printed in
        each of those cases.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a bad
        # status code, so callers only ever have to check for None.
        print(f"Failed to fetch data from {url}. Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data from {url}. Status code: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 response can still carry a non-JSON body (e.g. an HTML error page).
        print(f"Failed to decode JSON from {url}. Error: {exc}")
        return None

# 2. Process Quiz Data
def process_quiz_data(data):
    """Flatten quiz records into a DataFrame and normalise key columns.

    Parameters
    ----------
    data : dict, list of dict, or other
        A single record, or a list of records. A record may hold a nested
        ``'quiz'`` dict whose keys are hoisted to the top level of the
        record. Any other value is handed to ``pd.DataFrame`` unchanged.
        The input is never modified.

    Returns
    -------
    pandas.DataFrame
        One row per record. When the columns are present:

        * ``topic`` is stripped of surrounding whitespace and lower-cased;
        * ``accuracy`` is read as a percentage (``"90 %"`` or ``90``) and
          stored as a fraction (``0.9``);
        * ``negative_score`` and ``final_score`` are converted to float.

        Values that cannot be parsed as numbers become ``NaN``.
    """
    if isinstance(data, dict):
        records = [_flatten_quiz_record(data)]
    elif isinstance(data, list):
        records = [_flatten_quiz_record(record) for record in data]
    else:
        records = data

    df = pd.DataFrame(records)

    # Normalize topic names
    if 'topic' in df.columns:
        df['topic'] = df['topic'].str.strip().str.lower()

    if 'accuracy' in df.columns:
        df['accuracy'] = _percent_to_fraction(df['accuracy'])

    for score_column in ('negative_score', 'final_score'):
        if score_column in df.columns:
            df[score_column] = pd.to_numeric(df[score_column], errors='coerce').astype(float)
    return df


def _flatten_quiz_record(record):
    """Return a shallow copy of ``record`` with its nested ``'quiz'`` keys hoisted."""
    flat = dict(record)
    nested = flat.get('quiz')
    if isinstance(nested, dict):
        # Nested keys overwrite same-named top-level keys (e.g. 'id').
        # NOTE(review): confirm that losing the outer value is intended.
        flat.update(nested)
    return flat


def _percent_to_fraction(values):
    """Convert a Series of percentages (``'90 %'`` or ``90``) to fractions."""
    if not pd.api.types.is_numeric_dtype(values):
        # astype(str) lets a column mixing strings and numbers go through the
        # same clean-up; missing values become unparseable text and are
        # coerced to NaN below.
        values = values.astype(str).str.strip().str.rstrip('%').str.strip()
    return pd.to_numeric(values, errors='coerce') / 100

# 3. Analyze Performance
def analyze_performance(df):
    """Compute the mean accuracy for every topic.

    Parameters
    ----------
    df : pandas.DataFrame
        Quiz records with a ``topic`` column and a numeric ``accuracy`` column.

    Returns
    -------
    dict
        Maps each topic to the mean of its ``accuracy`` values. An empty
        dict is returned, and a message printed, when either column is
        absent from ``df``.
    """
    missing = [column for column in ('topic', 'accuracy') if column not in df.columns]
    if missing:
        # Both columns are optional in the processed frame, so report their
        # absence instead of failing with a KeyError inside groupby.
        print(f"Cannot analyze performance: missing column(s) {missing}.")
        return {}

    mean_accuracy_by_topic = df.groupby('topic')['accuracy'].mean()
    return mean_accuracy_by_topic.to_dict()

# 4. Generate Recommendations
def generate_recommendations(performance, focus_threshold=0.7, strong_threshold=0.9):
    """Turn per-topic accuracy into one recommendation sentence per topic.

    Parameters
    ----------
    performance : dict
        Maps a topic to its accuracy expressed as a fraction.
    focus_threshold : float, optional
        Accuracies strictly below this value get a "focus on improving"
        message. Defaults to 0.7.
    strong_threshold : float, optional
        Accuracies at or above this value get a "great job" message; values
        between the two thresholds get a "room for improvement" message.
        Defaults to 0.9.

    Returns
    -------
    list of str
        One message per entry of ``performance``, in iteration order.
    """
    recommendations = []
    for topic, accuracy in performance.items():
        # str() keeps non-string topic keys from failing on .title().
        label = str(topic).title()
        if pd.isna(accuracy):
            # NaN fails every "<" comparison and would otherwise fall through
            # to the "Great job" branch.
            recommendations.append(f"Not enough data to assess {label} yet.")
        elif accuracy < focus_threshold:
            recommendations.append(f"Focus on improving in {label}.")
        elif accuracy < strong_threshold:
            recommendations.append(f"You are doing well in {label}, but there's room for improvement.")
        else:
            recommendations.append(f"Great job in {label}! Keep it up.")
    return recommendations

# 5. Visualize Data
def visualize_performance(performance):
    """Render a bar chart with one bar per topic.

    Parameters
    ----------
    performance : dict
        Maps a topic (x-axis label) to its accuracy (bar height).
    """
    topic_labels = [topic for topic in performance]
    accuracy_values = [performance[topic] for topic in topic_labels]

    plt.figure(figsize=(10, 5))
    plt.bar(topic_labels, accuracy_values, color='skyblue')

    # Titles and axis labels.
    plt.title('Performance by Topic')
    plt.xlabel('Topics')
    plt.ylabel('Accuracy')

    # Slant the tick labels so longer topic names do not overlap.
    plt.xticks(rotation=45, ha='right')

    plt.tight_layout()
    plt.show()

# 6. Clustering for Personas
def perform_clustering(df, n_clusters=3):
    """Cluster records into personas with k-means and plot the result.

    The features are the ``accuracy``, ``final_score`` and ``speed`` columns.
    Rows where any of the three is missing or non-numeric are left out of
    the fit.

    Parameters
    ----------
    df : pandas.DataFrame
        Quiz records. On success a ``persona`` column holding the cluster
        label is written to it in place; rows left out of the fit get NaN.
    n_clusters : int, optional
        Number of clusters to fit. Defaults to 3.

    Returns
    -------
    None
        A message is printed and ``df`` is left untouched when a feature
        column is absent or fewer than ``n_clusters`` usable rows remain.
    """
    feature_columns = ['accuracy', 'final_score', 'speed']
    if any(column not in df.columns for column in feature_columns):
        print("Not enough data for clustering.")
        return

    # Coerce to numbers first so that numeric strings are usable and
    # anything unparseable is dropped together with the missing values.
    features = df[feature_columns].apply(pd.to_numeric, errors='coerce').dropna()

    # KMeans needs at least as many samples as clusters; this also covers
    # the completely empty case.
    if len(features) < n_clusters:
        print("Not enough data for clustering.")
        return

    # n_init is set explicitly so the fit does not depend on the default of
    # the installed scikit-learn version.
    # NOTE(review): the features are not scaled before clustering — confirm
    # that is intended given their differing ranges.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    clusters = kmeans.fit_predict(features)

    # Align labels on the index of the rows that were actually clustered;
    # a plain array assignment fails whenever dropna() removed rows.
    df['persona'] = pd.Series(clusters, index=features.index)

    # Visualize Clusters (on a fresh figure, not whichever one is current)
    plt.figure()
    plt.scatter(features['accuracy'], features['final_score'], c=clusters, cmap='viridis')
    plt.xlabel('Accuracy')
    plt.ylabel('Final Score')
    plt.title('Student Personas (Clusters)')
    plt.colorbar(label='Cluster')
    plt.show()

In [None]:
# Download the three JSON payloads used by the notebook.
current_quiz_data = fetch_data("https://api.jsonserve.com/XgAgFJ")
historical_quiz_data = fetch_data("https://api.jsonserve.com/rJvd7g")
quiz_details = fetch_data("https://www.jsonkeeper.com/b/LLQT")

# Continue only when every download produced a non-empty payload.
if current_quiz_data and historical_quiz_data and quiz_details:
    # Flatten the raw payloads into DataFrames.
    current_quiz_df = process_quiz_data(current_quiz_data)
    historical_quiz_df = process_quiz_data(historical_quiz_data)

    # Mean accuracy per topic for the current quiz data.
    performance = analyze_performance(current_quiz_df)
    print("Performance by Topic:", performance)
else:
    print("Error in fetching data. Please check the endpoints.")

In [9]:
# Display the data from each dataset (first 5 rows)
def display_data(df, name):
    """Print a titled preview of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``head()`` is printed.
    name : str
        Label placed in the heading line above the preview.
    """
    heading = f"\n{name} - First 5 Rows:"
    preview = df.head()
    # One print call: heading, preview, then the trailing blank lines.
    print(heading, preview, "\n", sep="\n")

# Main execution: download, flatten, then preview each dataset.
current_quiz_data = fetch_data("https://api.jsonserve.com/XgAgFJ")
historical_quiz_data = fetch_data("https://api.jsonserve.com/rJvd7g")
quiz_details = fetch_data("https://www.jsonkeeper.com/b/LLQT")

# Continue only when every download produced a non-empty payload.
if current_quiz_data and historical_quiz_data and quiz_details:
    # Flatten each payload into a DataFrame.
    current_quiz_df = process_quiz_data(current_quiz_data)
    historical_quiz_df = process_quiz_data(historical_quiz_data)
    quiz_details_df = process_quiz_data(quiz_details)

    # Show the first rows of every frame for a quick sanity check.
    display_data(current_quiz_df, "Current Quiz Data")
    display_data(historical_quiz_df, "Historical Quiz Data")
    display_data(quiz_details_df, "Quiz Details")
else:
    print("Error in fetching data. Please check the endpoints.")


Current Quiz Data - First 5 Rows:
   id  quiz_id                       user_id                   submitted_at  \
0  51       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42  2025-01-17T15:30:18.027+05:30   
1   6        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42  2025-01-17T15:17:44.042+05:30   
2  51       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42  2025-01-16T20:13:19.682+05:30   
3   6        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42  2025-01-16T20:00:11.562+05:30   
4  51       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42  2025-01-15T20:34:39.462+05:30   

                      created_at                     updated_at  score  \
0  2024-07-12T11:17:37.658+05:30  2024-09-23T18:43:27.373+05:30    108   
1  2024-04-26T15:26:29.886+05:30  2024-06-05T18:27:32.163+05:30     92   
2  2024-07-12T11:17:37.658+05:30  2024-09-23T18:43:27.373+05:30    116   
3  2024-04-26T15:26:29.886+05:30  2024-06-05T18:27:32.163+05:30     36   
4  2024-07-12T11:17:37.658+05:30  2024-09-23T18:43:27.373+05:30     36   

   trophy_level  accuracy speed  ...  exam_id

In [5]:
# Requires current_quiz_df (with 'topic' and 'accuracy' columns) from the
# processing cell above.
# Mean accuracy per topic, highest first.
performance_by_topic = (
    current_quiz_df
    .groupby('topic')['accuracy']
    .mean()
    .sort_values(ascending=False)
)

# Seaborn bar chart: one bar per topic, coloured with the viridis palette.
plt.figure(figsize=(12, 6))
sns.barplot(
    x=performance_by_topic.index,
    y=performance_by_topic.values,
    palette='viridis',
)
plt.title('Average Accuracy by Topic (Current Quiz Data)')
plt.xlabel('Topic')
plt.ylabel('Average Accuracy')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

NameError: name 'sns' is not defined

<Figure size 1200x600 with 0 Axes>

In [None]:
# Histogram of the 'score' column with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(
    data=current_quiz_df['score'],
    bins=20,
    kde=True,
    color='blue',
)
plt.xlabel('Score')
plt.ylabel('Frequency')
plt.title('Distribution of Quiz Scores (Current Quiz Data)')
plt.tight_layout()
plt.show()

In [None]:
# Build the recommendation sentences from the per-topic accuracy dict
# and print them as a bulleted list.
recommendations = generate_recommendations(performance)
print("\nPersonalized Recommendations:")
if recommendations:
    print("\n".join(f"- {rec}" for rec in recommendations))

# Bar chart of accuracy per topic.
visualize_performance(performance)

# K-means personas on the current quiz data.
perform_clustering(current_quiz_df)