In [25]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [26]:
# Fetch Quiz Data
quiz_url = "https://jsonkeeper.com/b/LLQT"
try:
    quiz_data = requests.get(quiz_url, verify=False).json()  # Use verify=False to bypass SSL verification (not recommended for production)
    if quiz_data:
        quiz_df = pd.DataFrame(quiz_data)
    else:
        print("Quiz data is empty or not available.")
except Exception as e:
    print(f"Error fetching Quiz data: {e}")
    quiz_df = pd.DataFrame()  # Empty DataFrame in case of error

# Fetch Quiz Submission Data
quiz_submission_url = "https://api.jsonserve.com/rJvd7g"
try:
    quiz_submission_data = requests.get(quiz_submission_url, verify=False).json()
    if quiz_submission_data:
        quiz_submission_df = pd.DataFrame(quiz_submission_data)
    else:
        print("Quiz Submission data is empty or not available.")
except Exception as e:
    print(f"Error fetching Quiz Submission data: {e}")
    quiz_submission_df = pd.DataFrame()

# Fetch Historical Quiz Data
historical_url = "https://api.jsonserve.com/XgAgFJ"
try:
    historical_data = requests.get(historical_url, verify=False).json()
    if historical_data:
        historical_df = pd.DataFrame(historical_data)
    else:
        print("Historical data is empty or not available.")
except Exception as e:
    print(f"Error fetching Historical data: {e}")
    historical_df = pd.DataFrame()

# Check if dataframes are populated and print the first few rows
if not quiz_df.empty:
    print("Quiz Data (First 5 rows):")
    print(quiz_df.head())
else:
    print("Quiz Data is empty.")

if not quiz_submission_df.empty:
    print("\nQuiz Submission Data (First 5 rows):")
    print(quiz_submission_df.head())
else:
    print("Quiz Submission Data is empty.")

if not historical_df.empty:
    print("\nHistorical Data (First 5 rows):")
    print(historical_df.head())
else:
    print("Historical Data is empty.")

# If data is available, proceed with further analysis
if not historical_df.empty:
    # Let's analyze the student's performance by topics
    def analyze_performance(df):
        if 'topic' in df.columns and 'score' in df.columns:
            topics_performance = df.groupby('topic')['score'].mean()
            return topics_performance
        else:
            print("Expected columns ('topic' and 'score') not found in the historical data.")
            return pd.Series()

    topics_performance = analyze_performance(historical_df)
    if not topics_performance.empty:
        print("\nTopics Performance Analysis")
        print(topics_performance)

    # Feature Engineering
    def generate_features(df):
        if 'score' in df.columns and 'correct_answers' in df.columns and 'total_questions' in df.columns:
            df['avg_score'] = df['score'].mean()
            df['accuracy'] = df['correct_answers'] / df['total_questions']
            return df
        else:
            print("Expected columns for feature engineering not found.")
            return df

    historical_df = generate_features(historical_df)

    # Train a Rank Prediction Model
    def train_rank_predictor(df):
        if 'avg_score' in df.columns and 'accuracy' in df.columns and 'rank' in df.columns:
            features = df[['avg_score', 'accuracy']]  # You can add more features like time spent, etc.
            target = df['rank']  # Assuming 'rank' is a column in historical_df

            X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

            model = RandomForestRegressor(n_estimators=100)
            model.fit(X_train, y_train)

            # Predictions
            y_pred = model.predict(X_test)

            # Evaluate the model
            mse = mean_squared_error(y_test, y_pred)
            print(f"Mean Squared Error: {mse}")

            return model
        else:
            print("Required columns for rank prediction model not found.")
            return None

    rank_predictor = train_rank_predictor(historical_df)

    # Bonus: Predict College Based on Rank
    def predict_college(rank):
        if rank <= 1000:
            return "Top 10 Colleges"
        elif rank <= 5000:
            return "Tier 1 Colleges"
        elif rank <= 10000:
            return "Tier 2 Colleges"
        else:
            return "Tier 3 Colleges"

    # Example prediction for a user
    user_data = {'avg_score': 80, 'accuracy': 0.75}  # Example data based on their quiz performance
    user_df = pd.DataFrame([user_data])
    if rank_predictor:
        predicted_rank = rank_predictor.predict(user_df)
        print(f"Predicted NEET Rank: {predicted_rank[0]}")
        predicted_college = predict_college(predicted_rank[0])
        print(f"Predicted College: {predicted_college}")

    # Visualization (Key Insights)
    if not topics_performance.empty:
        sns.barplot(x=topics_performance.index, y=topics_performance.values)
        plt.title("Average Score by Topic")
        plt.xticks(rotation=90)
        plt.show()




Error fetching Quiz Submission data: Mixing dicts with non-Series may lead to ambiguous ordering.




Quiz Data (First 5 rows):
                                               quiz
banner_id                                      None
coin_count                                       -1
correct_answer_marks                            4.0
created_at            2024-07-03T11:00:08.958+05:30
daily_date                         January 17, 2025
Quiz Submission Data is empty.

Historical Data (First 5 rows):
       id  quiz_id                       user_id  \
0  336497       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
1  336448        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
2  333330       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
3  333242        6  YcDFSO4ZukTJnnFMgRNVwZTE4j42   
4  329504       51  YcDFSO4ZukTJnnFMgRNVwZTE4j42   

                    submitted_at                     created_at  \
0  2025-01-17T15:30:18.027+05:30  2025-01-17T15:30:18.044+05:30   
1  2025-01-17T15:17:44.042+05:30  2025-01-17T15:17:44.056+05:30   
2  2025-01-16T20:13:19.682+05:30  2025-01-16T20:13:19.699+05:30   
3  2025-01-16T20:

