In [None]:
from otdb_fetcher import OTDBFetcher
from file_utils import save_questions

from categories import QuestionCategory
from question_type import QuestionType

# Categories to download and the question format requested for each of them.
categories = [
    QuestionCategory.HISTORY,
    QuestionCategory.GENERAL_KNOWLEDGE,
    QuestionCategory.SCIENCE_NATURE,
]
qtype = QuestionType.MULTIPLE_CHOICE

for category in categories:
    # A new fetcher instance is built for every category on purpose: this
    # mirrors the existing flow and avoids sharing any fetcher state.
    fetcher = OTDBFetcher()
    questions = fetcher.fetch_all_questions_in_category(
        category=category, question_type=qtype
    )

    # One output file per category, named after the enum member's name.
    filename = f"OTDB_{category.name}.json"
    save_questions(questions, filename)

    print(f"Saved questions for {category.name} to {filename}.")

In [None]:
from collections import Counter
from file_utils import load_questions_from_file

# Question files to summarise; each one is handed to load_questions_from_file.
files = [
    "OTDB_HISTORY.json",
    "OTDB_GENERAL_KNOWLEDGE.json",
    "OTDB_SCIENCE_NATURE.json",
]

# Print, per file, how many questions carry each difficulty value.
for file in files:
    questions = load_questions_from_file(file)
    difficulties = [question.difficulty for question in questions]

    # Tally the difficulty values collected above.
    counts = Counter()
    counts.update(difficulties)

    print(f"Difficulty counts for {file}: {counts}")


In [None]:
from collections import Counter
from file_utils import load_questions_from_file
import matplotlib.pyplot as plt
import pandas as pd

# Question files to chart; names follow the "OTDB_<CATEGORY>.json" pattern.
files = ["OTDB_HISTORY.json", "OTDB_GENERAL_KNOWLEDGE.json", "OTDB_SCIENCE_NATURE.json"]

# Column-oriented accumulator: one entry per file in each list.
difficulty_data = {"Category": [], "Easy": [], "Medium": [], "Hard": []}

# Process each file to count difficulties
for file in files:
    # Derive a readable category label from the file name.
    # Split only on the FIRST underscore so that multi-word categories stay
    # intact: "OTDB_GENERAL_KNOWLEDGE.json" -> "GENERAL KNOWLEDGE" (a plain
    # split("_")[1] would truncate it to "GENERAL").
    stem = file.replace(".json", "")
    category = stem.split("_", 1)[1].replace("_", " ")

    # Load questions and count how often each difficulty value occurs.
    questions = load_questions_from_file(file)
    difficulties = [q.difficulty for q in questions]
    counts = Counter(difficulties)

    # Append this file's row. Missing difficulties count as 0.
    # NOTE(review): assumes q.difficulty holds the lowercase strings
    # "easy" / "medium" / "hard" -- confirm against the question model.
    difficulty_data["Category"].append(category)
    difficulty_data["Easy"].append(counts.get("easy", 0))
    difficulty_data["Medium"].append(counts.get("medium", 0))
    difficulty_data["Hard"].append(counts.get("hard", 0))

# Build the frame with Category as the index so each category becomes one bar.
# Chained instead of inplace=True so that re-running the cell is idempotent.
df = pd.DataFrame(difficulty_data).set_index("Category")

# Plotting the data as a stacked bar chart
df.plot(kind="bar", stacked=True, figsize=(10, 6))
plt.title("Difficulty Counts by Category")
plt.xlabel("Category")
plt.ylabel("Number of Questions")
plt.xticks(rotation=45)
plt.legend(title="Difficulty")
plt.tight_layout()
plt.show()