In [2]:
import ast

def get_function_calls_and_defs(filename):
    with open(filename, "r") as f:
        tree = ast.parse(f.read())

    # Get function definitions
    defined_functions = {node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)}

    # Get function calls
    called_functions = {node.func.id for node in ast.walk(tree) if isinstance(node, ast.Call) and isinstance(node.func, ast.Name)}

    # Find unused functions
    unused_functions = defined_functions - called_functions

    return defined_functions, called_functions, unused_functions

# Run analysis on converted notebook script
defined, called, unused = get_function_calls_and_defs("solution 3.py")

print("Defined functions:", defined)
print("Called functions:", called)
print("Unused functions (safe to remove):", unused)


Defined functions: {'extract_text_from_file', 'evaluate_user_recommendations', 'create_user_profiles', 'simulate_user_recommendations', 'calculate_profile_similarity', 'calculate_term_frequencies', 'create_diverse_test_queries', 'determine_relevance', 'preprocess_text', 'preprocess_documents', 'search', 'calculate_precision_recall_at_k', 'calculate_precision_recall_curve', 'calculate_average_precision', 'personalized_ranking', 'load_documents', 'calculate_r_precision', 'comprehensive_evaluation', 'build_inverted_index', 'visualize_evaluation_results', 'generate_relevance_judgments', 'calculate_11point_interpolated_precision', 'main_improved', 'calculate_tfidf', 'display_inverted_index'}
Called functions: {'extract_text_from_file', 'evaluate_user_recommendations', 'create_user_profiles', 'simulate_user_recommendations', 'calculate_profile_similarity', 'PorterStemmer', 'calculate_term_frequencies', 'create_diverse_test_queries', 'determine_relevance', 'preprocess_text', 'preprocess_docum

In [1]:
import os
from bs4 import BeautifulSoup

# Base directory where HTML files are stored
base_dir = "bbc_articles_html_backup"

# Output list for markdown content
md_output = []

# Traverse each topic folder
for topic in os.listdir(base_dir):
    topic_path = os.path.join(base_dir, topic)

    # Ensure it's a directory
    if os.path.isdir(topic_path):
        md_output.append(f"### {topic.capitalize()}")  # Topic as Markdown header

        # Process each HTML file in the topic directory
        for file_name in sorted(os.listdir(topic_path)):
            if file_name.endswith(".html"):
                file_path = os.path.join(topic_path, file_name)

                with open(file_path, "r", encoding="utf-8") as file:
                    soup = BeautifulSoup(file, "html.parser")

                    # Extract the title
                    title_tag = soup.find("title")
                    title = title_tag.text.strip() if title_tag else "Untitled"

                    # Extract the URL from meta tag
                    meta_tag = soup.find("meta", attrs={"name": "apple-itunes-app"})
                    content = meta_tag.get("content", "") if meta_tag else ""
                    url = content.split("app-argument=")[-1] if "app-argument=" in content else "No URL"

                    # Append formatted markdown output
                    md_output.append(f"- **[{title}]({url})**")

# Save the output as a Markdown file
output_file = "bbc_articles_urls.md"
with open(output_file, "w", encoding="utf-8") as md_file:
    md_file.write("\n".join(md_output))

print(f"Markdown file '{output_file}' generated successfully!")


Markdown file 'bbc_articles_urls.md' generated successfully!


In [4]:
import re

def convert_bullets_to_numbered(md_file_path, output_file_path):
    """
    Converts a bullet-point markdown list to a numbered markdown list.

    Args:
        md_file_path (str): Path to the input markdown file with bullets.
        output_file_path (str): Path to save the numbered markdown file.
    """
    with open(md_file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()

    topic_count = 0  # Main section counter
    sub_count = 1  # Sub-item counter
    converted_lines = []

    for line in lines:
        # If it's a topic header (### Topic)
        if line.startswith("### "):
            topic_count += 1
            sub_count = 1  # Reset sub-item numbering
            converted_lines.append(f"{topic_count}. {line[4:].strip()}\n\n")  # Convert topic to numbered format

        # If it's a bullet point (starts with "- ")
        elif re.match(r"^\s*- ", line):
            converted_lines.append(f"   {topic_count}.{sub_count} {line.strip()[2:]}\n\n")  # Convert sub-item
            sub_count += 1  # Increment numbering

        else:
            converted_lines.append(line)  # Keep other lines unchanged

    # Save the converted Markdown file
    with open(output_file_path, "w", encoding="utf-8") as file:
        file.writelines(converted_lines)

    print(f"Converted markdown saved to {output_file_path}")


# Example Usage
convert_bullets_to_numbered("bbc_articles_urls.md", "bbc_articles_numbered.md")


Converted markdown saved to bbc_articles_numbered.md
