In [22]:
import requests
from bs4 import BeautifulSoup

class RequestError(Exception):
    """Signal that an HTTP request could not be completed successfully.

    Attributes:
        status_code: HTTP status code associated with the failure, or
            ``None`` when the caller did not supply one.
    """

    def __init__(self, message, status_code=None):
        # Record the optional status first; the message itself is stored by
        # the base Exception and is what str(error) returns.
        self.status_code = status_code
        super().__init__(message)

def make_get_request(url, headers=None, timeout=30):
    """Perform a GET request and return the response.

    Args:
        url: Address to fetch.
        headers: Optional mapping of HTTP headers to send.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. ``None`` waits indefinitely.

    Returns:
        The ``requests.Response`` when the server answered with a
        non-error status.

    Raises:
        RequestError: On an HTTP error status (``status_code`` is set), or on
            any other ``requests`` failure such as a connection error or a
            timeout (``status_code`` stays ``None``).
    """
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for 4xx/5xx statuses
    except requests.exceptions.HTTPError as e:
        # Read the response off the exception rather than the local name, so
        # the handler does not depend on how far the try body got.
        failed = e.response
        if failed is None:
            raise RequestError(f"Request Exception: {e}") from e
        raise RequestError(
            f"HTTP Error: {failed.status_code} - {failed.text}",
            status_code=failed.status_code,
        ) from e
    except requests.exceptions.RequestException as e:
        # Chain the cause so the original traceback is preserved.
        raise RequestError(f"Request Exception: {e}") from e
    return response

def make_post_request(url, data=None, json=None, headers=None, timeout=None):
    """Perform a POST request and return the response.

    Args:
        url: Address to post to.
        data: Optional form-encoded body, forwarded to ``requests.post``.
        json: Optional JSON-serializable body, forwarded to ``requests.post``.
        headers: Optional mapping of HTTP headers to send.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.post``. The default ``None`` waits
            indefinitely, which keeps the previous behavior for slow
            endpoints; pass a number to bound the wait.

    Returns:
        The ``requests.Response`` when the server answered with a
        non-error status.

    Raises:
        RequestError: On an HTTP error status (``status_code`` is set), or on
            any other ``requests`` failure such as a connection error or a
            timeout (``status_code`` stays ``None``).
    """
    try:
        response = requests.post(
            url, data=data, json=json, headers=headers, timeout=timeout
        )
        response.raise_for_status()  # Raise HTTPError for 4xx/5xx statuses
    except requests.exceptions.HTTPError as e:
        # Read the response off the exception rather than the local name, so
        # the handler does not depend on how far the try body got.
        failed = e.response
        if failed is None:
            raise RequestError(f"Request Exception: {e}") from e
        raise RequestError(
            f"HTTP Error: {failed.status_code} - {failed.text}",
            status_code=failed.status_code,
        ) from e
    except requests.exceptions.RequestException as e:
        # Chain the cause so the original traceback is preserved.
        raise RequestError(f"Request Exception: {e}") from e
    return response

def query_llm(text, endpoint="http://localhost:1234/v1/chat/completions", max_tokens=400):
    """Send a single user message to the chat-completions endpoint.

    Args:
        text: Prompt sent as the content of one ``user`` message.
        endpoint: URL of the chat-completions API to post to.
        max_tokens: Value sent as ``max_tokens`` in the request body.

    Returns:
        The stripped ``content`` of the first choice's message. Returns
        ``"No content returned."`` when that field is missing or is not a
        string, and a string starting with ``"Error:"`` when the request
        fails or the response body cannot be interpreted.
    """
    data = {"messages": [{"role": "user", "content": text}], "max_tokens": max_tokens}
    try:
        response = make_post_request(endpoint, json=data)
        payload = response.json()
        choices = payload.get('choices', [{}])
        message = choices[0].get('message', {})
        content = message.get('content', "No content returned.")
    except (
        KeyError,
        IndexError,
        RequestError,
        ValueError,      # body was not valid JSON
        AttributeError,  # payload / choice / message was not a mapping
        TypeError,       # 'choices' was not subscriptable (e.g. null)
    ) as e:
        return f"Error: {str(e)}"
    # A null or non-string content would otherwise crash on .strip().
    if not isinstance(content, str):
        return "No content returned."
    return content.strip()

def get_wiki_content(url, token_limit=4096):
    """Download a page and return the text of its ``<p>`` elements.

    Tokens here are whitespace-separated words. The token count is always
    printed; when it exceeds ``token_limit`` the text is cut down to the
    first ``token_limit`` words and a notice is printed.

    Args:
        url: Page to fetch.
        token_limit: Maximum number of whitespace-separated words to keep.

    Returns:
        The (possibly truncated) paragraph text, or a string starting with
        ``"Error:"`` if the request fails.
    """
    try:
        page = make_get_request(url)
        parsed = BeautifulSoup(page.content, 'html.parser')
        paragraphs = [tag.get_text() for tag in parsed.find_all('p')]
        full_text = ' '.join(paragraphs)
        words = full_text.split()
        word_count = len(words)
        print(f"Wikipedia page token count: {word_count}")  # Reported on every call
        if word_count <= token_limit:
            return full_text
        # Too long: keep only the leading words, re-joined with single spaces.
        print(f"Content truncated to {token_limit} tokens.")
        return ' '.join(words[:token_limit])
    except RequestError as err:
        return f"Error: {err}"


def summarize_wiki_article_with_limit(url, token_limit=4096, summary_word_limit=200):
    """Summarize the article at ``url`` with the local LLM.

    The article text is fetched with at most ``token_limit`` words, sent to
    the LLM with a request for roughly ``summary_word_limit`` words, and the
    reply is hard-capped at ``summary_word_limit`` words.

    Args:
        url: Article to summarize.
        token_limit: Word cap applied to the fetched article text.
        summary_word_limit: Target and hard cap, in words, for the summary.

    Returns:
        The summary text, or a string starting with ``"Error:"`` if fetching
        the article or querying the LLM failed.
    """
    try:
        article = get_wiki_content(url, token_limit=token_limit)
        # Failures from the helpers arrive as "Error:"-prefixed strings.
        if article.startswith("Error:"):
            return article

        prompt = f"Summarize the following Wikipedia article content in about {summary_word_limit} words: {article}"
        reply = query_llm(prompt)
        if reply.startswith("Error:"):
            return reply

        # Enforce the word cap in case the model ignored the requested length.
        reply_words = reply.split()
        if len(reply_words) <= summary_word_limit:
            return reply
        return ' '.join(reply_words[:summary_word_limit])
    except RequestError as err:
        return f"Error: {err}"

def answer_question(question, url):
    """Answer ``question`` using the article at ``url`` as context.

    Args:
        question: Question to put to the LLM.
        url: Article whose paragraph text is supplied as context.

    Returns:
        The LLM's reply, or a string starting with ``"Error:"`` if fetching
        the article or querying the LLM failed.
    """
    try:
        article = get_wiki_content(url)
        if not article.startswith("Error:"):
            prompt = f"Given the context: {article}, answer the question: {question}"
            return query_llm(prompt)
        # Fetch failed: pass the "Error:" message straight back to the caller.
        return article
    except RequestError as err:
        return f"Error: {err}"

# Demo run: summarize a Wikipedia article, then ask a question about it.
# Guarded so that importing this module does not trigger network calls; the
# code still runs when executed as a script or in a notebook cell.
if __name__ == "__main__":
    wiki_url = "https://en.wikipedia.org/wiki/Arnold_Bax"
    summary = summarize_wiki_article_with_limit(wiki_url, token_limit=4096, summary_word_limit=200)
    print("Summary:", summary)

    print("\n")

    specific_question = "Based on the text, provide a cohesive description, in about 100 words, of the countries the composer had affiliations with or citizenships in"
    answer = answer_question(specific_question, wiki_url)
    print("Answer:", answer)


Wikipedia page token count: 5824
Content truncated to 4096 tokens.
Summary: Arnold Bax (1882-1953) was an English composer who was active during the early 20th century. He is best known for his orchestral works, particularly his seven symphonies, which are considered some of the most important works of the period. Bax's music is characterized by its use of rich harmonies and complex textures, as well as its incorporation of elements from Celtic and other folk traditions. He was also known for his ability to evoke a sense of atmosphere and mood in his compositions. Bax was born in London and studied at the Royal College of Music, where he was exposed to a wide range of musical influences, including the works of Richard Strauss and Gustav Mahler. He later travelled to Germany, where he studied with Arnold Schoenberg and other prominent composers of the time. Bax's early works were heavily influenced by the Impressionist movement in music, but he later developed his own unique style that 