# Chat with Wikipedia - from Promptflow (a simplified version)

## Load the required packages and prepare some global variables

In [None]:
import random
import time
import common
from jinja2 import Template
import bs4
from get_url import decode_str, get_page_sentence

# Client used for every chat-completion request in this notebook; it is built
# by the project-local `common` helper.
client = common.get_openai_client()
# Passed as the `model` argument of each chat-completion request.
deployment_name = "gpt"
# Running conversation: one {"question": ..., "answer": ...} dict is appended
# per turn and rendered back into the next prompt.
chat_history = []

## Support function to render a Jinja2 template

In [None]:
def render_template(template_string, **kwargs) -> str:
    """Render a Jinja2 template string with the supplied variables.

    Args:
        template_string: Jinja2 template source text.
        **kwargs: Variables made available to the template while rendering.

    Returns:
        The rendered text.
    """
    compiled = Template(template_string)
    rendered = compiled.render(**kwargs)
    return rendered

## Execute an OpenAI call with a template

In [None]:
def Call_OpenAI(client, deployment_name, content, max_tokens=100, temperature=0.3):
    """Send one rendered prompt to the chat-completions endpoint and return the reply.

    Args:
        client: Object exposing ``chat.completions.create``.
        deployment_name: Value passed as the ``model`` argument of the request.
        content: Fully rendered prompt text.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to the request.

    Returns:
        The text of the first choice, or an empty string when the reply
        carries no text content.
    """
    response = client.chat.completions.create(
        model=deployment_name,
        messages=[
            # The rendered prompt is input *to* the model, so it is sent as a
            # user turn; sent as an assistant turn it would be presented as
            # something the model had already said.
            {"role": "user", "content": content},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    reply = response.choices[0].message.content
    # A reply without text has content None; str(None) would leak the literal
    # word "None" into the printed answer and the chat history.
    return "" if reply is None else str(reply)

## Scrape the Microsoft Wikipedia information

In [None]:
# Wikipedia page that is scraped for context and cited as the source in the prompts.
URI = "https://en.wikipedia.org/wiki/Microsoft"

def fetch_text_content_from_url(url: str, count: int = 10):
    """Download a web page and return a text excerpt built from its paragraphs and lists.

    The text of every ``<p>`` and ``<ul>`` element is collected, entries of
    two words or fewer are dropped, and the result is passed through the
    project helpers ``decode_str`` and ``get_page_sentence`` (imported from
    ``get_url``; presumably text clean-up and sentence truncation -- confirm
    against that module).

    Args:
        url: Address of the page to fetch.
        count: Forwarded to ``get_page_sentence`` as ``count``.

    Returns:
        A ``(url, text)`` tuple. ``text`` is ``"No available content"`` when
        the request fails, the status code is not 200, or any other error
        occurs; failures are printed rather than raised.
    """
    # The notebook never creates an HTTP session object, so the request is
    # issued with the standard library instead of an undefined `session`.
    import urllib.error
    import urllib.request

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35"
    }
    try:
        # Building the request first rejects a malformed URL before any delay.
        request = urllib.request.Request(url, headers=headers)
        # Small random pause before hitting the server.
        time.sleep(random.uniform(0, 0.5))
        try:
            # The timeout keeps an unresponsive server from hanging the cell.
            with urllib.request.urlopen(request, timeout=30) as response:
                status_code = response.status
                charset = response.headers.get_content_charset() or "utf-8"
                body = response.read().decode(charset, errors="replace")
        except urllib.error.HTTPError as http_error:
            # urllib raises on 4xx/5xx; convert it back into a status code so
            # the failure is reported with the status and a body excerpt.
            status_code = http_error.code
            body = http_error.read().decode("utf-8", errors="replace")

        if status_code != 200:
            msg = (
                f"Get url failed with status code {status_code}.\nURL: {url}\nResponse: "
                f"{body[:100]}"
            )
            print(msg)
            return (url, "No available content")

        # Parse the HTML content using BeautifulSoup
        soup = bs4.BeautifulSoup(body, "html.parser")
        page_content = [p_ul.get_text().strip() for p_ul in soup.find_all("p") + soup.find_all("ul")]
        parts = []
        for content in page_content:
            # Keep only entries with more than two space-separated words.
            if len(content.split(" ")) > 2:
                parts.append(decode_str(content))
            # Evaluated for every entry, including skipped ones; the entries
            # were stripped above, so a newline is appended each time.
            if not content.endswith("\n"):
                parts.append("\n")
        page = "".join(parts)
        text = get_page_sentence(page, count=count)
        return (url, text)

    except Exception as e:
        # Best-effort scraper: report the problem and fall back to a placeholder.
        print("Get url failed with error: {}".format(e))
        return (url, "No available content")

# Fetch the page once; the resulting (url, text) tuple is reused by the prompts below.
context = fetch_text_content_from_url(URI)
# Bare expression so the notebook displays the tuple.
context

## Prepare the Jinja2 template for the first time and print it

In [None]:
# First user question of the conversation.
question="Where is Microsoft?"

# Jinja2 prompt layout: system instructions, the retrieved context, every
# earlier question/answer pair from chat_history, then the current question.
template = """system:
You are a chatbot having a conversation with a human.
Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.

{{contexts}}

{% for item in chat_history %}
user:
{{item.question}}
assistant:
{{item.answer}}
{% endfor %}

user:
{{question}}"""

# context[1] is the scraped text; the page URL is appended to it as the source line.
content = render_template(template, contexts=context[1]+"\nSources:"+URI, chat_history=chat_history,question=question)
print(content)

## Submit the first question to OpenAI and add the question and answer to the history

In [None]:
# Send the rendered prompt, show the reply, and record the turn so the next
# rendered prompt includes it.
ans = Call_OpenAI(client,deployment_name,content)
print(ans)
chat_history.append({"question": question, "answer": ans})

## Submit the second question to OpenAI and add the question and answer to the history

In [None]:
# Second turn: re-render the prompt, which now embeds the earlier
# question/answer pair(s) stored in chat_history.
question = "What are some products?"
content = render_template(
    template,
    contexts=context[1] + "\nSources:" + URI,
    chat_history=chat_history,
    question=question,
)

ans = Call_OpenAI(client, deployment_name, content)
print(ans)

# Record the text returned by Call_OpenAI. The raw `response` object is local
# to that function and does not exist at cell scope.
chat_history.append({"question": question, "answer": ans})

## Print the chat_history

In [None]:
# Replay the whole conversation, one question/answer pair per turn.
for item in chat_history:
    print(f'Question: {item["question"]}')
    print(f'Answer: {item["answer"]}')