In [2]:
import os
import dotenv
from hugchat import hugchat
from hugchat.login import Login

In [3]:
dotenv.load_dotenv("./secrets.env")

True

In [4]:
EMAIL = os.environ['HUGGING_CHAT_USERNAME']
PASSWD = os.environ['HUGGING_CHAT_PASSWORD']
cookie_path_dir = "./cookies/" # NOTE: trailing slash (/) is required to avoid errors
sign = Login(EMAIL, PASSWD)
cookies = sign.login(cookie_dir_path=cookie_path_dir, save_cookies=True)

In [5]:
chatbot = hugchat.ChatBot(cookies=cookies.get_dict())

# print(chatbot.chat("Tell me about Alpha Centauri").wait_until_done())

In [7]:
models = chatbot.get_available_llm_models()
display([m.name for m in models])

['meta-llama/Llama-3.3-70B-Instruct',
 'Qwen/Qwen2.5-72B-Instruct',
 'CohereForAI/c4ai-command-r-plus-08-2024',
 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
 'Qwen/QwQ-32B-Preview',
 'Qwen/Qwen2.5-Coder-32B-Instruct',
 'meta-llama/Llama-3.2-11B-Vision-Instruct',
 'NousResearch/Hermes-3-Llama-3.1-8B',
 'mistralai/Mistral-Nemo-Instruct-2407',
 'microsoft/Phi-3.5-mini-instruct']

In [8]:
chosen_model_name = 'NousResearch/Hermes-3-Llama-3.1-8B'
model_names = [m.name for m in models]
idx = model_names.index(chosen_model_name)

chatbot.switch_llm(idx)

True

In [9]:
content = ''
with open(os.path.join('../data', 'attention-is-all-you-need.txt'), 'r') as f:
    content = f.read()

In [19]:
prompt = "Read the following paper, don't summarize, set up 2 interesting questions with for and against premises for a debate.\n{}\n Don't forget to share 2 interesting questions for the debate setup with for and against positions".format(content)

for chunk in chatbot.chat(prompt):
    if chunk:
        print(chunk["token"], end='')

Here are 2 interesting questions for a debate setup with for and against positions related to the Transformer model:

**Question 1: Should the Transformer model be used as the primary architecture for all natural language processing tasks?**

* **For position:** The Transformer model has shown state-of-the-art results in many NLP tasks, including machine translation, text classification, and question answering. Its ability to handle long-range dependencies and parallelize computation makes it an ideal choice for many applications. Furthermore, the Transformer's modular design allows for easy modification and extension, making it a versatile architecture for a wide range of NLP tasks.
* **Against position:** While the Transformer model has achieved impressive results in some NLP tasks, it may not be the best choice for all tasks. For example, tasks that require strong sequential dependencies, such as language modeling or text generation, may be better suited for recurrent neural network

In [26]:
debate_text = """
Here are 2 interesting questions for a debate setup with for and against positions related to the Transformer model:

**Question 1: Should the Transformer model be used as the primary architecture for all natural language processing tasks?**

* **For position:** The Transformer model has shown state-of-the-art results in many NLP tasks, including machine translation, text classification, and question answering. Its ability to handle long-range dependencies and parallelize computation makes it an ideal choice for many applications. Furthermore, the Transformer's modular design allows for easy modification and extension, making it a versatile architecture for a wide range of NLP tasks.
* **Against position:** While the Transformer model has achieved impressive results in some NLP tasks, it may not be the best choice for all tasks. For example, tasks that require strong sequential dependencies, such as language modeling or text generation, may be better suited for recurrent neural network (RNN) architectures. Additionally, the Transformer's reliance on self-attention mechanisms can lead to computational inefficiencies and overfitting in some cases.

**Question 2: Is the Transformer model's reliance on self-attention mechanisms a significant limitation for its application in real-world NLP tasks?**

* **For position:** The Transformer model's self-attention mechanisms are a significant limitation for its application in real-world NLP tasks. Self-attention requires computing attention weights for all pairs of input elements, which can lead to quadratic computational complexity and make it difficult to apply the model to long sequences or large datasets. Furthermore, self-attention may not be able to capture certain types of dependencies, such as hierarchical or compositional relationships, which are important in many NLP tasks.
* **Against position:** The Transformer model's self-attention mechanisms are a key strength, allowing the model to capture complex dependencies and relationships between input elements. While it is true that self-attention can be computationally expensive, there are many techniques that can be used to mitigate this cost, such as using sparse attention patterns or approximating attention weights. Additionally, the Transformer's self-attention mechanisms have been shown to be highly effective in many NLP tasks, including machine translation, question answering, and text classification, and are likely to remain a key component of many NLP architectures in the future.
"""

def extract_topics(text):
    debate = {'1': {}, '2': {}}
    lines = text.split("\n")
    curr = 1
    for_pattern1 = '**For position:**'.lower()
    for_pattern2 = '**For:**'.lower()
    against_pattern1 = '**Against position:**'.lower()
    against_pattern2 = '**Against:**'.lower()
    ques_pattern = '**Question 1'.lower()
    for l in lines:
        if len(debate[str(curr)].keys()) == 3:
            curr += 1
        if ques_pattern in l.lower():
            idx = l.lower().find(ques_pattern)
            debate[str(curr)]['question'] = l[idx + len(ques_pattern) + 1:]
        if for_pattern1 in l.lower():
            idx = l.lower().find(for_pattern1)
            if curr == 1:
                debate[str(curr)]['for'] = l[idx + len(for_pattern1) + 1:]
            else:
                debate[str(curr)]['for'] = l[idx + len(for_pattern1) + 1:]
        elif for_pattern2 in l.lower():
            idx = l.lower().find(for_pattern1)
            if curr == 1:
                debate[str(curr)]['for'] = l[idx + len(for_pattern1) + 1:]
            else:
                debate[str(curr)]['for'] = l[idx + len(for_pattern1) + 1:]
        if against_pattern1 in l.lower():
            idx = l.lower().find(against_pattern1)
            if curr == 1:
                debate[str(curr)]['against'] = l[idx + len(against_pattern1) + 1:]
            else:
                debate[str(curr)]['against'] = l[idx + len(against_pattern1) + 1:]
        elif against_pattern2 in l.lower():
            idx = l.lower().find(against_pattern2)
            if curr == 1:
                debate[str(curr)]['against'] = l[idx + len(against_pattern2) + 1:]
            else:
                debate[str(curr)]['against'] = l[idx + len(against_pattern2) + 1:]
    return debate

In [27]:
extract_topics(debate_text)

{'1': {'question': ' Should the Transformer model be used as the primary architecture for all natural language processing tasks?**',
  'for': "The Transformer model has shown state-of-the-art results in many NLP tasks, including machine translation, text classification, and question answering. Its ability to handle long-range dependencies and parallelize computation makes it an ideal choice for many applications. Furthermore, the Transformer's modular design allows for easy modification and extension, making it a versatile architecture for a wide range of NLP tasks.",
  'against': "While the Transformer model has achieved impressive results in some NLP tasks, it may not be the best choice for all tasks. For example, tasks that require strong sequential dependencies, such as language modeling or text generation, may be better suited for recurrent neural network (RNN) architectures. Additionally, the Transformer's reliance on self-attention mechanisms can lead to computational inefficien