In [1]:
!pip install openai==0.28
!pip install beautifulsoup4
import openai, os, torch, re, requests, spacy, json
import torch.nn.functional as F
import numpy as np
from google.colab import drive
from transformers import BertTokenizer, AutoModelForSequenceClassification, AutoModel, RobertaTokenizer, RobertaModel
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DebertaForSequenceClassification, DebertaTokenizer
from transformers import AlbertTokenizer, AlbertForSequenceClassification
from scipy.spatial.distance import cosine
from torch import nn
from bs4 import BeautifulSoup
# Prefer the GPU when CUDA is available; the models loaded below and the tensors
# built by the scoring functions are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mount Google Drive (Colab only): the checkpoint paths used when loading the
# models all live under /content/drive.
drive.mount('/content/drive')

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
Successfully installed openai-0.28.0
Mounted at /content/drive


In [55]:
# Hyperparameters
# Score threshold handed to generate_multiple_pyramids / generate_and_evaluate.
threshold = 0.65
# Question Example usage
question = "Which AI companies are worth investing in?"
# Number of candidate pyramids generate_multiple_pyramids should produce.
num_pyramids = 3

# Bing API & OpenAI API
# Credentials are read from the environment so they never have to be pasted
# into (and accidentally saved with) the notebook. An unset variable falls back
# to the empty string that was used here before.
bing_api_key = os.environ.get('BING_API_KEY', '')
openai.api_key = os.environ.get('OPENAI_API_KEY', '')

In [40]:
class ContrastiveRoberta(nn.Module):
    """RoBERTa encoder producing first-token embeddings for an argument and its evidences.

    The encoder is stored as ``self.roberta`` (loaded from ``'roberta-base'``).
    With ``freeze_layers=True`` every encoder parameter is frozen except those of
    the last three transformer layers.
    """

    def __init__(self, freeze_layers=True):
        super().__init__()
        self.roberta = RobertaModel.from_pretrained('roberta-base')

        if not freeze_layers:
            return

        # Switch gradients off everywhere first ...
        for weight in self.roberta.parameters():
            weight.requires_grad = False

        # ... then switch them back on for the last three encoder layers.
        for block in self.roberta.encoder.layer[-3:]:
            for weight in block.parameters():
                weight.requires_grad = True

    def _first_token_embedding(self, **encoder_inputs):
        """Run the encoder and keep the hidden state at sequence position 0."""
        return self.roberta(**encoder_inputs).last_hidden_state[:, 0, :]

    def forward(self, arg_tokens, pos_tokens, neg_tokens):
        """Embed the argument, the positive evidence and every negative evidence.

        ``arg_tokens`` and ``pos_tokens`` are keyword mappings forwarded to the
        encoder. ``neg_tokens['input_ids']`` and ``neg_tokens['attention_mask']``
        are 3-D (batch, negatives, sequence); they are flattened to 2-D for the
        encoder and the result is reshaped back to (batch, negatives, hidden).

        Returns:
            Tuple ``(arg_emb, pos_emb, neg_embs)``.
        """
        arg_emb = self._first_token_embedding(**arg_tokens)
        pos_emb = self._first_token_embedding(**pos_tokens)

        batch_size, num_negatives, seq_length = neg_tokens['input_ids'].shape

        # Collapse (batch, negatives) into one leading axis for the encoder call.
        flat_ids = neg_tokens['input_ids'].view(-1, seq_length)
        flat_mask = neg_tokens['attention_mask'].view(-1, seq_length)

        flat_embs = self._first_token_embedding(input_ids=flat_ids, attention_mask=flat_mask)
        neg_embs = flat_embs.view(batch_size, num_negatives, -1)

        return arg_emb, pos_emb, neg_embs


class SupportScoreModel(nn.Module):
    """Scores how well an evidence supports an argument.

    Wraps a contrastive model and reuses its ``roberta`` encoder: both inputs are
    encoded, the hidden state at sequence position 0 is taken for each, and the
    cosine similarity of the two vectors is the support score.
    """

    def __init__(self, contrastive_model):
        super().__init__()
        self.contrastive_model = contrastive_model

    def forward(self, arg_tokens, evidence_tokens):
        """Return the cosine similarity (one value per batch row) of the two encodings."""
        encoder = self.contrastive_model.roberta
        arg_emb, evidence_emb = (
            encoder(**tokens).last_hidden_state[:, 0, :]
            for tokens in (arg_tokens, evidence_tokens)
        )
        return F.cosine_similarity(arg_emb, evidence_emb, dim=-1)

# Load Model1 BERT Model(relevance)
# Tokenizer comes from the stock 'bert-base-uncased' vocabulary; the classifier
# weights come from a checkpoint directory on the mounted Drive.
tokenizer1 = BertTokenizer.from_pretrained('bert-base-uncased')
model_path1 = '/content/drive/My Drive/NLP Project/checkpoint-MACEP'
model1 = AutoModelForSequenceClassification.from_pretrained(model_path1)
model1 = model1.to(device)

# Load Model2 RoBERTa Model(support)
# ContrastiveRoberta() first builds a 'roberta-base' encoder, then the saved
# state dict overwrites its weights. The state dict is mapped to CPU on load and
# the wrapped SupportScoreModel is moved to `device` afterwards.
tokenizer2 = RobertaTokenizer.from_pretrained('roberta-base')
contrastive_model = ContrastiveRoberta()
model_path = '/content/drive/My Drive/NLP Project/sup_model/best_model.pt'
contrastive_model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
support_model = SupportScoreModel(contrastive_model)
support_model = support_model.to(device)

# Load Model3 ALBERT Model(coherence)
# Tokenizer and classifier are both read from the same directory.
model_path3 = '/content/drive/My Drive/NLP Project/CoUDA/models'
tokenizer3 = AlbertTokenizer.from_pretrained(model_path3)
model3 = AlbertForSequenceClassification.from_pretrained(model_path3)
model3 = model3.to(device)

# NOTE(review): model3 is the only model that gets an explicit .eval() call in
# this cell; confirm the other models are in inference mode as intended.
model3.eval()

# Load Model4 DeBERTa Model(completeness)
# Tokenizer and classifier are both read from the same directory.
model_path4 = '/content/drive/My Drive/NLP Project/sufficiency/completeness_model_param'
tokenizer4 = DebertaTokenizer.from_pretrained(model_path4)
model4 = DebertaForSequenceClassification.from_pretrained(model_path4)
model4 = model4.to(device)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [41]:
def fetch_documents(query, api_key):
    """Use the Bing Search API to get documents related to a query.

    Args:
        query: Search string sent as the ``q`` parameter.
        api_key: Bing subscription key, sent in the ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        The list found under ``webPages.value`` in the JSON response. An empty
        list is returned when the request fails (network error or non-200
        status) or when the response contains no ``webPages`` section.
    """
    endpoint = 'https://api.bing.microsoft.com/v7.0/search'
    headers = {'Ocp-Apim-Subscription-Key': api_key}
    params = {'q': query, 'textDecorations': True, 'textFormat': 'HTML'}

    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(endpoint, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return []

    if response.status_code != 200:
        print(f"Request failed with status code：{response.status_code}")
        return []

    # A successful response may carry no 'webPages' entry (e.g. no results),
    # so look the keys up defensively instead of indexing.
    return response.json().get('webPages', {}).get('value', [])


def download_webpage(url):
    """Fetch ``url`` (5 second timeout) and return the response body as text.

    Any ``requests.RequestException`` — including the HTTP error raised by
    ``raise_for_status`` — is swallowed and reported as ``None``.
    """
    try:
        page = requests.get(url, timeout=5)
        page.raise_for_status()
        body = page.text
        return body
    except requests.RequestException:
        return None


def extract_evidence_from_web(question, api_key):
    """Search the web for ``question`` and turn the result pages into propositions.

    Each search hit is downloaded, reduced to its main text and passed through
    the propositionizer model; the propositions of all pages that could be
    downloaded are collected into one list.

    Args:
        question: The query to search for; also given to the propositionizer.
        api_key: Bing subscription key used for the search request.

    Returns:
        A list of proposition strings. The list is empty when the search
        returned nothing or no page could be downloaded.
    """
    # Fetch documents from Bing Search API. The key passed by the caller is used
    # (rather than a module-level global), and the search runs first so the
    # large model is not loaded when there is nothing to process.
    documents = fetch_documents(question, api_key)
    if not documents:
        print("No documents found.")
        # An empty list (not None) keeps downstream "\n\n".join(...) calls working.
        return []

    # Load the Flan-T5-Large model
    model_name = "chentong00/propositionizer-wiki-flan-t5-large"
    tokenizer5 = AutoTokenizer.from_pretrained(model_name)
    model5 = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

    # Accumulate across pages: assigning inside the loop would keep only the
    # last page and leave the name unbound if every download failed.
    # NOTE(review): more evidence means a longer prompt downstream; confirm it
    # still fits the chat model's context window.
    propositions = []

    # Process each document
    for doc in documents:
        url = doc['url']
        title = doc['name']
        print(f"Title: {title}")
        print(f"URL: {url}")
        # Download the full webpage content
        html_content = download_webpage(url)
        if not html_content:
            continue

        # Extract the main content from the HTML
        main_content = extract_main_content(html_content)

        # Extract propositions, including the query in the input text
        extracted = extract_propositions(question, main_content, model5, tokenizer5)
        # The model output is parsed as JSON, so it is not guaranteed to be a
        # list of strings; normalise it so callers can always join the result.
        if isinstance(extracted, (list, tuple)):
            propositions.extend(str(item) for item in extracted)
        else:
            propositions.append(str(extracted))

    return propositions

def extract_main_content(html_content):
    """Return the visible text of the page's main container.

    The container is the first ``<article>`` element if there is one, otherwise
    the first ``<div class="article">``, otherwise the whole parsed document.
    Text fragments are stripped and joined with single spaces.
    """
    parsed = BeautifulSoup(html_content, 'html.parser')

    container = parsed.find('article')
    if not container:
        container = parsed.find('div', {'class': 'article'})
    if not container:
        # Neither marker present: fall back to the full document.
        container = parsed

    return container.get_text(separator=' ', strip=True)

def extract_propositions(question, text, model, tokenizer):
    """Generate propositions for ``text`` with a seq2seq model, using the query as title.

    The model input is ``"Title: <question>. Section: . Content: <text>"``,
    truncated to 512 tokens; up to 512 new tokens are generated. The decoded
    output is parsed as JSON; if it is not valid JSON the raw output string is
    returned as a single-element list.
    """
    prompt = f"Title: {question}. Section: . Content: {text}"

    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    input_ids = encoded.input_ids.to(device)

    generated = model.generate(input_ids, max_new_tokens=512)
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    try:
        return json.loads(decoded)
    except json.JSONDecodeError:
        # Fallback to raw output if JSON parsing fails
        return [decoded]

In [53]:
def parse_response(response):
    """Parse a generated argument pyramid into its claim, arguments and evidence.

    Recognised markup:
      * ``**Claim:** <text>`` - the claim is the rest of that line.
      * ``**Argument N: <title>**`` or ``**Argument N:** <title>`` (one or two
        asterisks) - the title ends at the closing ``**`` or at the end of the
        line, whichever comes first.
      * ``- Evidence N: <text>`` (optionally bold) - one evidence per line.

    Every argument owns the text between its header and the next argument
    header (or the end of the response), so arguments are separated correctly
    whether or not blank lines appear between them.

    Args:
        response: The raw text returned by the model.

    Returns:
        ``{'claim': str, 'arguments': [{'title': str, 'evidence': [str, ...]}, ...]}``.
        ``claim`` is ``''`` when no claim line is found.
    """
    parsed_response = {'claim': '', 'arguments': []}

    # match Claim: everything after the marker up to the end of that line
    # (also works when the claim is the last line and has no trailing newline).
    claim_match = re.search(r'\*\*Claim:\*\*([^\n]*)', response)
    if claim_match:
        parsed_response['claim'] = claim_match.group(1).strip()

    # One or two asterisks before "Argument", optional asterisks and an optional
    # space after the colon; the title never runs past its own line.
    header_pattern = re.compile(r'\*{1,2}Argument \d+:\*{0,2} ?([^\n]*?)(?=\*\*|\n|\Z)')
    evidence_pattern = re.compile(r'- \*{0,2}Evidence \d+:\*{0,2} ([^\n]*)')

    headers = list(header_pattern.finditer(response))
    for index, header in enumerate(headers):
        # The argument's section stops where the next argument header starts.
        if index + 1 < len(headers):
            section_end = headers[index + 1].start()
        else:
            section_end = len(response)
        section = response[header.end():section_end]

        # Drop a stray closing asterisk left by single-asterisk emphasis.
        title = header.group(1).strip().rstrip('*').strip()
        evidence = [item.strip() for item in evidence_pattern.findall(section)]
        parsed_response['arguments'].append({'title': title, 'evidence': evidence})

    return parsed_response


def calculate_claim_argument_quality(claim, arguments, tokenizer1, model1, device):
    """Score each argument title against the claim with the relevance model.

    For every argument the text ``claim + " [SEP] " + title`` is tokenized
    (padded/truncated to 512 tokens) and passed to ``model1``; the single logit
    is clamped to the range [0, 1] and used as the score.

    Returns:
        A list with one float per argument, in input order.
    """

    def _score(title):
        encoded = tokenizer1(
            claim + " [SEP] " + title,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=512,
        )
        batch = {name: tensor.to(device) for name, tensor in encoded.items()}

        with torch.no_grad():
            clipped = torch.clamp(model1(**batch).logits, min=0, max=1)
            return clipped.squeeze().item()

    return [_score(argument['title']) for argument in arguments]

def calculate_argument_evidence_quality(arguments, tokenizer2, support_model, device):
    """Score every (argument title, evidence) pair with the support model.

    Both texts are tokenized (padded/truncated to 512 tokens), moved to
    ``device`` and passed to ``support_model``; its scalar output is the score.

    Returns:
        A list with one inner list per argument holding one float per evidence.
        An argument without evidence yields an empty inner list.
    """

    def _encode(text):
        encoded = tokenizer2(text, return_tensors='pt', padding="max_length", truncation=True, max_length=512)
        return {name: tensor.to(device) for name, tensor in encoded.items()}

    support_scores = []
    for argument in arguments:
        per_evidence = []
        for evidence in argument['evidence']:
            arg_tokens = _encode(argument['title'])
            evidence_tokens = _encode(evidence)

            with torch.no_grad():
                per_evidence.append(support_model(arg_tokens, evidence_tokens).item())
        support_scores.append(per_evidence)

    return support_scores



def calculate_coh_scores(claim, arguments, tokenizer3, model3, device):
    """Compute one global and several local coherence scores for a pyramid.

    Text blocks scored, in order:
      1. the claim followed by every "title + evidences" text (global score),
      2. the claim followed by all argument titles,
      3. one block per argument: its title followed by its evidences.

    All blocks are encoded as one batch; the score of a block is the softmax
    probability of class index 1.

    Returns:
        ``(global_score, local_scores)`` - the score of block 1 and the list of
        scores of the remaining blocks.
    """
    # "title evidence1 evidence2 ..." for each argument; reused by blocks 1 and 3.
    per_argument = [arg['title'] + " " + " ".join(arg['evidence']) for arg in arguments]

    whole_pyramid = " ".join([claim] + per_argument)
    claim_with_titles = claim + " " + " ".join(arg['title'] for arg in arguments)
    text_blocks = [whole_pyramid, claim_with_titles] + per_argument

    # Encode and compute scores
    encoding = tokenizer3(text_blocks, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        probabilities = torch.softmax(model3(**encoding).logits, dim=-1)
        class_one = probabilities[:, 1].tolist()

    global_score, local_scores = class_one[0], class_one[1:]
    return global_score, local_scores

def calculate_completeness(claim, arguments, tokenizer, model, device):
    """Return the model's probability that the arguments are sufficient for the claim.

    The claim and the space-joined argument titles are encoded as a text pair
    (truncated to 512 tokens). ``model`` is moved to ``device`` before the
    forward pass. The result is the softmax probability of class index 1 for
    the single input row.
    """
    titles = " ".join(argument['title'] for argument in arguments)

    encoded = tokenizer(claim, titles, return_tensors="pt", truncation=True, padding=True, max_length=512)
    model.to(device)
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Make predictions
    with torch.no_grad():
        logits = model(**batch).logits

    # Probability of being sufficient (class 1)
    class_probabilities = torch.softmax(logits, dim=-1)
    return class_probabilities[0][1].item()


def safe_average(scores):
    """Mean of ``scores`` with NaN entries counted as 0; ``0.0`` for an empty input."""
    if not scores:
        return 0.0
    # np.nan_to_num replaces NaN by 0 before averaging.
    return np.mean(np.nan_to_num(scores))

def safe_convert_to_numeric(scores):
    """Return ``scores`` as a float array with every non-number replaced by 0.

    Python ``int``/``float`` values and NumPy integer/floating scalars (for
    example ``np.float32``, which is not a subclass of ``float``) are kept; any
    other entry, as well as NaN, becomes 0.
    """
    numeric_types = (int, float, np.integer, np.floating)
    # Convert a non-number to NaN and replace it with a 0
    numeric_scores = [score if isinstance(score, numeric_types) else np.nan for score in scores]
    return np.nan_to_num(numeric_scores)

def calculate(argument_pyramid):
    """Score a generated argument pyramid on four quality dimensions.

    The pyramid text is parsed, then scored with the module-level models:
      * relevance    - mean claim/argument score (``safe_average``),
      * support      - mean over every argument/evidence score,
      * coherence    - 0.8 * global score + 0.2 * weighted local score, where
                       the first local score weighs 0.4 and the remaining ones
                       share 0.6 equally,
      * completeness - average of the classifier score and the question-based
                       score obtained through the chat model.

    Intermediate scores and a summary box are printed.

    Args:
        argument_pyramid: Raw pyramid text as produced by the chat model.

    Returns:
        ``(relevance_score, support_score, coh_score, completeness_score)``.
    """
    parsed_response = parse_response(argument_pyramid)
    claim = parsed_response['claim']
    arguments = parsed_response['arguments']

    relevance_scores = calculate_claim_argument_quality(claim, arguments, tokenizer1, model1, device)
    print(f"\nRelevance_scores: {relevance_scores}")

    support_scores = calculate_argument_evidence_quality(arguments, tokenizer2, support_model, device)
    print(f"Support_scores: {support_scores}")

    global_score, local_scores = calculate_coh_scores(claim, arguments, tokenizer3, model3, device)
    print("Global Coherence Score:", global_score)
    print("Local Coherence Scores:", local_scores)

    relevance_score = safe_average(relevance_scores)

    # support_scores holds one list per argument and the lists may differ in
    # length, so it cannot be averaged as a rectangular array. Flatten first;
    # for equal-length lists this gives the same overall mean.
    flat_support = [score for evidence_scores in support_scores for score in evidence_scores]
    support_score = float(np.nanmean(flat_support)) if flat_support else 0.0

    lambda_val = 0.2

    num_local_scores = len(local_scores)
    weights = [0.4] + [(0.6 / (num_local_scores - 1)) for _ in range(num_local_scores - 1)]
    weighted_local_score = np.average(local_scores, weights=weights)
    coh_score = global_score * (1 - lambda_val) + weighted_local_score * lambda_val

    # Generate questions
    questions = generate_questions(claim)

    # Check answers
    compl_scores = check_complet(questions, arguments)
    # Mean of the per-question scores (NaN counted as 0); 0 when there are no
    # questions, instead of the NaN an empty mean would produce.
    completeness_score_2 = float(np.mean(np.nan_to_num(compl_scores))) if compl_scores else 0.0
    completeness_score_1 = calculate_completeness(claim, arguments, tokenizer4, model4, device)

    completeness_score = completeness_score_1 * 0.5 + completeness_score_2 * 0.5

    # Print each question and its score
    for question, score in zip(questions, compl_scores):
        print(f"Question: {question}\nScore: {score}\n")

    print(f"""
    +-------------------------------------+
    | Overall completeness score: {completeness_score:.3f}
    | Relevance score: {relevance_score:.3f}
    | Support score: {support_score:.3f}
    | Coherence Score: {coh_score:.3f}
    +-------------------------------------+
    """)

    return relevance_score, support_score, coh_score, completeness_score


def generate_questions(claim):
    """Ask the chat model for probing questions about ``claim``.

    The reply is split into lines; each line is stripped and empty lines are
    dropped.

    Returns:
        The list of non-empty reply lines.
    """
    # Construct a prompt that encourages the generation of comprehensive questions
    prompt = f"""
    Considering the claim '{claim}', generate a list of detailed questions that cover various critical aspects necessary to assess the validity and completeness of supporting arguments. Focus on areas such as:
    - General Understanding
    - Context and Relevance
    - Assumptions and Implications
    - Counterarguments and Criticisms
    - Specific Aspects and Details
    Each question should invite a deep analysis of the arguments.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=conversation)

    reply = response['choices'][0]['message']['content']
    stripped_lines = (line.strip() for line in reply.strip().split('\n'))
    return [line for line in stripped_lines if line != '']


# Rubric values as they may appear in a reply (trailing zeros tolerated). The
# look-arounds keep the pattern from matching inside a longer number.
_RUBRIC_SCORE_RE = re.compile(r'(?<![\d.])(1\.0+|0\.750*|0\.50*|0\.250*|0\.0+)(?!\d)')


def _parse_rubric_score(answer):
    """Return the first rubric value mentioned in ``answer``, or 0.0 if there is none."""
    match = _RUBRIC_SCORE_RE.search(answer)
    return float(match.group(1)) if match else 0.0


def check_complet(questions, arguments):
    """Rate, per question, how well the arguments answer it (via the chat model).

    All argument titles and evidences are joined into one text. For each
    question the chat model is asked to grade that text on the rubric
    1.0 / 0.75 / 0.5 / 0.25 / 0.0, and the first rubric value mentioned in the
    reply is taken as the score. Taking the first mention - rather than testing
    for "1.0" before the other values - keeps a reply such as
    "0.75 ... to reach 1.0 ..." from being scored as 1.0.

    Args:
        questions: Iterable of question strings.
        arguments: Parsed arguments (dicts with ``'title'`` and ``'evidence'``).

    Returns:
        A list with one float per question; 0.0 when no rubric value is found.
    """
    scores = []
    arguments_text = " ".join([arg['title'] + " " + " ".join(arg['evidence']) for arg in arguments])

    for question in questions:
        prompt = f"""
        Assess the response based on how well the provided arguments address this question:
        '{question}'
        Arguments: {arguments_text}
        Score the response as follows:
        - 1.0 for fully addressed and well supported
        - 0.75 for mostly addressed but some details missing
        - 0.5 for adequately addressed but lacking depth
        - 0.25 for minimally addressed with very little detail
        - 0.0 for not addressed at all
        """
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": "You are a helpful assistant."},
                      {"role": "user", "content": prompt}]
        )
        answer = response['choices'][0]['message']['content'].lower()
        scores.append(_parse_rubric_score(answer))

    return scores

In [51]:
def generate_argument_pyramid(question, contents, openai_api_key):
    """Generating Pyramid with GPT-API

    Builds a prompt from the evidence in ``contents`` and asks the chat model
    (gpt-4, temperature 0.7, at most 1500 completion tokens) for a
    claim / arguments / evidence pyramid answering ``question``.

    Args:
        question: The question the claim must address.
        contents: Iterable of evidence items; ``None`` or an empty iterable is
            treated as "no evidence". Items are converted to ``str``.
        openai_api_key: Key assigned to ``openai.api_key`` before the request.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    openai.api_key = openai_api_key

    # Converts a list of contents into a single string, each separated by two newlines.
    # `contents` can be None (no search results) or hold non-string items
    # (evidence parsed from JSON), either of which would make a bare join raise.
    formatted_contents = "\n\n".join(str(item) for item in (contents or []))

    # Defining Prompt Messages
    messages = [
        {"role": "system", "content": "You are a skilled debater and expert in constructing logical arguments."},
        {"role": "user", "content": f"""
        Create an argument pyramid with the following structure based on the evidences provided: "{formatted_contents}".

        1. **Claim:** Provide a claim or thesis statement that directly answers or addresses the question: "{question}", ensuring it is in line with how the question is posed (e.g., using 'should' if the question does).

        2. **Arguments:** Provide at least five complete sentences as arguments supporting the claim, and include evidence for each argument. Ensure each argument is clearly stated and distinct.

        3. **Evidence:** For each argument, provide at least four pieces of evidence to support it. The evidence should be credible, varied (e.g., statistics, expert opinions, real-world examples), and directly relevant to the argument.

        Ensure that the structure follows this format:

        - **Claim:** [Your claim here]
          - **Argument 1: [Argument]**
            - Evidence 1: [Detail of the evidence]
            - Evidence 2: [Detail of the evidence]
            - Evidence 3: [Detail of the evidence]
            - Evidence 4: [Detail of the evidence]
          - **Argument 2: [Another Argument]**
            - Evidence 1: [Detail of the evidence]
            - Evidence 2: [Detail of the evidence]
            - Evidence ...
          - **Argument 3**
            - Evidence 1
            - Evidence 2
            - Evidence ...
          - **Argument ...**
            - Evidence 1
            - Evidence 2
            - Evidence ...

        Please use clear and logical reasoning throughout.
        """}
    ]

    # Send Request
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=1500,
        n=1,
        temperature=0.7,
    )

    # Extracting response content
    result = response.choices[0].message['content'].strip()

    return result


def improve_argument_pyramid(pyramid, contents, prompt, openai_api_key):
    """Improving Pyramid with GPT

    Fills the ``{pyramid}`` and ``{contents}`` placeholders of the ``prompt``
    template and sends it to the chat model (gpt-4, temperature 0.7, at most
    1500 completion tokens).

    Args:
        pyramid: Text of the pyramid to improve.
        contents: Iterable of evidence items; ``None`` or an empty iterable is
            treated as "no evidence". Items are converted to ``str``.
        prompt: ``str.format`` template containing ``{pyramid}`` and ``{contents}``.
        openai_api_key: Key assigned to ``openai.api_key`` before the request.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    openai.api_key = openai_api_key

    # `contents` can be None (no search results) or hold non-string items,
    # either of which would make a bare "\n\n".join(contents) raise.
    formatted_contents = "\n\n".join(str(item) for item in (contents or []))

    prompt = prompt.format(
        pyramid=pyramid,
        contents=formatted_contents
    )

    messages = [
        {"role": "system", "content": "You are a skilled debater and expert in constructing logical arguments."},
        {"role": "user", "content": prompt}
    ]

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=1500,
        n=1,
        temperature=0.7,
    )

    # Extracting response content
    result = response.choices[0].message['content'].strip()

    return result

#### Improvement prompt: relevance (claim <-> arguments)
# str.format template with two placeholders, {contents} and {pyramid} - the same
# keyword names improve_argument_pyramid passes to prompt.format().
# NOTE(review): the text refers to "the question", but the template has no
# placeholder for it, so the model only sees the question indirectly.
# NOTE(review): the format example below is indented by 8 spaces, unlike the
# other improvement prompts; the text is sent to the model exactly as written.
prompt_relevance = """
Based on the following evidences:
{contents}

Improve this Argument Pyramid:
{pyramid}

Your task is to enhance the relevance between the claim and each of the arguments. Examine and strengthen the connections to ensure each argument is robustly supported by the evidence provided. The goal is to improve the logical flow and ensure that each argument directly relates to and supports the claim, making the entire pyramid more coherent and closely aligned with the given evidences.

1. **Claim:** Provide a claim or thesis statement that directly answers or addresses the question, ensuring it is in line with how the question is posed (e.g., using 'should' if the question does).

2. **Arguments:** Provide at least five complete sentences as arguments supporting the claim, and include evidence for each argument. Ensure each argument is clearly stated and distinct.

3. **Evidence:** For each argument, provide at least four pieces of evidence to support it. The evidence should be credible, varied (e.g., statistics, expert opinions, real-world examples), and directly relevant to the argument.

Ensure that the structure follows this format:

        - **Claim:** [Your claim here]
          - **Argument 1: [Argument]**
            - Evidence 1: [Detail of the evidence]
            - Evidence 2: [Detail of the evidence]
            - Evidence 3: [Detail of the evidence]
            - Evidence 4: [Detail of the evidence]
          - **Argument 2: [Another Argument]**
            - Evidence 1: [Detail of the evidence]
            - Evidence 2: [Detail of the evidence]
            - Evidence ...
          - **Argument 3**
            - Evidence 1
            - Evidence 2
            - Evidence ...
          - **Argument ...**
            - Evidence 1
            - Evidence 2
            - Evidence ...
"""


#### Improvement prompt: support (argument <-> evidence)
# str.format template with two placeholders, {contents} and {pyramid} - the same
# keyword names improve_argument_pyramid passes to prompt.format().
# NOTE(review): the text refers to "the question", but the template has no
# placeholder for it.
prompt_support = """
Based on the following evidences:
{contents}

Improve this Argument Pyramid:
{pyramid}

Your task is to enhance the support between each argument and its corresponding evidences. Examine the connections and assess how well the evidence underpins each argument. The goal is to ensure that each argument is strongly backed by the evidence provided, enhancing the overall persuasive power and credibility of the pyramid.

1. **Claim:** Provide a claim or thesis statement that directly answers or addresses the question, ensuring it is in line with how the question is posed (e.g., using 'should' if the question does).

2. **Arguments:** Provide at least five complete sentences as arguments supporting the claim, and include evidence for each argument. Ensure each argument is clearly stated and distinct.

3. **Evidence:** For each argument, provide at least four pieces of evidence to support it. The evidence should be credible, varied (e.g., statistics, expert opinions, real-world examples), and directly relevant to the argument.

Ensure that the structure follows this format:

- **Claim:** [Your claim here]
  - **Argument 1: [Argument]**
    - Evidence 1: [Detail of the evidence]
    - Evidence 2: [Detail of the evidence]
    - Evidence 3: [Detail of the evidence]
    - Evidence 4: [Detail of the evidence]
  - **Argument 2: [Another Argument]**
    - Evidence 1: [Detail of the evidence]
    - Evidence 2: [Detail of the evidence]
    - Evidence ...
  - **Argument 3**
    - Evidence 1
    - Evidence 2
    - Evidence ...
  - **Argument ...**
    - Evidence 1
    - Evidence 2
    - Evidence ...

This modification focuses specifically on enhancing how well the evidence supports the arguments, ensuring that each piece of evidence is not only relevant but effectively strengthens the argument it supports.
"""

#### Improvement prompt: coherence (logical consistency across arguments)
# str.format template with two placeholders, {contents} and {pyramid} - the same
# keyword names improve_argument_pyramid passes to prompt.format().
# NOTE(review): the text refers to "the question", but the template has no
# placeholder for it.
prompt_coherence = """
Based on the following evidences:
{contents}

Improve this Argument Pyramid:
{pyramid}

Your task is to enhance the logical coherence of the arguments within the pyramid. Assess how each argument logically connects to and supports the main claim. The goal is to ensure that all arguments work together in a coherent and logically consistent manner, collectively strengthening the main claim.

1. **Claim:** Provide a claim or thesis statement that directly answers or addresses the question, ensuring it is in line with how the question is posed (e.g., using 'should' if the question does).

2. **Arguments:** Provide at least five complete sentences as arguments supporting the claim. Ensure each argument is clearly stated and distinct, and logically follows from the claim and from each other.

3. **Evidence:** For each argument, provide at least four pieces of evidence. The evidence should be credible, varied (e.g., statistics, expert opinions, real-world examples), and logically relevant to the argument it supports.

Ensure that the structure follows this format:

- **Claim:** [Your claim here]
  - **Argument 1: [Argument]**
    - Evidence 1: [Detail of the evidence]
    - Evidence 2: [Detail of the evidence]
    - Evidence 3: [Detail of the evidence]
    - Evidence 4: [Detail of the evidence]
  - **Argument 2: [Another Argument]**
    - Evidence 1: [Detail of the evidence]
    - Evidence 2: [Detail of the evidence]
    - Evidence ...
  - **Argument 3**
    - Evidence 1
    - Evidence 2
    - Evidence ...
  - **Argument ...**
    - Evidence 1
    - Evidence 2
    - Evidence ...

This modified prompt focuses on assessing and improving how each argument not only supports the main claim but also aligns and interconnects with other arguments, forming a seamless logical progression that enhances the overall persuasiveness of the pyramid.
"""
#### Improvement prompt: completeness (coverage of the issue)
# str.format template with two placeholders, {contents} and {pyramid} - the same
# keyword names improve_argument_pyramid passes to prompt.format().
# NOTE(review): the text refers to "the question", but the template has no
# placeholder for it.
prompt_completeness = """
Based on the following evidences:
{contents}

Improve this Argument Pyramid:
{pyramid}

Your task is to enhance the completeness of the argument pyramid. Review the structure to ensure it covers all relevant aspects of the issue at hand. Assess whether the pyramid adequately addresses counterarguments and includes a diverse range of evidence. The goal is to ensure that no significant aspect related to the claim is overlooked and that each argument is well-rounded and fully developed.

1. **Claim:** Provide a claim or thesis statement that directly answers or addresses the question, ensuring it is comprehensive and fully reflective of the issue.

2. **Arguments:** Provide at least five complete sentences as arguments supporting the claim. Ensure each argument is clearly stated, distinct, and encompasses a wide range of perspectives and evidence.

3. **Evidence:** For each argument, provide at least four pieces of evidence. The evidence should be credible, varied (e.g., statistics, expert opinions, real-world examples), and cover different dimensions relevant to the argument.

Ensure that the structure follows this format:

- **Claim:** [Your claim here]
  - **Argument 1: [Argument]**
    - Evidence 1: [Detail of the evidence]
    - Evidence 2: [Detail of the evidence]
    - Evidence 3: [Detail of the evidence]
    - Evidence 4: [Detail of the evidence]
  - **Argument 2: [Another Argument]**
    - Evidence 1: [Detail of the evidence]
    - Evidence 2: [Detail of the evidence]
    - Evidence ...
  - **Argument 3**
    - Evidence 1
    - Evidence 2
    - Evidence ...
  - **Argument ...**
    - Evidence 1
    - Evidence 2
    - Evidence ...

This prompt encourages you to ensure that the pyramid not only supports the claim but also fully explores the topic, addresses potential objections, and includes a comprehensive range of supporting details. This will make the entire pyramid more robust and complete, effectively strengthening its persuasive power and validity.
"""

def generate_alternative_pyramid(question, contents, openai_api_key, existing_pyramid):
    """Generate an alternative argument pyramid with a different or opposite claim

    The chat model (gpt-4, temperature 0.7, at most 1500 completion tokens) is
    shown the evidence and the pyramid(s) produced so far and asked for a new
    pyramid whose claim contrasts with them.

    Args:
        question: The question the new claim must address.
        contents: Iterable of evidence items; ``None`` or an empty iterable is
            treated as "no evidence". Items are converted to ``str``.
        openai_api_key: Key assigned to ``openai.api_key`` before the request.
        existing_pyramid: The earlier pyramid text, or a list/tuple of earlier
            pyramids (joined with blank lines before being quoted in the prompt).

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    openai.api_key = openai_api_key

    # Converts a list of contents into a single string, each separated by two newlines.
    # `contents` can be None (no search results) or hold non-string items,
    # either of which would make a bare join raise.
    formatted_contents = "\n\n".join(str(item) for item in (contents or []))

    # generate_multiple_pyramids keeps the earlier pyramids in a list; if that
    # list reaches this function, quote its text rather than the Python repr.
    if isinstance(existing_pyramid, (list, tuple)):
        existing_pyramid = "\n\n".join(str(item) for item in existing_pyramid)

    # Define a prompt that explicitly asks for a different or opposite perspective
    messages = [
        {"role": "system", "content": "You are a skilled debater and expert in constructing logical arguments that consider various perspectives."},
        {"role": "user", "content": f"""
        Based on the following evidences: "{formatted_contents}"

        We previously discussed this claim in our argument pyramid:
        "{existing_pyramid}"

        Now, create a new argument pyramid with a claim that presents a completely different or opposite perspective to the original. Ensure the new claim that directly answers or addresses the question: "{question}".

        1. **Claim:** Propose a new claim that contrasts with or opposes the original.
        2. **Arguments:** Provide at least five complete sentences as arguments supporting the new claim, and include evidence for each argument. Ensure each argument is clearly distinct from those in the original pyramid.
        3. **Evidence:** For each new argument, provide at least four pieces of evidence. The evidence should be credible, varied (e.g., statistics, expert opinions, real-world examples), and support the new, contrasting arguments.

        Ensure that the structure follows this format:

        - **Claim:** [Your new claim here]
          - **Argument 1: [New Argument]**
            - Evidence 1: [Detail of the evidence]
            - Evidence 2: [Detail of the evidence]
            - Evidence 3: [Detail of the evidence]
            - Evidence 4: [Detail of the evidence]
          - **Argument 2: [Another New Argument]**
            - Evidence 1: [Detail of the evidence]
            - Evidence 2: [Detail of the evidence]
            - Evidence ...
          - **Argument 3**
            - Evidence 1
            - Evidence 2
            - Evidence ...
          - **Argument ...**
            - Evidence 1
            - Evidence 2
            - Evidence ...

        Use clear and logical reasoning to establish a compelling alternative perspective.
        """}
    ]

    # Send request to OpenAI
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=1500,
        n=1,
        temperature=0.7,
    )

    # Extracting response content
    result = response.choices[0].message['content'].strip()

    return result



In [58]:
def generate_multiple_pyramids(question, threshold, num_pyramids):
    """Build up to `num_pyramids` argument pyramids and return the best-scoring one.

    Evidence is fetched once with `extract_evidence_from_web` and shared by
    every generation round. The first round calls `generate_and_evaluate`;
    every later round calls `generate_and_evaluate_alter`, handing it the list
    of pyramids accepted so far.

    Args:
        question: Question each pyramid has to answer.
        threshold: Score threshold forwarded to `generate_and_evaluate`.
        num_pyramids: Number of generation rounds to run.

    Returns:
        The pyramid with the highest total score, or None when no round
        produced a truthy pyramid.
    """
    documents = extract_evidence_from_web(question, bing_api_key)
    accepted = []
    top_pyramid, top_score = None, float('-inf')

    for index in range(num_pyramids):
        round_number = index + 1
        print(f"\nGenerating pyramid {round_number}/{num_pyramids}...\n")

        if index:
            # Later rounds ask for a pyramid that differs from the accepted ones.
            candidate, candidate_score = generate_and_evaluate_alter(
                question, documents, openai.api_key, accepted
            )
        else:
            # The very first round uses the standard generation path.
            candidate, candidate_score = generate_and_evaluate(question, documents, threshold)

        if not candidate:
            continue

        accepted.append(candidate)
        if candidate_score > top_score:
            top_pyramid, top_score = candidate, candidate_score

    if top_pyramid:
        print("\n=======================================================")
        print(f"Best pyramid with total score {top_score}:\n{top_pyramid}")
        print("=======================================================")
    else:
        print("Failed to generate a satisfactory pyramid.")

    return top_pyramid

def generate_and_evaluate(question, documents, threshold, max_attempts=None):
    """Generate an argument pyramid and refine it until every score meets `threshold`.

    Each pass scores the current pyramid with `calculate` and inspects the
    weakest of the four categories (relevance, support, coherence,
    completeness):

    * weakest score >= `threshold`: the pyramid is returned;
    * weakest score < 0.6: the pyramid is dropped and a fresh one is generated;
    * otherwise: the pyramid is handed to `improve_argument_pyramid` together
      with the prompt selected for the weakest category, and the *improved*
      pyramid is what gets scored on the next pass.

    Args:
        question: Question the pyramid has to answer.
        documents: Evidence forwarded to the generation and improvement helpers.
        threshold: Minimum value every individual score must reach.
        max_attempts: Optional cap on the number of failed passes. None (the
            default) keeps retrying without limit.

    Returns:
        `(pyramid, total_score)` on success, where `total_score` is the sum of
        the four scores; `(None, float('-inf'))` if `max_attempts` failed
        passes were used up first.
    """
    attempt = 0
    pyramid = None  # a falsy value means "generate a fresh pyramid on this pass"
    while True:
        if not pyramid:
            pyramid = generate_argument_pyramid(question, documents, openai.api_key)
        print(pyramid)

        relevance_score, support_score, coh_score, completeness_score = calculate(pyramid)
        scores = {
            'relevance': relevance_score,
            'support': support_score,
            'coherence': coh_score,
            'completeness': completeness_score
        }
        min_score_category, min_score = min(scores.items(), key=lambda item: item[1])
        total_score = sum(scores.values())

        if min_score >= threshold:
            print("--------------------------------------------------------")
            print(f"Successful pyramid generated with total score {total_score}")
            print(pyramid)
            print("--------------------------------------------------------")
            return pyramid, total_score

        attempt += 1
        if max_attempts is not None and attempt >= max_attempts:
            # Give up instead of issuing further API calls; callers treat a
            # falsy pyramid as "nothing produced".
            print(f"Attempt {attempt}: giving up after reaching the attempt limit.")
            return None, float('-inf')

        if min_score < 0.6:
            print(f"Attempt {attempt}: Minimum score of {min_score_category} is below 0.6, re-generating the pyramid.")
            pyramid = None  # too weak to be worth improving; start over
        else:
            print(f"Attempt {attempt}: Improving {min_score_category} (current score: {min_score}).")
            prompt = select_prompt(min_score_category)
            # Keep the improved pyramid so the next pass evaluates it rather
            # than replacing it with a newly generated one.
            pyramid = improve_argument_pyramid(pyramid, documents, prompt, openai.api_key)

def generate_and_evaluate_alter(question, documents, openai_api_key, existing_pyramid,
                                score_threshold=None, max_attempts=None):
    """Generate an alternative pyramid and refine it until every score is acceptable.

    Works like `generate_and_evaluate`, but creates candidates with
    `generate_alternative_pyramid` and uses 0.55 as the cut-off below which a
    candidate is regenerated instead of improved. A candidate sent to
    `improve_argument_pyramid` is kept and scored on the next pass.

    Args:
        question: Question the pyramid has to answer.
        documents: Evidence forwarded to the generation and improvement helpers.
        openai_api_key: API key passed to both the generation and the
            improvement helper.
        existing_pyramid: Previously produced pyramid(s); forwarded unchanged
            to `generate_alternative_pyramid`.
        score_threshold: Minimum value every individual score must reach. When
            None (the default) the notebook-level `threshold` variable is used.
        max_attempts: Optional cap on the number of failed passes. None (the
            default) keeps retrying without limit.

    Returns:
        `(pyramid, total_score)` on success, where `total_score` is the sum of
        the four scores; `(None, float('-inf'))` if `max_attempts` failed
        passes were used up first.
    """
    # Fall back to the global only when the caller did not supply a threshold.
    required_score = threshold if score_threshold is None else score_threshold

    attempt = 0
    pyramid = None  # a falsy value means "generate a fresh pyramid on this pass"
    while True:
        if not pyramid:
            pyramid = generate_alternative_pyramid(question, documents, openai_api_key, existing_pyramid)
        print(pyramid)

        relevance_score, support_score, coh_score, completeness_score = calculate(pyramid)
        scores = {
            'relevance': relevance_score,
            'support': support_score,
            'coherence': coh_score,
            'completeness': completeness_score
        }
        min_score_category, min_score = min(scores.items(), key=lambda item: item[1])
        total_score = sum(scores.values())

        if min_score >= required_score:
            print("--------------------------------------------------------")
            print(f"Successful pyramid generated with total score {total_score}")
            print(pyramid)
            print("--------------------------------------------------------")
            return pyramid, total_score

        attempt += 1
        if max_attempts is not None and attempt >= max_attempts:
            # Give up instead of issuing further API calls; callers treat a
            # falsy pyramid as "nothing produced".
            print(f"Attempt {attempt}: giving up after reaching the attempt limit.")
            return None, float('-inf')

        if min_score < 0.55:
            print(f"Attempt {attempt}: Minimum score {min_score_category} is below 0.55, re-generating the pyramid.")
            pyramid = None  # too weak to be worth improving; start over
        else:
            print(f"Attempt {attempt}: Improving {min_score_category} (current score: {min_score}).")
            prompt = select_prompt(min_score_category)
            # Use the key this function was given, and keep the improved
            # pyramid so the next pass evaluates it.
            pyramid = improve_argument_pyramid(pyramid, documents, prompt, openai_api_key)

def select_prompt(score_category):
    """Return the improvement prompt that matches a score category.

    Args:
        score_category: One of 'relevance', 'support', 'coherence' or
            'completeness'.

    Returns:
        The module-level prompt object for that category, or None when the
        category is not one of the four known names.
    """
    # Each getter is a lambda so a prompt global is looked up only when its
    # category is actually requested.
    prompt_getters = (
        ('relevance', lambda: prompt_relevance),
        ('support', lambda: prompt_support),
        ('coherence', lambda: prompt_coherence),
        ('completeness', lambda: prompt_completeness),
    )
    for category_name, get_prompt in prompt_getters:
        if score_category == category_name:
            return get_prompt()
    return None


# Run the whole pipeline with the question and hyperparameters set in the
# configuration cell, keeping the best-scoring pyramid for later inspection.
successful_pyramid = generate_multiple_pyramids(
    question=question,
    threshold=threshold,
    num_pyramids=num_pyramids,
)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Title: Best AI Stocks Of August 2024 – Forbes Advisor
URL: https://www.forbes.com/advisor/investing/best-ai-stocks/
Title: Artificial Intelligence Stocks: The 10 Best AI Companies | Investing ...
URL: https://money.usnews.com/investing/articles/artificial-intelligence-stocks-the-10-best-ai-companies
Title: Analysts Say These 13 AI Stocks Will Skyrocket in 2024 - Yahoo Finance
URL: https://finance.yahoo.com/news/analysts-13-ai-stocks-skyrocket-131844568.html
Title: 7 Best-Performing AI Stocks for August 2024 - NerdWallet
URL: https://www.nerdwallet.com/article/investing/ai-stocks-invest-in-artificial-intelligence
Title: 12 Best Artificial Intelligence (AI) Stocks To Buy For 2023 - Forbes
URL: https://www.forbes.com/sites/investor-hub/article/12-best-artificial-intelligence-ai-stocks-to-buy/
Title: Forbes 2024 AI 50 List - Top Artificial Intelligence Startups
URL: https://www.forbes.com/lists/ai50/
Title: The 5 Best AI Stocks to Buy in 2024 | The Motley Fool
URL: https://www.fool.com/inv