# STORM (generating long article)

<img src="./assets/storm.png" height="600"> <img src="./assets/storm_framework.png" width="600"> 

In [2]:
# the prompts are used to generate different writer personas prompt for the given topic of interest
FIND_RELATED_TOPIC = """I'm writing a Wikipedia-like content for a topic mentioned below. Please identify and recommend some Wikipedia pages on closely related subjects. 
I'm looking for examples that provide insights into interesting aspects commonly associated with this topic, or examples that help me understand the typical content and structure included in Wikipedia pages for similar topics.
Please list the urls in separate lines. The topic of interest is: {topic}"""

GENERATE_WRITERS_PERSONA = """You need to select a group of Wikipedia editors who will work together to create a comprehensive article on the topic. Each of them represents a different perspective, role, or affiliation related to this topic. You can use other Wikipedia pages of related topics for inspiration. For each editor, add description of what they will focus on. 
Give your answer in the following format: 1. short summary of editor 1: description\n2. short summary of editor 2: description\n...
------------------------------
Wiki page outlines of related topics for inspiration: {examples}""" # examples: f'Title: {title}\nTable of Contents: {toc}' of wikipedia pages

DEFAULT_WRITER_PERSONA = """Basic fact writer: Basic fact writer focusing on broadly covering the basic facts about the topic."""

ASK_QUESTION = """You are an experienced Wikipedia writer and want to edit a specific page. Besides your identity as a Wikipedia writer, you have specific focus when researching the topic.
Now, you are chatting with an expert to get information. Ask good questions to get more useful information. 
When you have no more question to ask, say "Thank you so much for your help!" to end the conversation.
Please only ask a question at a time and don't ask what you have asked before. Your questions should be related to the topic you want to write.

Topic you are going to write: {topic}
Your persona besides being a Wikipedia writer: {persona}

"""
# Here is the conversation hsitory:\n {conversation}

In [3]:
# Topic expert will have conversations with writer

QUESTION_TO_QUERY="""You want to answer the question using Google search. What do you type in the search box?
Write the queries you will use in the following format:
- query 1
- query 2
...
- query n
Topic you are discussing about: {topic}
"""

ANSWER_QUESTION = """You are an expert who can use information effectively. You are chatting with a Wikipedia writer who wants to write a Wikipedia page on topic you know. 
You have gathered the related information and will now use the information to form a response.
Make your response as informative as possible and make sure every sentence is supported by the gathered information.
Topic you are discussing about: {topic}
Question: {question}
Gathered information: {search_result}
Now give your response. (Try to use as many different sources as possible and add do not hallucinate.)
"""

In [4]:
# generate the outline for the article
WRITE_OUTLINE = """Write an outline for a Wikipedia page.
Here is the format of your writing:
1. Use "#" Title" to indicate section title, "##" Title" to indicate subsection title, "###" Title" to indicate subsubsection title, and so on.
2. Do not include other information.
The topic you want to write: {topic}
Write the Wikipedia page outline:\n
"""

IMPROVE_WRITE_OUTLINE_FROM_CONVERSATION = """Improve an outline for a Wikipedia page. You already have a draft outline that covers the general information. Now you want to improve it based on the information learned from an information-seeking conversation to make it more informative.
Here is the format of your writing:
1. Use "#" Title" to indicate section title, "##" Title" to indicate subsection title, "###" Title" to indicate subsubsection title, and so on.
2. Do not include other information.
The topic you want to write: {topic}
Conversation history:\n {converation}
Current outline:\n {old_outline}
Write the Wikipedia page outline (Use "#" Title" to indicate section title, "##" Title" to indication subsection title, ...):\n
"""


In [6]:
from collections import OrderedDict, Counter
from typing import Optional, Union, Literal, Any, List

class DialogueTurn:
    def __init__(
            self,
            agent_utterance: str = None,
            user_utterance: str = None,
            search_queries: Optional[List[str]] = None,
            search_results: Optional[List[dict[str, Any]]] = None
    ):
        self.agent_utterance = agent_utterance
        self.user_utterance = user_utterance
        self.search_queries = search_queries
        self.search_results = search_results

    def log(self):
        """
        Returns a json object that contains all information inside `self`
        """

        return OrderedDict(
            {
                'agent_utterance': self.agent_utterance,
                'user_utterance': self.user_utterance,
                'search_queries': self.search_queries,
                'search_results': self.search_results,
            }
        )

In [7]:
# from sine.models.kimi import MoonshotWrapper
# model = MoonshotWrapper() # -> good valid url, and the wiki item looks good

# from sine.models.chatglm import GLM4Wrapper
# model = GLM4Wrapper() # -> give bad valid url

# from sine.models.qwen import QWENWrapper
# model = QWENWrapper() # -> every url will have bad ending with `>`

from sine.models.groq import GroqMixtralWrapper
model = GroqMixtralWrapper() # every url will have bad ending with `)`

topic = "How to get over the fear of being judged by others?"

def generate_related_topics(message):
    return model.chat(message)

message = [
    dict(role='system', content=FIND_RELATED_TOPIC),
    dict(role='user', content=f"The topic of interest is: {topic}"),
]

related_topics=generate_related_topics(message)
related_topics

'Here are some Wikipedia pages that you may find useful as examples for your Wikipedia-like content about "How to get over the fear of being judged by others?":\n\n1. <https://en.wikipedia.org/wiki/Social_anxiety>\n2. <https://en.wikipedia.org/wiki/Self-esteem>\n3. <https://en.wikipedia.org/wiki/Assertiveness>\n4. <https://en.wikipedia.org/wiki/Cognitive_behavioral_therapy>\n5. <https://en.wikipedia.org/wiki/Mindfulness>\n6. <https://en.wikipedia.org/wiki/Emotional_intelligence>\n7. <https://en.wikipedia.org/wiki/Vulnerability_(emotion)>\n8. <https://en.wikipedia.org/wiki/Public_speaking>\n9. <https://en.wikipedia.org/wiki/Shyness>\n10. <https://en.wikipedia.org/wiki/Self-help>\n\nThese pages cover various aspects related to the fear of being judged by others, such as social anxiety, self-esteem, assertiveness, mindfulness, emotional intelligence, vulnerability, public speaking, and shyness. By examining these pages, you can get an idea of the typical content and structure included in 

In [13]:
urls = [s[s.find('http'):].rstrip('>') for s in related_topics.split('\n')][:-2]
urls

[':',
 '',
 'https://en.wikipedia.org/wiki/Social_anxiety',
 'https://en.wikipedia.org/wiki/Self-esteem',
 'https://en.wikipedia.org/wiki/Assertiveness',
 'https://en.wikipedia.org/wiki/Cognitive_behavioral_therapy',
 'https://en.wikipedia.org/wiki/Mindfulness',
 'https://en.wikipedia.org/wiki/Emotional_intelligence',
 'https://en.wikipedia.org/wiki/Vulnerability_(emotion)',
 'https://en.wikipedia.org/wiki/Public_speaking',
 'https://en.wikipedia.org/wiki/Shyness',
 'https://en.wikipedia.org/wiki/Self-help']

In [11]:
import requests
from bs4 import BeautifulSoup

def get_wiki_page_title_and_toc(url):
    """Get the main title and table of contents from an url of a Wikipedia page."""

    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Get the main title from the first h1 tag
    main_title = soup.find('h1').text.replace('[edit]', '').strip().replace('\xa0', ' ')

    toc = ""
    levels = []
    excluded_sections = {'Contents', 'See also', 'Notes', 'References', 'External links'}

    # Start processing from h2 to exclude the main title from TOC
    for header in soup.find_all(['h2', 'h3', "h4", "h5", "h6"]):
        level = int(header.name[1])  # Extract the numeric part of the header tag (e.g., '2' from 'h2')
        section_title = header.text.replace('[edit]', '').strip().replace('\xa0', ' ')
        if section_title in excluded_sections:
            continue

        while levels and level <= levels[-1]:
            levels.pop()
        levels.append(level)

        indentation = "  " * (len(levels) - 1)
        toc += f"{indentation}{section_title}\n"

    return main_title, toc.strip()

In [14]:
import logging
examples = []
for url in urls:
    try:
        title, toc = get_wiki_page_title_and_toc(url)
        examples.append(f'Title: {title}\nTable of Contents: {toc}')
    except Exception as e:
        logging.error(f'Error occurs when processing {url}: {e}')
        continue
examples

ERROR:root:Error occurs when processing :: No connection adapters were found for ':'
ERROR:root:Error occurs when processing : Invalid URL '': No scheme supplied. Perhaps you meant https://?


['Title: Social anxiety\nTable of Contents: Disorder\nStages\n  Child development\n  Adults\nSigns and symptoms\nAttention bias\nTriggers and behaviors\nMeasures and treatment\nDevelopment and evolutionary theories\n  Social development in childhood\n  Sensory processing sensitivity\n  Biological adaptation to living in small groups\n  Exclusion theory',
 'Title: Self-esteem\nTable of Contents: History\n  In public policy\nTheories\nMeasurement\nDevelopment across lifespan\n  Shame\n  Real self, ideal self, and dreaded self\nTypes\n  High\n  Secure vs. defensive\n  Implicit and explicit\n  Narcissism and threatened egotism\n  Low\n    Treatments\n  The three states\n    Shattered\n    Vulnerable\n    Strong\n  Contingent vs. non-contingent\n  Domain-specific self-esteem\nImportance\n  Correlations\n  Mental health\nNeuroscience\nCriticism and controversy\n  False attempts\n  As narcissism\nFurther reading',
 'Title: Assertiveness\nTable of Contents: Training\nCommunication\nCharacteris

In [18]:
print(examples[4])

Title: Mindfulness
Table of Contents: Practice
  Watching the breath, body-scan and other techniques
  Timings
  In Buddhist context; moral precepts
Translations
  Sati and smṛti
  Translation
  Alternative translations
Definitions
  Psychology
    Trait, state and practice
      Trait-like constructs
      State-like phenomenon
    Mindfulness-practice
  Definitions arising in modern teaching of meditation
  Buddhism
  Other uses
Models and frameworks for mindfulness practices
  Two-component model
  The five-aggregate model
  Cultivating self-knowledge and wisdom
Historical development
  Buddhism
    Early Buddhism
    Zazen
    Contemporary Vipassana-meditation
      Anapanasati, satipaṭṭhāna, and vipassana
      Samprajaña, apramāda and atappa
    Monitoring mental processes
  Stoicism
  Christianity
  Transcendentalism
  Jon Kabat-Zinn and MBSR
  Popularization, "mindfulness movement"
Applications
  Therapy programs
    Mindfulness-based stress reduction
    Mindfulness-based cogn

In [16]:
# question_asker_model = MoonshotWrapper()
# question_asker_model = GroqMixtralWrapper()

# from sine.models.groq import GroqLlama70BWrapper
# question_asker_model = GroqMixtralWrapper()

from sine.models.chatglm import GLM4Wrapper
question_asker_model = GLM4Wrapper()

def gen_persona(topic, examples):
    message = [
        dict(role='system', content=GENERATE_WRITERS_PERSONA.format(topic=topic, examples="\n".join(examples))),
        dict(role='user', content="Please give the editors."),
    ]
    return question_asker_model.chat(message)

personas = gen_persona(topic, examples)
personas

'Certainly! Here\'s a diverse group of Wikipedia editors who would work together to create a comprehensive article on the topic of social anxiety, drawing inspiration from related Wikipedia pages:\n\n1. **Psychologist Editor**: Focuses on the clinical aspects, signs and symptoms, and treatment options for social anxiety, drawing from the "Social anxiety" and "Cognitive behavioral therapy" pages.\n\n   - Short Summary: A licensed psychologist with experience in treating anxiety disorders.\n   - Description: This editor will delve into the psychological underpinnings of social anxiety, explain diagnostic criteria, and discuss evidence-based treatment approaches, including cognitive behavioral therapy and mindfulness.\n\n2. **Developmental Editor**: Focuses on the developmental aspects of social anxiety, including child development and evolutionary theories, informed by the "Child development" and "Social development in childhood" sections.\n\n   - Short Summary: A developmental psycholog

In [17]:
import re

personass = []
for s in personas.split('\n'):
    match = re.search(r'\d+\.\s*(.*)', s)
    if match:
        personass.append(match.group(1))

personass

['**Psychologist Editor**: Focuses on the clinical aspects, signs and symptoms, and treatment options for social anxiety, drawing from the "Social anxiety" and "Cognitive behavioral therapy" pages.',
 '**Developmental Editor**: Focuses on the developmental aspects of social anxiety, including child development and evolutionary theories, informed by the "Child development" and "Social development in childhood" sections.',
 '**Sociocultural Editor**: Examines the impact of culture on social anxiety, drawing from the "Different cultural views" section of the "Shyness" page and the cross-cultural aspects of "Emotional intelligence."',
 '**Neuroscience Editor**: Focuses on the neurological and biological aspects of social anxiety, informed by the "Neuroscience" sections of the "Self-esteem" and "Mindfulness" pages.',
 '**Patient Advocate Editor**: Provides a personal perspective on living with social anxiety, drawing from the "Vulnerability (emotion)" page and their own experiences.',
 '**C

In [19]:
import requests
import os
api_key = os.environ.get('SERPER_API_KEY')

def google_search(
            api_key,
            search_term: str,
            search_type = 'search',
            **kwargs):
    headers = {
        'X-API-KEY': api_key,
        'Content-Type': 'application/json',
    }
    print('[google search] searching: ', search_term)
    params = {
        'q': search_term,
        **{
            key: value
            for key, value in kwargs.items() if value is not None
        },
    }
    try:
        response = requests.post(
            f'https://google.serper.dev/{search_type}',
            headers=headers,
            params=params,
            timeout=5)
    except Exception as e:
        return -1, str(e)
    return response.status_code, response.json()

def parse_results(results: dict, top_k=2) :
        snippets = []

        if results.get('answerBox'):
            answer_box = results.get('answerBox', {})
            if answer_box.get('answer'):
                return [answer_box.get('answer')]
            elif answer_box.get('snippet'):
                return [answer_box.get('snippet').replace('\n', ' ')]
            elif answer_box.get('snippetHighlighted'):
                return answer_box.get('snippetHighlighted')

        if results.get('knowledgeGraph'):
            kg = results.get('knowledgeGraph', {})
            title = kg.get('title')
            entity_type = kg.get('type')
            if entity_type:
                snippets.append(f'{title}: {entity_type}.')
            description = kg.get('description')
            if description:
                snippets.append(description)
            for attribute, value in kg.get('attributes', {}).items():
                snippets.append(f'{title} {attribute}: {value}.')

        for result in results['organic'][:top_k]:
            if 'snippet' in result:
                snippets.append(result['snippet'])
            for attribute, value in result.get('attributes', {}).items():
                snippets.append(f'{attribute}: {value}.')

        if len(snippets) == 0:
            return ['No good Google Search Result was found']
        return snippets

In [None]:
code, result = google_search(api_key, "crypto")
from pprint import pprint
pprint(result, indent=2)

In [20]:
topic

'How to get over the fear of being judged by others?'

In [None]:
from concurrent.futures import as_completed
import concurrent.futures
from sine.models.chatglm import GLM4Wrapper
conv_sim_model = GLM4Wrapper()

max_perspective = 8
considered_personas = [DEFAULT_WRITER_PERSONA] + personass[:max_perspective]

def remove_citations(s):
    """Remove citations from a string."""

    return re.sub(r'\[\d+\]', '', s)

def wiki_writer(topic, persona, dialogue_turns):
    conv = []
    for turn in dialogue_turns[:-4]:
        conv.append(f'You: {turn.user_utterance}\nExpert: Omit the answer here due to space limit.')
    for turn in dialogue_turns[-4:]:
        conv.append(f'You: {turn.user_utterance}\nExpert: {remove_citations(turn.agent_utterance)}')
    conv = '\n'.join(conv)
    conv = conv.strip() or 'N/A'

    message = [
        dict(role='user', content=ASK_QUESTION.format(topic=topic, persona=persona, conversation=conv)),
    ]

    return question_asker_model.chat(message)


def question2query(topic, question):
    message = [
        dict(role='system', content=QUESTION_TO_QUERY.format(topic=topic)),
        dict(role='user', content=f"Question you want to answer: {question}"),
    ]
    return conv_sim_model.chat(message)


def answer_question(topic, question, info):
    message = [
        dict(role='user', content=ANSWER_QUESTION.format(topic=topic, question=question, search_result=info)),
    ]
    return conv_sim_model.chat(message)


def topic_expert(topic, question):
    raw_queries = question2query(topic, question)
    matches = re.findall(r'query \d+: (.*)', raw_queries)
    queries = [match.strip() for match in matches]

    collected_results = []
    for query in queries:
        status_code, results = google_search(api_key, query)
        if status_code == 200:
            collected_results.append(parse_results(results))
        else:
            print(f"Failed to get search results for query: {query}")

    if len(collected_results) > 0:
        info = ''
        for n, r in enumerate(collected_results):
            info += f'[{n + 1}]: {r[0]}'
            info += '\n\n'

        answer = answer_question(topic, question, info)
    
    return answer, queries, collected_results
        
import time

def run_conversation(topic, persona, max_turn=4):
    print('Perspective: ', persona)
    dlg_history: List[DialogueTurn] = []
    for _ in range(max_turn):
        user_utterance = wiki_writer(topic, persona, dlg_history)
        print('user_utterance: ', user_utterance)
        if user_utterance == '':
            logging.error('Simulated Wikipedia writer utterance is empty.')
            break
        if user_utterance.startswith('Thank you so much for your help!'):
            break
        
        time.sleep(5)

        expert_answer, queries, searched_results = topic_expert(topic, user_utterance)
        print('expert_answer: ', expert_answer)

        dlg_turn = DialogueTurn(
            agent_utterance=expert_answer,
            user_utterance=user_utterance,
            search_queries=queries,
            search_results=searched_results
        )

        dlg_history.append(dlg_turn)

        time.sleep(10)

    return dlg_history

# run_conversation(topic, considered_personas[0])



In [27]:
# !important generate conversations 
import json
def handle_non_serializable(obj):
    return "non-serializable contents"  # mark the non-serializable part
def dump_json(obj, file_name, encoding="utf-8"):
    with open(file_name, 'w', encoding=encoding) as fw:
        json.dump(obj, fw, default=handle_non_serializable)
def remove_uncompleted_sentences_with_citations(text):
    """Remove uncompleted sentences with citations from a string.

    The expected format of citation is '[1]', '[2]', etc.
    """

    # Convert citations like [1, 2, 3] to [1][2][3].
    def replace_with_individual_brackets(match):
        numbers = match.group(1).split(', ')
        return ' '.join(f'[{n}]' for n in numbers)

def clean_up_citation(dlg_history):
    for turn in dlg_history:
        turn.agent_utterance = turn.agent_utterance[:turn.agent_utterance.find('References:')]
        turn.agent_utterance = turn.agent_utterance[:turn.agent_utterance.find('Sources:')]
        turn.agent_utterance = turn.agent_utterance.replace('Answer:', '').strip()
        try:
            max_ref_num = max([int(x) for x in re.findall(r'\[(\d+)\]', turn.agent_utterance)])
        except Exception as e:
            max_ref_num = 0
        if max_ref_num > len(turn.search_results):
            for i in range(len(turn.search_results), max_ref_num + 1):
                turn.agent_utterance = turn.agent_utterance.replace(f'[{i}]', '')
        turn.agent_utterance = remove_uncompleted_sentences_with_citations(turn.agent_utterance)

    return dlg_history

conversations = []
with concurrent.futures.ThreadPoolExecutor(max_workers=len(considered_personas)) as executor:
    future_to_persona = {executor.submit(run_conversation, topic=topic, persona=persona): persona for persona in
                            considered_personas}

    for future in as_completed(future_to_persona):
        persona_conv = future.result()
        conversations.append(clean_up_citation(persona_conv))

conversation_log = []
for persona, persona_conv in zip(considered_personas, conversations):
    conversation_log.append(
        {
            'perspective': persona,
            'dlg_turns': [turn.log() for turn in persona_conv]
        }
    )
    dump_json(conversation_log, 'conversation_log.json')
    

user_utterance:  What are the primary cultural factors that contribute to the fear of being judged by others?
user_utterance:  What are the primary psychological reasons that underpin the fear of being judged by others?
user_utterance:  Can you provide insights into the historical origins of the fear of being judged by others, and how it has been perceived across different cultures and time periods?
user_utterance:  Can you define what fear of being judged by others, often referred to as "social anxiety," is and explain its common manifestations in social situations?
user_utterance:  What are the key neural mechanisms involved in the fear of being judged by others, often referred to as social anxiety?
user_utterance:  What are the primary psychological barriers that contribute to the fear of being judged by others, especially in the context of public policy decision-making and workplace environments?
user_utterance:  What are the key developmental factors that contribute to the fear of

In [None]:
# load converation
import json
def load_json(file_name, encoding="utf-8"):
    with open(file_name, 'r', encoding=encoding) as fr:
        return json.load(fr)

conversation_log = load_json(file_name='conversation_log.json')
conversations = [[DialogueTurn(**turn) for turn in item['dlg_turns']] for item in conversation_log]



In [None]:
# generate the outline for the article
WRITE_DRAFT_OUTLINE = """Write an outline for a Wikipedia page.
Here is the format of your writing:
1. Use "#" Title" to indicate section title, "##" Title" to indicate subsection title, "###" Title" to indicate subsubsection title, and so on.
2. Do not include other information.
"""

IMPROVE_WRITE_OUTLINE_FROM_CONVERSATION = """Improve an outline for a Wikipedia page. You already have a draft outline that covers the general information. Now you want to improve it based on the information learned from an information-seeking conversation to make it more informative.
Here is the format of your writing:
1. Use "#" Title" to indicate section title, "##" Title" to indicate subsection title, "###" Title" to indicate subsubsection title, and so on.
2. Do not include other information.
The topic you want to write: {topic}
Conversation history:\n {conversation}
Current outline:\n {old_outline}
Write the Wikipedia page outline (Use "#" Title" to indicate section title, "##" Title" to indication subsection title, ...):\n
"""

outline_model = GLM4Wrapper()

def limit_word_count_preserve_newline(input_string, max_word_count):
    """Limit the word count of a string while preserving complete lines."""

    word_count = 0
    limited_string = ''

    for word in input_string.split('\n'):
        line_words = word.split()
        for lw in line_words:
            if word_count < max_word_count:
                limited_string += lw + ' '
                word_count += 1
            else:
                break
        if word_count >= max_word_count:
            break
        limited_string = limited_string.strip() + '\n'

    return limited_string.strip()

def clean_up_outline(outline, topic=""):
    output_lines = []
    current_level = 0  # To track the current section level

    for line in outline.split('\n'):
        stripped_line = line.strip()

        if topic != "" and f"# {topic.lower()}" in stripped_line.lower():
            output_lines = []

        # Check if the line is a section header
        if stripped_line.startswith('#'):
            current_level = stripped_line.count('#')
            output_lines.append(stripped_line)
        # Check if the line is a bullet point
        elif stripped_line.startswith('-'):
            subsection_header = '#' * (current_level + 1) + ' ' + stripped_line[1:].strip()
            output_lines.append(subsection_header)

    outline = '\n'.join(output_lines)

    # Remove references.
    outline = re.sub(r"#[#]? See also.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? See Also.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? Notes.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? References.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? External links.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? External Links.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? Bibliography.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? Further reading*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? Further Reading*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? Summary.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? Appendices.*?(?=##|$)", '', outline, flags=re.DOTALL)
    outline = re.sub(r"#[#]? Appendix.*?(?=##|$)", '', outline, flags=re.DOTALL)

    return outline

def write_str(s, path):
    with open(path, 'w') as f:
        f.write(s)

def write_draft_outline(topic):
    message = [
        dict(role='system', content=WRITE_DRAFT_OUTLINE),
        dict(role='user', content=f'The topic you want to write: {topic}\n Write the Wikipedia page outline:\n')
    ]
    return outline_model.chat(message)

draft_outline = clean_up_outline(write_draft_outline(topic))
write_str(draft_outline, 'direct_gen_outline.txt')

def improve_outline_from_conversation(topic, conversations, old_outline):
    
    trimmed_dlg_history = []
    for turn in conversations:
        if turn.agent_utterance is None or turn.user_utterance is None:
            continue
        if 'topic you' in turn.agent_utterance.lower() or 'topic you' in turn.user_utterance.lower():
            continue
        trimmed_dlg_history.append(turn)
    conv = '\n'.join([f'Wikipedia Writer: {turn.user_utterance}\nExpert: {turn.agent_utterance}' for turn in
                        trimmed_dlg_history])
    conv = remove_citations(conv)
    conv = limit_word_count_preserve_newline(conv, 5000)

    message = [
        dict(role='system', content=IMPROVE_WRITE_OUTLINE_FROM_CONVERSATION.format(topic=topic, conversation=conv, old_outline=old_outline)),
        dict(role='user', content='Write the Wikipedia page outline:\n')
    ]
    return outline_model.chat(message)

improved_outline = clean_up_outline(improve_outline_from_conversation(topic, sum(conversations, []), draft_outline))
write_str(improved_outline, 'storm_gen_outline.txt')

print(improved_outline)

In [125]:
def process_table_of_contents(toc):
    """Convert a table of contents into a tree structure.

    The table of contents is a string with each line representing a heading.
    "#" Title"  indicates section title, "##" Title" to indication subsection title, "###" Title" to indicate subsubsection title, and so on.
    """
    lines = toc.split('\n')

    root = {}
    path = [(root, -1)]

    for line in lines:
        line = line.strip()
        if not line.startswith('#'):
            continue

        # Count only the leading '#' symbols
        level = 0
        for char in line:
            if char == '#':
                level += 1
            else:
                break

        heading = line[level:].strip()
        if len(heading) == 0:
            continue
        while path and path[-1][1] >= level:
            path.pop()

        # Add the new heading
        if path:
            current_dict = path[-1][0]
            current_dict[heading] = {}
            path.append((current_dict[heading], level))

    return root

outline_tree = process_table_of_contents(improved_outline)
outline_tree = list(outline_tree.values())[0]

for sec_title in outline_tree:
    print(sec_title)


Introduction
Understanding the Fear
The Psychology of Judgment
Strategies for Overcoming the Fear
Seeking Professional Help
Real-life Experiences and Success Stories
Prevention and Maintenance
Addressing Judgment in Different Contexts
External Resources and References
Conclusion
Additional subsections to consider


In [127]:
# build 
def get_search_results(conversations):
    info = []
    for conv in conversations:
        for turn in conv:
            search_results = turn.search_results
            for res in search_results:              
                if len(res) == 1:
                    if res[0] not in info:
                        info.append(res[0])
                elif len(turn.search_results) > 1:
                    s = ''
                    for i in range(len(res)):
                        s += res[i] + ' '
                    if s not in info:
                        info.append(s)
    return info


info = get_search_results(conversations)

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


class SemanticsSearch:
    def __init__(self, snippets):
        self.encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
        self.collected_snippets = snippets

    def run_encoding_snippets(self):
        self.encoded_snippets = self.encoder.encode(self.collected_snippets, show_progress_bar=False)

    def search(self, queries, top_k=3):
        selected_snippets = []
        if type(queries) is str:
            queries = [queries]
        for query in queries:
            encoded_query = self.encoder.encode(query, show_progress_bar=False)
            sim = cosine_similarity([encoded_query], self.encoded_snippets)[0]
            sorted_indices = np.argsort(sim)
            for i in sorted_indices[-top_k:][::-1]:
                selected_snippets.append(self.collected_snippets[i])
        
        return selected_snippets

sem_search = SemanticsSearch(info)
sem_search.run_encoding_snippets()
# pprint(sem_search.search("Societal and cultural factors contributing to the fear", top_k=5))
print(info)


['Social anxiety disorder is an intense, persistent fear of being watched and judged by others.', 'Social anxiety disorder: Also called: SAD and social phobia. A chronic mental health condition in which social interactions cause irrational anxiety.Treatable by a medical professionalRequires a medical diagnosisLab tests or imaging not requiredChronic: can last for years or be lifelongFor people with social anxiety disorder, everyday social interactions cause irrational anxiety, fear, self-consciousness, and embarrassment.Symptoms may include excess fear of situations in which one may be judged, worry about embarrassment or humiliation, or concern about offending someone.Talk therapy and antidepressants can help increase confidence and improve ability to interact with others. Social anxiety disorder Common: More than 200,000 US cases per year. In contrast to everyday nervousness, social anxiety disorder includes fear, anxiety and avoidance that interfere with relationships, daily ... Soc

In [137]:
# Generate a curated article for the topic based on the outline and the collected references **section-by-section**
WRITE_SECTION = """Write a Wikipedia section based on the collected information.
Here is the format of your writing:
1. Use "#" Title" to indicate section title, "##" Title" to indicate subsection title, "###" Title" to indicate subsubsection title, and so on.
2. Use [1], [2], ..., [n] in line (for example, "The capital of the United States is Washington, D.C.[1][3]."). You DO NOT need to include a References or Sources section to list the sources at the end.
The collected information:\n{info}
The topic of the page: {topic}
The section you need to write: {section}
Write the section with proper inline citations (Start your writing with # section title. Don't include the page tile or try to write other sections):\n
"""

from sine.models.chatglm import GLM4Wrapper
section_writer = GLM4Wrapper()

def write_section(topic, section_title, searched_results):
    # search the information
    search_qs = [section_title]
    selected_snippets = sem_search.search(search_qs, top_k=5)

    info = ''
    for n, r in enumerate(selected_snippets):
        info += f'[{n + 1}] ' + '\n'.join([r])
        info += '\n\n'

    # print(WRITE_SECTION.format(info=info, topic=topic, section=section_title))
    message = [
        dict(role='user', content=WRITE_SECTION.format(info=info, topic=topic, section=section_title)),
    ]
    return outline_model.chat(message)

article = []
for sec_title in outline_tree:
    print('Writing ', sec_title)
    section_content = write_section(topic, sec_title, info)
    print(section_content)
    article.append(section_content)

pprint(article)


Writing  Introduction
# Introduction

The fear of being judged by others is a common psychological phenomenon that can significantly impact an individual's social interactions and well-being. This fear, often referred to as fear of negative evaluation (FNE) or fear of failure, is defined as "apprehension about others' evaluations, distress over negative evaluations by others, and the expectation that others would evaluate one negatively"[5]. Understanding the underlying mechanisms of this fear and exploring effective strategies to overcome it are essential for enhancing one's social life and self-esteem.

The study of culture and decision-making highlights the variations in how and why people from different cultures may decide differently, which can influence their social judgements and fears of being judged[1]. For instance, certain cultures may place a higher emphasis on group harmony and social approval, leading to increased anxiety about being judged by others. Exploring these cult

In [139]:
article_str = ''
for sec in article:
    article_str += sec + '\n'

print(article_str)

# Introduction

The fear of being judged by others is a common psychological phenomenon that can significantly impact an individual's social interactions and well-being. This fear, often referred to as fear of negative evaluation (FNE) or fear of failure, is defined as "apprehension about others' evaluations, distress over negative evaluations by others, and the expectation that others would evaluate one negatively"[5]. Understanding the underlying mechanisms of this fear and exploring effective strategies to overcome it are essential for enhancing one's social life and self-esteem.

The study of culture and decision-making highlights the variations in how and why people from different cultures may decide differently, which can influence their social judgements and fears of being judged[1]. For instance, certain cultures may place a higher emphasis on group harmony and social approval, leading to increased anxiety about being judged by others. Exploring these cultural differences can p