In [1]:
# !pip -q install langchain-groq
# !pip -q install -U langchain langgraph tavily-python
# !pip -q install langchain-cohere
# !pip -q install langchain-openai

# The goal

Create Learning material
1. Get the topic
2. Use the topic to create keywords for a search to research info needed for creating learning material
3. Write a draft material
4. Review the material
7. rewrite if needed

In [2]:
import os
from google.colab import userdata

os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')
os.environ["TAVILY_API_KEY"] = userdata.get('TAVILY_API_KEY')
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = userdata.get("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"] = "learning-assistant-llama"

In [3]:
from langchain_groq import ChatGroq

GROQ_LLM = ChatGroq(model="llama3-70b-8192")

In [4]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import JsonOutputParser

In [5]:
def write_markdown_file(content, filename):
  """Writes the given content as a markdown file to the local directory.

  Args:
    content: The string content to write to the file.
    filename: The filename to save the file as.
  """
  with open(f"{filename}.md", "w") as f:
    f.write(content)


## Basic Chains

1. Generate keywords/queries for research
2. Write draft material
3. Rewrite Router
3. Rewrite content

In [6]:
## Search keywords
search_keyword_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a master at working out the best keywords to search for in a web search to get the best info about the topic.

    given the TOPIC, Work out the best keywords that will find the best info for helping to write a learning material about the topic.

    Return a JSON with a single key 'keywords' with no more than 2 keywords and no premable or explaination.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    TOPIC: {topic} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["topic"],
)

search_keyword_chain = search_keyword_prompt | GROQ_LLM | JsonOutputParser()

# print(search_keyword_chain.invoke({"topic": "LLM"}))

In [7]:
## Write Draft Email
draft_writer_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a master at creating detailed learning material for a given TOPIC based on the RESEARCH INFOFMATION.
    Write a detailed learning material.
    You never make up information that hasn't been provided in RESEARCH INFOFMATION.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    TOPIC: {topic} \n
    RESEARCH INFORMATION: {research_info} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["topic", "research_info"],
)

draft_writer_chain = draft_writer_prompt | GROQ_LLM | StrOutputParser()

# content = """
# Algorithmic bias describes systematic and repeatable errors in a computer system that create "unfair" outcomes, such as "privileging" one category over another in ways different from the intended function of the algorithm.
# Bias can emerge from many factors, including but not limited to the design of the algorithm or the unintended or unanticipated use or decisions relating to the way data is coded, collected, selected or used to train the algorithm. For example, algorithmic bias has been observed in search engine results and social media platforms. This bias can have impacts ranging from inadvertent privacy violations to reinforcing social biases of race, gender, sexuality, and ethnicity. The study of algorithmic bias is most concerned with algorithms that reflect "systematic and unfair" discrimination. This bias has only recently been addressed in legal frameworks, such as the European Union's General Data Protection Regulation (2018) and the proposed Artificial Intelligence Act (2021).
# As algorithms expand their ability to organize society, politics, institutions, and behavior, sociologists have become concerned with the ways in which unanticipated output and manipulation of data can impact the physical world. Because algorithms are often considered to be neutral and unbiased, they can inaccurately project greater authority than human expertise (in part due to the psychological phenomenon of automation bias), and in some cases, reliance on algorithms can displace human responsibility for their outcomes. Bias can enter into algorithmic systems as a result of pre-existing cultural, social, or institutional expectations; by how features and labels are chosen; because of technical limitations of their design; or by being used in unanticipated contexts or by audiences who are not considered in the software's initial design.[2]
# Algorithmic bias has been cited in cases ranging from election outcomes to the spread of online hate speech. It has also arisen in criminal justice, healthcare, and hiring, compounding existing racial, socioeconomic, and gender biases. The relative inability of facial recognition technology to accurately identify darker-skinned faces has been linked to multiple wrongful arrests of black men, an issue stemming from imbalanced datasets. Problems in understanding, researching, and discovering algorithmic bias persist due to the proprietary nature of algorithms, which are typically treated as trade secrets. Even when full transparency is provided, the complexity of certain algorithms poses a barrier to understanding their functioning. Furthermore, algorithms may change, or respond to input or output in ways that cannot be anticipated or easily reproduced for analysis. In many cases, even within a single website or application, there is no single "algorithm" to examine, but a network of many interrelated programs and data inputs, even between users of the same service.
# """

# output = draft_writer_chain.invoke({"topic": "Algorithmic bias", "research_info": content})
# print(output)

In [8]:
## Rewrite Router
rewrite_router_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an expert at evaluating the learning material and deciding if they need to be rewritten or not.
    Criteria for the proper learning material: It should have all the sections mentioned here.
    1. Introduction
    2. Main Content
    3. Conclusion
    4. Points to Remember
    5. Quiz/Test
    6. Assignment

    Read the given learning material line by line and verify if all the given sections are present.
    I have to rewrite the learning material even if any one of the above section is not present.
    Give a binary choice 'rewrite' (for needs to be rewritten) or 'no_rewrite' (for doesn't need to be rewritten) based on the the criteria.
    Return the a JSON with a single key 'router_decision' and no premable or explaination.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    TOPIC: {topic} \n
    LEARNING MATERIAL: {learning_material} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["topic","learning_material"],
)

rewrite_router = rewrite_router_prompt | GROQ_LLM | JsonOutputParser()

# topic = 'Algorithmic bias'

# print(rewrite_router.invoke({"topic": topic, "learning_material":output}))

In [9]:
## Draft Email Analysis
from pprint import pprint
draft_analysis_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are the Quality Control Agent. Review the provided LEARNING MATERIAL.
    Proper learning material should have these sections.
    1. Introduction
    2. Main Content
    3. Conclusion
    4. Points to Remember
    5. Quiz/Test
    6. Assignment
    Check if the given learning material has all the given sections.

    Give feedback of how the learning material can be improved and what specific things can be added or change\
    to make the learning material more effective.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    TOPIC: {topic} \n\n
    LEARNING MATERIAL: {draft_material} \n\n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["topic","draft_material"],
)

draft_analysis_chain = draft_analysis_prompt | GROQ_LLM | StrOutputParser()

# feedback = draft_analysis_chain.invoke({
#     "topic": "Algorthmic Bias",
#     "learning_material": output
# })

In [10]:
# Rewrite Email with Analysis
rewrite_email_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are the Final Learning Material agent. Read the DRAFT LEARNING MATERIAL FEEDBACK below from the QC Agent \
    and use it to rewrite and improve the DRAFT LEARNING MATERIAL to create a final one.

    You never make up or add information that hasn't been provided by the RESEARCH INFORMATION or in DRAFT LEARNING MATERIAL.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    TOPIC: {topic} \n\n
    RESEARCH INFORMATION : {research_info} \n\n
    DRAFT LEARNING MATERIAL: {draft_material} \n\n
    DRAFT LEARNING MATERIAL FEEDBACK: {draft_material_feedback} \n\n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["topic",
                     "research_info",
                     "draft_material_feedback",
                     "draft_material",
                     ],
)

rewrite_chain = rewrite_email_prompt | GROQ_LLM | StrOutputParser()

# final_material = rewrite_chain.invoke({"topic": "Algorithmic Bias",
#                                  "draft_analysis":feedback,
#                                  "research_info":content,
#                                  "initial_material": output})

# print(final_material)

## Tools Setup

In [11]:
from langchain_community.tools.tavily_search import TavilySearchResults

web_search_tool = TavilySearchResults(k=1)

## State

In [12]:
from langchain.schema import Document
from langgraph.graph import END, StateGraph

from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    topic: str
    draft_material : str
    final_material : str
    research_info : List[str]
    num_steps : int
    draft_material_feedback : dict

In [13]:

def research_info_search(state):

    print("---RESEARCH INFO SEARCHING---")
    topic = state["topic"]
    num_steps = state['num_steps']
    num_steps += 1

    keywords = search_keyword_chain.invoke({"topic": topic})
    keywords = keywords['keywords']
    # keywords.append(topic)
    full_searches = []
    for keyword in keywords:
        print("keyword:", keyword)
        temp_docs = web_search_tool.invoke({"query": keyword})
        web_results = "\n".join([d["content"] for d in temp_docs])
        web_results = Document(page_content=web_results)
        if full_searches is not None:
            full_searches.append(web_results)
        else:
            full_searches = [web_results]
    print("RESEARCH INFO: ", full_searches)
    return {"research_info": full_searches, "num_steps":num_steps}

# HOW TO USE:
# my_graph_state = GraphState(
#     topic="Machine Learning Algorithms",
#     num_steps=0
# )

# r = research_info_search(my_graph_state)
# print(r)

In [14]:
def draft_material_writer(state):
    print("---DRAFT MATERIAL WRITER---")
    topic = state["topic"]
    research_info = state["research_info"]
    num_steps = state['num_steps']
    num_steps += 1

    draft_material = draft_writer_chain.invoke({"topic": topic,
                                     "research_info":research_info})
    print("DRAFT MATERIAL: ", draft_material)

    write_markdown_file(draft_material, "draft_material")

    return {"draft_material": draft_material, "num_steps":num_steps}

In [15]:
def analyze_draft_material(state):
    print("---DRAFT MATERIAL ANALYZER---")
    topic = state["topic"]
    draft_material = state["draft_material"]
    num_steps = state['num_steps']
    num_steps += 1

    draft_material_feedback = draft_analysis_chain.invoke({"topic": topic,
                                                "draft_material": draft_material})

    write_markdown_file(str(draft_material_feedback), "draft_material_feedback")
    return {"draft_material_feedback": draft_material_feedback, "num_steps":num_steps}

In [16]:
def rewrite_material(state):
    print("---REWRITE MATERIAL ---")
    topic = state["topic"]
    research_info = state["research_info"]
    draft_material_feedback = state["draft_material_feedback"]
    draft_material = state["draft_material"]
    num_steps = state['num_steps']
    num_steps += 1

    final_material = rewrite_chain.invoke({"topic": topic,
                                                "draft_material_feedback": draft_material_feedback,
                                                "research_info":research_info,
                                                "draft_material":draft_material
                                           })

    write_markdown_file(str(final_material), "final_material")
    return {"final_material": final_material, "num_steps":num_steps}

In [17]:
def no_rewrite(state):
    print("---NO REWRITE MATERIAL ---")
    ## Get the state
    draft_material = state["draft_material"]
    num_steps = state['num_steps']
    num_steps += 1

    write_markdown_file(str(draft_material), "final_material")
    return {"final_email": draft_material, "num_steps":num_steps}

In [18]:
def state_printer(state):
    """print the state"""
    print("---STATE PRINTER---")
    print(f"Topic: {state['topic']} \n" )
    print(f"Draft Material: {state['draft_material']} \n")
    print(f"Final Material: {state['final_material']} \n" )
    print(f"Research Info: {state['research_info']} \n" )
    print(f"Feedback: {state['draft_material_feedback']} \n")
    print(f"Num Steps: {state['num_steps']} \n")
    return

In [19]:
def route_to_rewrite(state):

    print("---ROUTE TO REWRITE---")
    topic = state["topic"]
    draft_material = state["draft_material"]

    router = rewrite_router.invoke({"topic": topic,
                                     "learning_material":draft_material})
    print("ROUTER DECISION: ", router['router_decision'])
    if router['router_decision'] == 'rewrite':
        print("---ROUTE TO ANALYSIS - REWRITE---")
        return "rewrite"
    elif router['router_decision'] == 'no_rewrite':
        print("---ROUTE TO FINAL MATERIAL---")
        return "no_rewrite"

In [20]:

workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("research_info_search", research_info_search) # web search
workflow.add_node("draft_material_writer", draft_material_writer)
workflow.add_node("state_printer", state_printer)
workflow.add_node("analyze_draft_material", analyze_draft_material)
workflow.add_node("rewrite_material", rewrite_material)
workflow.add_node("no_rewrite", no_rewrite)

In [21]:
workflow.set_entry_point("research_info_search")
workflow.add_edge("research_info_search", "draft_material_writer")


workflow.add_conditional_edges(
    "draft_material_writer",
    route_to_rewrite,
    {
        "rewrite": "analyze_draft_material",
        "no_rewrite": "no_rewrite",
    },
)
workflow.add_edge("analyze_draft_material", "rewrite_material")
workflow.add_edge("no_rewrite", "state_printer")
workflow.add_edge("rewrite_material", "state_printer")
workflow.add_edge("state_printer", END)

In [22]:
# Compile
app = workflow.compile()

In [23]:
# run the agent
inputs = {"topic": "Meta's LLAMA models","research_info": None, "num_steps":0}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")

---RESEARCH INFO SEARCHING---
keyword: LLaMA AI
keyword: Meta AI models
RESEARCH INFO:  [Document(page_content="In the coming months, we expect to introduce new capabilities, longer context windows, additional model sizes, and enhanced performance, and we'll share the Llama 3 research paper. Meta AI, built with Llama 3 technology, is now one of the world's leading AI assistants that can boost your intelligence and lighten your load—helping you ...\nA better assistant: Thanks to our latest advances with Meta Llama 3, we believe Meta AI is now the most intelligent AI assistant you can use for free - and it's available in more countries across our apps to help you plan dinner based on what's in your fridge, study for your test and so much more. More info: You can use Meta AI in feed ...\nLlama 3 is an accessible, open-source large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a found