In [90]:
!pip install --quiet langchain openai wikipedia python-dotenv tiktoken

In [70]:
import os
from langchain.docstore.wikipedia import Wikipedia
from langchain.llms import OpenAI
from langchain.agents import initialize_agent, Tool, AgentExecutor
from langchain.agents.react.base import DocstoreExplorer
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.agents import load_tools
from langchain import FewShotPromptTemplate
from langchain.tools import WikipediaQueryRun
from langchain.chat_models import ChatOpenAI
from langchain_community.utilities import WikipediaAPIWrapper
import tiktoken

In [48]:
import os

load_dotenv()

#Loading models
gpt3 = OpenAI(api_key = os.environ["OPENAI_API_KEY"], model="gpt-3.5-turbo-instruct")
gpt4 = ChatOpenAI(api_key = os.environ["OPENAI_API_KEY"], model="gpt-4")

# Wikipedia search and GPT as Summariser


In [63]:
#Making wikipedia object which takes a topic and returns wikipedia data. return doc_content_chars_max characters.
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(doc_content_chars_max = 2000))

In [16]:
wikipedia.run("HUNTER X HUNTER")

'Page: Hunter × Hunter\nSummary: Hunter × Hunter (stylized as HUNTER×HUNTER and pronounced "hunter hunter") is a Japanese manga series written and illustrated by Yoshihiro Togashi. It has been serialized in Shueisha\'s shōnen manga magazine Weekly Shōnen Jump since March 1998, although the manga has frequently gone on extended hiatuses since 2006. Its chapters have been collected in 37 tankōbon volumes as of November 2022. The story focuses on a young boy named Gon Freecss who discovers that his father, who left him at a young age, is actually a world-renowned Hunter, a licensed professional who specializes in fantastical pursuits such as locating rare or unidentified animal species, treasure hunting, surveying unexplored enclaves, or hunting down lawless individuals. Gon departs on a journey to become a Hunter and eventually find his father. Along the way, Gon meets various other Hunters and encounters the paranormal.\nHunter × Hunter was adapted into a 62-episode anime television ser

In [66]:
#Creating a template giving the task, example of output format, and what the input will look like
template = """You are a assitant great at summarising stuff. 


            You will be given a Wikipedia snippet about a subject and also the subject separately. 
            Your task is to summarise it into 3 short sentences revloving around the subject. Do not have information in the summary which is not related to subject. Only use the knoweldge in the context and not add anything.
            Do not return anything except the 3 sentances and do not return more than 3 sentences. The sentences should be separated by full stops '.' and not NEW LINE CHARACTERS. No bullet points too.
            
            Here are some examples of what output should look like.

            Summary: 'Sun is a part of Solar System. Earth revolves around the sun. Sun will covered by the moon on April 8, 2024.'


            Subject: {subject}
            
            Context: {context}

            Summary: """

In [67]:
#Creates a PromptTemplateObject through which we can pass required subject and the context
prompt_template = PromptTemplate(template=template, input_variables=["subject", "context"])

In [43]:
#Example of how a complete prompt looks like
print(prompt_template.format(subject = "Hunter X Hunter", context = wikipedia.run("HUNTER X HUNTER")))

You are a assitant great at summarising stuff. 

            You will be given a Wikipedia snippet about a subject and also the subject separately. 
            Your task is to summarise it into 3 short sentences. Only use the knoweldge in the context and not add anything.
            Do not return anything except the 3 sentances. The sentences should be separated by full stops '.' and not NEW LINE CHARACTERS. No bullet points too.
            
            Here are some examples of what output should look like.

            Summary: 'Sun is a part of Solar System. Earth revolves around the sun. Sun will covered by the moon on April 8, 2024.'


            Subject: Hunter X Hunter
            
            Context: Page: Hunter × Hunter
Summary: Hunter × Hunter (stylized as HUNTER×HUNTER and pronounced "hunter hunter") is a Japanese manga series written and illustrated by Yoshihiro Togashi. It has been serialized in Shueisha's shōnen manga magazine Weekly Shōnen Jump since March 1998, al

In [76]:
#GPT-4 output using gpt4.invoke
print(gpt4.invoke(prompt_template.format(subject = "Charlie Hebdo", context = wikipedia.run("Charlie Hebdo"))))

content="'Charlie Hebdo is a French satirical weekly magazine known for its anti-racist, secular, and libertarian stances. The magazine has been targeted in three terrorist attacks in 2011, 2015, and 2020, in response to its controversial cartoons depicting Muhammad. Charlie Hebdo first appeared in 1970, was briefly ceased in 1981, and resurrected in 1992, with Gérard Biard currently serving as the editor-in-chief.'"


In [68]:
#gpt 3 output
print(gpt3(prompt_template.format(subject = "File Explorer", context = wikipedia.run("File Explorer"))))


Cabinet is a file format used in Microsoft Windows for archiving and compressing data. It is recognized by its .cab extension and magic number MSCF. ES File Explorer is a popular file manager application for Android devices, known for its cloud storage integration and root browser feature.


In [73]:
#Used to check prompt length
enc = tiktoken.encoding_for_model("gpt-3.5-turbo-instruct")

len(enc.encode(prompt_template.format(subject = "File Explorer", context = wikipedia.run("File Explorer"))))

'You are a assitant great at summarising stuff. \n\n            You will be given a Wikipedia snippet about a subject and also the subject separately. \n            Your task is to summarise it into 3 short sentences revloving around the subject. Do not have information in the summary which is not related to subject. Only use the knoweldge in the context and not add anything.\n            Do not return anything except the 3 sentances and do not return more than 3 sentences. The sentences should be separated by full stops \'.\' and not NEW LINE CHARACTERS. No bullet points too.\n            \n            Here are some examples of what output should look like.\n\n            Summary: \'Sun is a part of Solar System. Earth revolves around the sun. Sun will covered by the moon on April 8, 2024.\'\n\n\n            Subject: File Explorer\n            \n            Context: Page: File Explorer\nSummary: File Explorer, previously known as Windows Explorer, is a file manager application and def

# Few Shot Prompt Temlate for GPT context generation

In [82]:
#How the examples should look like
example_template = """
"subject": {subj},
"context": {contex}"""

In [83]:
#Actual examples that should be passed to model, should match {__} in the example template
examples = [{
    "subj": "File Explorer",
    "contex": 'File Explorer, previously known as Windows Explorer, is a file manager application in Microsoft Windows operating systems. The application allows for a graphical user interface for accessing file systems and other user interface elements. It was renamed from "Windows Explorer" to "File Explorer" in Windows 8.'
},
{"subj":"Charlie Hebdo",
 "contex": "Charlie Hebdo is a French satirical weekly magazine known for its anti-racist, secular, and libertarian stances. The magazine has been targeted in three terrorist attacks in 2011, 2015, and 2020, in response to its controversial cartoons depicting Muhammad. Charlie Hebdo first appeared in 1970, was briefly ceased in 1981, and resurrected in 1992, with Gérard Biard currently serving as the editor-in-chief."},
 {
     "subj": "Andrew Gilligan",
     "contex": "Andrew Paul Gilligan is a British policy adviser and former transport adviser to Boris Johnson. He has served as a senior correspondent of The Sunday Times and head of the Capital City Foundation at Policy Exchange. Gilligan is widely known for his 2003 report on BBC Radio 4's Today programme about the British government's briefing paper on Iraq and weapons of mass destruction."
 }]

In [84]:
#Making a example prompt template
example_prompt = PromptTemplate(input_variables=["subject", "context"], template=example_template)

#What should go before examples, i.e. Task
prefix = """You are a knowledge book which can tell information on things asked. 
Your task is, given a subject you have to return 3 facts about the subject as context about the subject. You do not return anything else except the context. The sentences should be full stop separated as shown in the examples below.
"""

#What should go after examples, i.e. Input
suffix = """subject: {subject}
context: """

In [85]:
#Mixing everything together
few_shot_prompt_template = FewShotPromptTemplate(examples = examples,
                                       example_prompt=example_prompt,
                                       prefix = prefix,
                                       suffix = suffix,
                                       input_variables = ["subject"],
                                       example_separator = "\n\n"
                                         )

In [86]:
#Example of a full prompt
subject = "Hunter X Hunter"

print(few_shot_prompt_template.format(subject=subject))

You are a knowledge book which can tell information on things asked. 
Your task is, given a subject you have to return 3 facts about the subject as context about the subject. You do not return anything else except the context. The sentences should be full stop separated as shown in the examples below.



"subject": File Explorer,
"context": File Explorer, previously known as Windows Explorer, is a file manager application in Microsoft Windows operating systems. The application allows for a graphical user interface for accessing file systems and other user interface elements. It was renamed from "Windows Explorer" to "File Explorer" in Windows 8.


"subject": Charlie Hebdo,
"context": Charlie Hebdo is a French satirical weekly magazine known for its anti-racist, secular, and libertarian stances. The magazine has been targeted in three terrorist attacks in 2011, 2015, and 2020, in response to its controversial cartoons depicting Muhammad. Charlie Hebdo first appeared in 1970, was briefly

In [87]:
print(gpt3(few_shot_prompt_template.format(subject=subject)))

Hunter X Hunter is a popular Japanese manga series written and illustrated by Yoshihiro Togashi. The series follows the adventures of a young boy named Gon Freecss as he aspires to become a Hunter, a special class of individuals who are tasked with hunting down and capturing rare and elusive creatures. The manga has been adapted into multiple anime series and films, as well as video games and other media.


In [88]:
print(gpt4.invoke(few_shot_prompt_template.format(subject=subject)))

content='"Hunter X Hunter" is a Japanese manga series written and illustrated by Yoshihiro Togashi. The story focuses on a young boy named Gon Freecss, who discovers that his father, who he was told was dead, is actually alive and a world-renowned Hunter, a licensed professional capable of tracking down secret treasures, rare beasts, or even other individuals. It has been adapted into two anime series by Nippon Animation and Madhouse and has also spawned numerous video games and other merchandise.'
