In [166]:
# Load environment variables from the .env file - presumably the API keys that the OpenAI and SerpAPI clients used below read from the environment
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

import os
#for i in os.environ: print(i) ##test environment vars


from langchain.llms import OpenAI
from langchain.utilities import SerpAPIWrapper
from langchain.agents import load_tools, initialize_agent
from langchain import PromptTemplate

from pprint import pprint as pp
import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_rows', 100)
from pprint import pprint
import ast

def displays(df, n = 5):
    """Render a quick preview of ``df``: its ``shape`` tuple, then its first ``n`` rows.

    Relies on the notebook-provided ``display`` function, so it is meant to be
    called from inside a Jupyter/IPython session. Returns ``None``.
    """
    dimensions = df.shape
    preview = df.head(n)
    display(dimensions, preview)

### LLM test

In [13]:
# Completion model for the smoke tests below; temperature is fixed at 0.
llm = OpenAI(
    model = 'text-davinci-003',
    temperature = 0,
    verbose = True,
)

# One-off prompt to confirm the model responds at all.
llm("Give one sentence that includes the word baseball.")

'\n\nI love to watch baseball games in the summer.'

## Testing SerpApi

In [4]:
# Only the "serpapi" tool is loaded for this first agent test.
tool_names = ["serpapi"]
tools = load_tools(tool_names)

In [9]:
# Agent built from the tools and LLM above; no custom prompt pieces are passed here.
agent = initialize_agent(
    tools,
    llm,
    agent = "zero-shot-react-description",
    verbose = True,
)

In [11]:
# Open-ended question used to check that the agent can call its search tool end to end.
agent.run("Who will win the 2024 US Presidential election?")



[1m> Entering new  chain...[0m
[32;1m[1;3m I need to find out who is running for the election
Action: Search
Action Input: "2024 US Presidential election candidates"[0m
Observation: [36;1m[1;3mFour years after a historically large number of candidates ran for president, the field for the 2024 campaign is getting crowded once more, ...[0m
Thought:[32;1m[1;3m I need to research the candidates and their chances of winning
Action: Search
Action Input: "2024 US Presidential election predictions"[0m
Observation: [36;1m[1;3mThe latest coverage of the 2024 presidential, House and Senate elections.[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: It is too early to predict who will win the 2024 US Presidential election.[0m

[1m> Finished chain.[0m


'It is too early to predict who will win the 2024 US Presidential election.'

# Langchain with Autocast

In [161]:
# load autocast data
# Columns carried forward into the rest of the notebook.
keycols = ["id", "question", "choices", "answer"]

# Source is coco's filtered events file; the original data lives at
# Data/Autocast/autocast/autocast_questions.json (load with pd.read_json).
# Only rows whose status is 'Active' are kept. Swap the literal for 'Resolved'
# to look at resolved questions, or additionally require non-null `choices`
# to restrict to multiple-choice questions.
ac = (
    pd.read_csv("Data/Autocast/filtered_events.csv")
      .loc[lambda events: events['status'] == 'Active', keycols]
)

In [167]:
# add letters to multiple choice choices, convert to dict
def clean_choices(choices):
    """Convert one raw ``choices`` cell into a ``{letter: choice}`` dict.

    Parameters
    ----------
    choices : str, list, tuple, dict, or missing (NaN / None / pd.NA)
        Normally the string form of a Python list as read from the CSV
        (e.g. ``"['yes', 'no']"``). Already-parsed sequences and
        already-lettered dicts are accepted too, so re-running the cell that
        applies this function to the column does not fail.

    Returns
    -------
    dict or missing value
        ``{'A': first, 'B': second, ...}`` in the original order. Labels
        continue past ``'Z'`` as ``'AA'``, ``'AB'``, ... so no choice is ever
        dropped. Missing input is returned unchanged.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a cell is not a valid
        Python literal.
    """
    # Already converted (e.g. the apply cell was run a second time): keep as is.
    if isinstance(choices, dict):
        return choices

    if not isinstance(choices, (list, tuple)):
        # Free-response questions have no choices; pass the missing marker through.
        if pd.isna(choices):
            return choices
        choices = ast.literal_eval(choices)
        # A cell that already held the string form of a lettered dict.
        if isinstance(choices, dict):
            return choices

    return {_choice_label(position): choice for position, choice in enumerate(choices)}


def _choice_label(index):
    """Return the spreadsheet-style label for a 0-based position (A..Z, AA, AB, ...)."""
    label = ""
    remaining = index + 1
    while remaining:
        remaining, offset = divmod(remaining - 1, 26)
        label = chr(ord("A") + offset) + label
    return label

# Letter every row's choices element by element.
ac['choices'] = ac['choices'].map(clean_choices)

In [182]:
# view key columns
displays(ac)

(62, 4)

Unnamed: 0,id,question,choices,answer
0,M3191,"What will the BLS report as the US civilian labor force participation rate (total including all ethnicities, 16 and older) for October 2030?",,
5,M4967,"This question asks, what will be the real median household income in the United States in 2030, in dollars?",,
36,M5865,"Longbets series: will the annual rate of US labor productivity growth for the year 2025, as measured by the U.S. Bureau of Labor Statistics, be 2 percent or above?","{'A': 'yes', 'B': 'no'}",
37,M5866,"Longbets series: will the June, 2025 Labor Force Participation Rate and unemployment rate, reported by the US Bureau of Labor Statistics, be, respectively above 60 percent and below 7.5 percent?","{'A': 'yes', 'B': 'no'}",
38,M5867,What will US house prices be at the end of 2030?,,


In [172]:
# sample a question using question ID
ac_id = "M5865"
ac_filterid = ac[ac['id'] == ac_id]
if ac_filterid.empty:
    # Same exception type `.values[0]` would raise, but with a message that
    # says which ID is missing (e.g. it was removed by the status filter).
    raise IndexError(f"Question ID {ac_id!r} not found in the filtered Autocast data")

sampled_row = ac_filterid.iloc[0]
sampled_choices = sampled_row['choices']
autocast_question = str(sampled_row['question'])

# Questions without choices hold NaN in `choices`; appending str(NaN) would
# put a literal 'nan' at the end of the prompt, so only append real choices.
choices_missing = sampled_choices is None or (
    isinstance(sampled_choices, float) and pd.isna(sampled_choices)
)
if not choices_missing:
    autocast_question += ' ' + str(sampled_choices)

print(f"Question:\n\n{autocast_question}")

Question:

Longbets series: will the annual rate of US labor productivity growth for the year 2025, as measured by the U.S. Bureau of Labor Statistics, be 2 percent or above? {'A': 'yes', 'B': 'no'}


In [178]:
# Create prompt template

# Opening section of the custom agent prompt (passed as 'prefix' in agent_kwargs).
# It frames the model as a prompt engineer and describes the two tools by the
# names the format instructions expect in `Action:` lines (Search, Wikipedia).
prefix = """You are an expert LLM prompt engineer. Your job is to take
in a simple forecasting question, which may not provide enough context
and examples for the LLM to answer adequately, and to output a prompt
that is optimized for querying an LLM for a response with maximum
accuracy.

The tools you have at your disposal are the following:

Search: A Google search engine API. It is useful when you need to provide
context and examples about current events.

Wikipedia: A search engine. Useful for when you need to answer questions about current
events. Input should be a search query.

Use the following format:"""



# Middle section of the custom agent prompt (passed as 'format_instructions').
# It lays out the Thought / Action / Action Input / Observation loop, the
# required layout of the final engineered prompt, and one worked example.
# NOTE(review): the layout line reads "Here are example(s) for the question:"
# while the worked example uses "Here are example(s) similar to this question:";
# the recorded agent outputs follow the worked example's wording - confirm which
# one is intended before parsing the agent's answers.
format_instructions = """
Question: the input question you must prompt engineer.
It will be provided in multiple choice format.

Thought: you should always think about what to do.

Action: the action to take. It should be one of [Search, Wikipedia].

Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer

Your final answer should provide context and examples on
the question's topic(s) to optimize the prompt. Provide at least 1
sentence of context and 1 example, but you can provide more at your
discretion. Your examples must include a question and multiple choice options.

The question should be the original question from the question provided
to you. The engineered prompt should be in the following format:

Here is context for the question: (context)

Here are example(s) for the question: (examples)

Here is the question: (the original input question you've been asked)

Here is an example of what the pipeline should look like:

Question: Who will win the 2024 US presidential election?

A: Joe Biden,
B: Donald Trump,
C: Pete Buttigieg,
D: Ron DeSantis

Final Answer:

Here is context for the question: The US presidential election
is held every 4 years. The United States is a country founded in
1776 which holds democratic elections.

Here are example(s) similar to this question:

Who will win the 2020 US presidential election?

A: Joe Biden
B: Donald Trump
C: Bernie Sanders
D: Jeb Bush

Here is the question:

Who will win the 2024 US presidential election?

A: Joe Biden,
B: Donald Trump,
C: Pete Buttigieg,
D: Ron DeSantis

"""

# Closing section of the custom agent prompt (passed as 'suffix').
# `{input}` and `{agent_scratchpad}` are left as placeholders - presumably filled
# in by the agent with the question and its running Thought/Action trace
# (TODO confirm against the installed LangChain version).
suffix = """
Begin!
Question: {input}
Thought:{agent_scratchpad}
"""

# prompt = PromptTemplate(autocast_question = [autocast_question],
#                         template = template,
#                         )


# llm(prompt.format(prompt = prompt))

In [179]:
### initialize LLM, tools, agent
# llm: same model settings as the earlier LLM test, temperature fixed at 0
llm = OpenAI(model = 'text-davinci-003', temperature = 0, verbose = True)

# tools: SerpAPI search plus Wikipedia
tool_names = ["serpapi", "wikipedia"]
tools = load_tools(tool_names, llm = llm)

# agent: same agent type as before, but with the custom prompt pieces
# (prefix / format_instructions / suffix) supplied through agent_kwargs
agent = initialize_agent(
    tools,
    llm,
    agent = "zero-shot-react-description",
    verbose = True,
    agent_kwargs = dict(
        prefix = prefix,
        format_instructions = format_instructions,
        suffix = suffix,
    ),
)

In [None]:
# agent.run(autocast_question)

### Question ID M5865

In [180]:
# note: this question is active and in the future
# Single print call; the newline separator reproduces the two-line output.
print(f"ID: {ac_id}", f"\nQuestion:\n\n{autocast_question}", sep = "\n")

ID: M5865

Question:

Longbets series: will the annual rate of US labor productivity growth for the year 2025, as measured by the U.S. Bureau of Labor Statistics, be 2 percent or above? {'A': 'yes', 'B': 'no'}


In [181]:
# run agent
agent.run(autocast_question)



[1m> Entering new  chain...[0m
[32;1m[1;3mI need to provide context and examples to help the LLM understand the question.
Action: Search
Action Input: US labor productivity growth[0m
Observation: [36;1m[1;3mTotal factor productivity decreases 1.2% in 2022, following a 4.0% increase in 2021. 03/23/2023 · Total factor productivity increased in 15 out of 21 major ...[0m
Thought:[32;1m[1;3mI need to provide more examples to help the LLM understand the question.
Action: Wikipedia
Action Input: US labor productivity growth[0m
Observation: [33;1m[1;3mPage: Productivity
Summary: Productivity is the efficiency of production of goods or services expressed by some measure. Measurements of productivity are often expressed as a ratio of an aggregate output to a single input or an aggregate input used in a production process, i.e. output per unit of input, typically over a specific period of time. The most common example is the (aggregate) labour productivity measure, one example of w

'Here is context for the question: The US Bureau of Labor Statistics measures labor productivity growth annually. This measure is important for understanding the economic health of the country.\n\nHere are example(s) similar to this question:\n\nWhat was the annual rate of US labor productivity growth in 2020?\n\nA: 0.3%\nB: 1.2%\nC: 2.5%\nD: 4.0%\n\nHere is the question:\n\nLongbets series: will the annual rate of US labor productivity growth for the year 2025, as measured by the U.S. Bureau of Labor Statistics, be 2 percent or above?\n\nA: yes\nB: no'

### Question ID G1

In [144]:
# id = G1 (note: this question is already resolved)
# NOTE(review): no cell shown here sets `ac_id = "G1"` or rebuilds
# `autocast_question`, and the load cell keeps only 'Active' rows, so this
# output presumably came from an earlier kernel state (sampling cell re-run
# against a 'Resolved' filter) - confirm and record that step before re-running.
print(f"Question:\n\n{autocast_question}")

Question:

What will the end-of-day closing value for the dollar against the renminbi be on 1 January 2016? {'A': 'Less than 6.30', 'B': 'Between 6.30 and 6.35, inclusive', 'C': 'More than 6.35 but less than 6.40', 'D': '6.40 or more'}


In [143]:
# run agent
agent.run(autocast_question)



[1m> Entering new  chain...[0m
[32;1m[1;3mI need to find out the current exchange rate between the dollar and the renminbi.
Action: Search
Action Input: Exchange rate between dollar and renminbi[0m
Observation: [36;1m[1;3mConvert Chinese Yuan Renminbi to US Dollar ; 1 CNY, 0.139287 USD ; 5 CNY, 0.696433 USD ; 10 CNY, 1.39287 USD ; 25 CNY, 3.48217 USD.[0m
Thought:[32;1m[1;3mI need to find out the historical exchange rate between the dollar and the renminbi.
Action: Search
Action Input: Historical exchange rate between dollar and renminbi[0m
Observation: [36;1m[1;3mTable of 1 US Dollar to Chinese Yuan Exchange Rate: Updated: 22/06/23 05:45. Date, US Dollar to Chinese Yuan, Link. Wednesday 21 June 2023, 1 USD = 7.1798 ...[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer:

Here is context for the question: The exchange rate between the US dollar and the Chinese renminbi is an important indicator of the relative strength of the two currencies.

Here are examp

'Here is context for the question: The exchange rate between the US dollar and the Chinese renminbi is an important indicator of the relative strength of the two currencies.\n\nHere are example(s) similar to this question:\n\nWhat was the end-of-day closing value for the dollar against the renminbi on 15 June 2023?\nA: Less than 7.15\nB: Between 7.15 and 7.20, inclusive\nC: More than 7.20 but less than 7.25\nD: 7.25 or more\n\nHere is the question:\n\nWhat will the end-of-day closing value for the dollar against the renminbi be on 1 January 2016?\nA: Less than 6.30\nB: Between 6.30 and 6.35, inclusive\nC: More than 6.35 but less than 6.40\nD: 6.40 or more'