# Parsing SQL Data

In [None]:
!pip install langchain
!pip install openai

In [None]:
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

In [None]:
Server = ""
Database = ""
Driver = ""
Username = ""
Password = ""
db_conn = f"mssql://{Username}:{Password}@{Server}/{Database}?driver={Driver}"

db = SQLDatabase.from_uri(db_conn)
llm = OpenAI(temperature=0)

In [None]:
db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)

In [None]:
query = ""
db_chain.run(query)

# Analyzing Tabular Data - Approach1

In [None]:
!pip install langchain
!pip install openai
!pip install chromadb
!pip install tiktoken

In [None]:
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

In [None]:
import os
os.environ['OPENAI_API_KEY'] = ""

In [None]:
!wget https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv

--2023-04-23 07:37:22--  https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60302 (59K) [text/plain]
Saving to: ‘titanic.csv’


2023-04-23 07:37:23 (8.21 MB/s) - ‘titanic.csv’ saved [60302/60302]



In [None]:
loader = CSVLoader(file_path='titanic.csv')

In [None]:
index_creator = VectorstoreIndexCreator()
doc_search = index_creator.from_loaders([loader])



In [None]:
chain =RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=doc_search.vectorstore.as_retriever(), input_key = "question", verbose=True)

In [None]:
query =  "impute average age for the missing values in the Age columnn and tell the new average age" #"how many rows and columns are present in this dataset"   #"run df.info() on the entire data and write your observations"
response = chain({"question":query})
print(response['result'])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
 The average age for the missing values can be calculated by taking the average of the ages of the passengers that were given. The average age of the passengers provided is 27.5, so the imputed average age for the missing values in the Age column is 27.5.


# Analyzing Tabular Data - Approach2

In [None]:
!pip install openai
!pip install langchain

In [7]:
import os
os.environ['OPENAI_API_KEY'] = ""

In [8]:
from langchain.llms import OpenAI
from langchain.agents import create_pandas_dataframe_agent
import pandas as pd

In [9]:
df = pd.DataFrame({
    "Name" : ["Alice", "Bob", "James", "James"],
    "Age" : [32, 25, 56, 23],
    "City" : ["New York", "Sydney", "Delhi", "New York"]
})

In [11]:
agent = create_pandas_dataframe_agent(OpenAI(temperature=0, model_kwargs = {"model":'text-davinci-003'}), df, verbose=True)
# agent = create_pandas_dataframe_agent(OpenAI(temperature=0, model ='text-davinci-003'), df, verbose=True)

In [None]:
agent.run("Give the list of names after sorting the age in ascending order")

"['James', 'Bob', 'Alice', 'James']"

In [None]:
agent.run("Sort the information by age ascending order")

'The dataframe sorted by age in ascending order is:\n    Name  Age      City\n3  James   23  New York\n1    Bob   25    Sydney\n0  Alice   32  New York\n2  James   56     Delhi'

In [None]:
agent.run("Who are the 2 people with the same name")

'James (age 56) and James (age 23)'

In [None]:
agent.run("What does the data represent?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to look at the data to understand what it represents
Action: python_repl_ast
Action Input: print(df.head())[0m    Name  Age      City
0  Alice   32  New York
1    Bob   25    Sydney
2  James   56     Delhi
3  James   23  New York

Observation: [36;1m[1;3mNone[0m
Thought:[32;1m[1;3m I can see that the data represents the names, ages, and cities of four people
Final Answer: The data represents the names, ages, and cities of four people.[0m

[1m> Finished chain.[0m


'The data represents the names, ages, and cities of four people.'

In [None]:
agent.run("what is the age of james staying in New York?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to find James in New York
Action: python_repl_ast
Action Input: df[(df['Name'] == 'James') & (df['City'] == 'New York')]['Age'][0m
Observation: [36;1m[1;3m3    23
Name: Age, dtype: int64[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: 23[0m

[1m> Finished chain.[0m


'23'

In [None]:
agent.run("how many rows and columns are present?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find out the shape of the dataframe
Action: python_repl_ast
Action Input: df.shape[0m
Observation: [36;1m[1;3m(3, 3)[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: 3 rows and 3 columns[0m

[1m> Finished chain.[0m


'3 rows and 3 columns'

# OR

In [5]:
df = pd.read_excel('/content/sales_data_sample - new.xlsx')

In [6]:
agent = create_pandas_dataframe_agent(OpenAI(temperature=0, model_kwargs = {"model":'text-davinci-003'}), df, verbose=True)
# agent = create_pandas_dataframe_agent(OpenAI(temperature=0, model ='text-davinci-003'), df, verbose=True)

In [None]:
agent.run("what is the total sales when country is USA and productline is motorcycles for the year 2003")

'The total sales when country is USA and productline is motorcycles for the year 2003 is 178108.95.'

In [None]:
agent.run("what is the total sales per quarter when country is USA and productline is motorcycles for the year 2003")

'The total sales per quarter when country is USA and productline is motorcycles for the year 2003 is 25783.76 for Q1, 59619.84 for Q3, and 92705.35 for Q4.'

In [None]:
agent.run("Display the top 5 records when country is USA and productline is motorcycles for the year 2003")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include records with country USA and productline motorcycles for the year 2003
Action: python_repl_ast
Action Input: df[(df['COUNTRY'] == 'USA') & (df['PRODUCTLINE'] == 'Motorcycles') & (df['YEAR_ID'] == 2003)].head()[0m
Observation: [36;1m[1;3m   ORDERNUMBER  QUANTITYORDERED  PRICEEACH  ORDERLINENUMBER    SALES  \
0        10107               30      95.70                2  2871.00   
3        10145               45      83.26                6  3746.70   
4        10159               49     100.00               14  5205.27   
5        10168               36      96.66                1  3479.76   
8        10201               22      98.57                2  2168.54   

             ORDERDATE   STATUS  QTR_ID  MONTH_ID  YEAR_ID  ... ADDRESSLINE2  \
0       2/24/2003 0:00  Shipped       1         2     2003  ...          NaN   
3       8/25/2003 0:00  Shipped       3         8 

'The top 5 records when country is USA and productline is motorcycles for the year 2003 are shown above.'

In [None]:
#Q18
agent.run("Retrieve a list of all orders where the sales amount is greater than $10,000 and Customer Name starts with mini. Use CONTACTNAME column.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to get the orders that meet the criteria.
Action: python_repl_ast
Action Input: df[(df['SALES'] > 10000) & (df['CONTACTNAME'].str.startswith('Mini'))][0m
Observation: [36;1m[1;3m'CONTACTNAME'[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: df[(df['SALES'] > 10000) & (df['CONTACTNAME'].str.startswith('Mini'))][0m

[1m> Finished chain.[0m


"df[(df['SALES'] > 10000) & (df['CONTACTNAME'].str.startswith('Mini'))]"

In [None]:
#Q17
agent.run("Display the average sales amount for each year.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to group the data by year and calculate the average sales amount.
Action: python_repl_ast
Action Input: df.groupby('YEAR_ID')['SALES'].mean()[0m
Observation: [36;1m[1;3mYEAR_ID
2003    3516.979540
2004    3512.388550
2005    3747.880146
Name: SALES, dtype: float64[0m
Thought:[32;1m[1;3m I now know the average sales amount for each year.
Final Answer: The average sales amount for each year is 2003: 3516.98, 2004: 3512.39, 2005: 3747.88.[0m

[1m> Finished chain.[0m


'The average sales amount for each year is 2003: 3516.98, 2004: 3512.39, 2005: 3747.88.'

In [None]:
#Q16
agent.run("Show the total sales for the 3rd Quarter of year 2003 when product line is ships. Use YEAR_ID column to filter on year and QTR_ID column to filter on quarter")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe by year and quarter, then sum the sales
Action: python_repl_ast
Action Input: df[(df['YEAR_ID'] == 2003) & (df['QTR_ID'] == 3) & (df['PRODUCTLINE'] == 'Ships')]['SALES'].sum()[0m
Observation: [36;1m[1;3m46171.27[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: The total sales for the 3rd Quarter of year 2003 when product line is ships is 46171.27.[0m

[1m> Finished chain.[0m


'The total sales for the 3rd Quarter of year 2003 when product line is ships is 46171.27.'

In [None]:
#Q14
agent.run("Get the average sales amount for the customer 'Alpha Cognac' in the year 2005. Use CUSTOMERNAME column.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include rows with customer name 'Alpha Cognac' and year 2005.
Action: python_repl_ast
Action Input: df[(df['CUSTOMERNAME'] == 'Alpha Cognac') & (df['YEAR_ID'] == 2005)].mean()['SALES'][0m
Observation: [36;1m[1;3m3027.8239999999996[0m
Thought:



[32;1m[1;3m I now know the final answer
Final Answer: The average sales amount for the customer 'Alpha Cognac' in the year 2005 is 3027.824.[0m

[1m> Finished chain.[0m


"The average sales amount for the customer 'Alpha Cognac' in the year 2005 is 3027.824."

In [11]:
#Q14 - custom
agent.run("Give me the list of sales amount for the customer 'Alpha Cognac' in the year 2005. Use CUSTOMERNAME column.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to get the rows with customer name 'Alpha Cognac' and year 2005.
Action: python_repl_ast
Action Input: df[(df['CUSTOMERNAME'] == 'Alpha Cognac') & (df['YEAR_ID'] == 2005)]['SALES'].tolist()[0m
Observation: [36;1m[1;3m[2577.6, 1463.0, 5192.64, 3789.72, 2116.16][0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: [2577.6, 1463.0, 5192.64, 3789.72, 2116.16][0m

[1m> Finished chain.[0m


'[2577.6, 1463.0, 5192.64, 3789.72, 2116.16]'

In [12]:
#Q14 - custom
agent.run("Give me the details of sales for the customer 'Alpha Cognac' in the year 2005. Use CUSTOMERNAME column.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to get the details of sales for the customer 'Alpha Cognac' in the year 2005.
Action: python_repl_ast
Action Input: df[(df['CUSTOMERNAME'] == 'Alpha Cognac') & (df['YEAR_ID'] == 2005)][0m
Observation: [36;1m[1;3m      ORDERNUMBER  QUANTITYORDERED  PRICEEACH  ORDERLINENUMBER    SALES  \
2532        10397               32      80.55                5  2577.60   
2612        10397               22      66.50                4  1463.00   
2715        10397               48     100.00                3  5192.64   
2741        10397               36     100.00                2  3789.72   
2821        10397               34      62.24                1  2116.16   

           ORDERDATE   STATUS  QTR_ID  MONTH_ID  YEAR_ID  ... ADDRESSLINE2  \
2532  3/28/2005 0:00  Shipped       1         3     2005  ...          NaN   
2612  3/28/2005 0:00  Shipped       1         3     2005  ...          NaN   

"The details of sales for the customer 'Alpha Cognac' in the year 2005 are shown in the output above."

In [None]:
#Q13
agent.run("Show the total sales amount for each territory in the year 2003.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to group the data by territory and year, and then sum the sales amount.
Action: python_repl_ast
Action Input: df.groupby(['TERRITORY', 'YEAR_ID'])['SALES'].sum()[0m
Observation: [36;1m[1;3mTERRITORY  YEAR_ID
APAC       2003        253134.45
           2004        345307.83
           2005        147679.55
EMEA       2003       1660314.53
           2004       2389128.89
           2005        929828.99
Japan      2003        243773.18
           2004        168479.10
           2005         42920.94
Name: SALES, dtype: float64[0m
Thought:[32;1m[1;3m I now know the total sales amount for each territory in the year 2003.
Final Answer: APAC: 253134.45, EMEA: 1660314.53, Japan: 243773.18[0m

[1m> Finished chain.[0m


'APAC: 253134.45, EMEA: 1660314.53, Japan: 243773.18'

In [None]:
df.groupby(['TERRITORY', 'YEAR_ID'])['SALES'].sum()

TERRITORY  YEAR_ID
APAC       2003        253134.45
           2004        345307.83
           2005        147679.55
EMEA       2003       1660314.53
           2004       2389128.89
           2005        929828.99
Japan      2003        243773.18
           2004        168479.10
           2005         42920.94
Name: SALES, dtype: float64

In [None]:
df['TERRITORY'].value_counts()

EMEA     1407
APAC      221
Japan     121
Name: TERRITORY, dtype: int64

In [None]:
df.head()

Unnamed: 0,ORDERNUMBER,QUANTITYORDERED,PRICEEACH,ORDERLINENUMBER,SALES,ORDERDATE,STATUS,QTR_ID,MONTH_ID,YEAR_ID,...,ADDRESSLINE2,CITY,STATE,POSTALCODE,COUNTRY,TERRITORY,CONTACTLASTNAME,CONTACTFIRSTNAME,DEALSIZE,DISCOUNT
0,10107,30,95.7,2,2871.0,2/24/2003 0:00,Shipped,1,2,2003,...,,NYC,NY,10022.0,USA,,Yu,Kwai,Small,5.0
1,10121,34,81.35,5,2765.9,2003-07-05 00:00:00,Shipped,2,5,2003,...,,Reims,,51100.0,France,EMEA,Henriot,Paul,Small,4.0
2,10134,41,94.74,2,3884.34,2003-01-07 00:00:00,Shipped,3,7,2003,...,,Paris,,75508.0,France,EMEA,Da Cunha,Daniel,Medium,1.0
3,10145,45,83.26,6,3746.7,8/25/2003 0:00,Shipped,3,8,2003,...,,Pasadena,CA,90003.0,USA,,Young,Julie,Medium,6.0
4,10159,49,100.0,14,5205.27,2003-10-10 00:00:00,Shipped,4,10,2003,...,,San Francisco,CA,,USA,,Brown,Julie,Medium,9.0


In [None]:
#Q12 - A
agent.run("Display the average sales amount of each month of the year 2004 when product is Motorcycles.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include 2004 and Motorcycles, then calculate the average sales amount for each month.
Action: python_repl_ast
Action Input: df[(df['YEAR_ID'] == 2004) & (df['PRODUCTLINE'] == 'Motorcycles')].groupby('MONTH_ID')['SALES'].mean()[0m
Observation: [36;1m[1;3mMONTH_ID
1     3169.270769
2     3774.346154
4     2789.928462
5     3603.765385
6     3633.646923
7     3253.428571
8     3300.259474
9     3267.003846
10    3031.843077
11    3890.047692
12    2605.872500
Name: SALES, dtype: float64[0m
Thought:[32;1m[1;3m I now know the average sales amount of each month of the year 2004 when product is Motorcycles.
Final Answer: The average sales amount of each month of the year 2004 when product is Motorcycles is 3169.270769 for January, 3774.346154 for February, 2789.928462 for April, 3603.765385 for May, 3633.646923 for June, 3253.428571 for July, 3300.259474 for August, 3267.003846 f

'The average sales amount of each month of the year 2004 when product is Motorcycles is 3169.270769 for January, 3774.346154 for February, 2789.928462 for April, 3603.765385 for May, 3633.646923 for June, 3253.428571 for July, 3300.259474 for August, 3267.003846 for September, 3031.843077 for October, 3890.047692 for November, and 2605.872500 for December.'

In [None]:
#Q12 - B
agent.run("Display the average sales amount of each quarter of the year 2004 when product is Motorcycles.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include 2004 and Motorcycles, then calculate the average sales amount for each quarter.
Action: python_repl_ast
Action Input: df[(df['YEAR_ID'] == 2004) & (df['PRODUCTLINE'] == 'Motorcycles')].groupby('QTR_ID')['SALES'].mean()[0m
Observation: [36;1m[1;3mQTR_ID
1    3471.808462
2    3342.446923
3    3280.768718
4    3532.880000
Name: SALES, dtype: float64[0m
Thought:[32;1m[1;3m I now know the average sales amount of each quarter of the year 2004 when product is Motorcycles.
Final Answer: The average sales amount of each quarter of the year 2004 when product is Motorcycles is 3471.81, 3342.45, 3280.77, and 3532.88 respectively.[0m

[1m> Finished chain.[0m


'The average sales amount of each quarter of the year 2004 when product is Motorcycles is 3471.81, 3342.45, 3280.77, and 3532.88 respectively.'

In [None]:
#Q11
agent.run("Show the total sales for each quarter of the year 2005")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include rows from 2005
Action: python_repl_ast
Action Input: df[df['YEAR_ID'] == 2005][0m
Observation: [36;1m[1;3m      ORDERNUMBER  QUANTITYORDERED  PRICEEACH  ORDERLINENUMBER    SALES  \
22          10375               21      34.91               12   733.11   
23          10388               42      76.36                4  3207.12   
24          10403               24     100.00                7  2434.56   
25          10417               66     100.00                2  7516.08   
49          10369               41     100.00                2  4514.92   
...           ...              ...        ...              ...      ...   
2796        10415               42      57.61                3  2419.62   
2819        10373               29     100.00                1  3978.51   
2820        10386               43     100.00                4  5417.57   
2821        10397       

'The total sales for each quarter of the year 2005 are Q1: $1,071,992.36 and Q2: $719,494.35.'

In [None]:
#Q7
agent.run("what is the Average Sales when the territory is 'EMEA' and the discount >= 10.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include rows with the territory 'EMEA' and discount >= 10.
Action: python_repl_ast
Action Input: df[(df['TERRITORY'] == 'EMEA') & (df['DISCOUNT'] >= 10)].mean()['SALES'][0m
Observation: [36;1m[1;3m3396.536788321168[0m
Thought:



[32;1m[1;3m I now know the final answer
Final Answer: The average sales when the territory is 'EMEA' and the discount is greater than or equal to 10 is 3396.54.[0m

[1m> Finished chain.[0m


"The average sales when the territory is 'EMEA' and the discount is greater than or equal to 10 is 3396.54."

In [None]:
#Q2
agent.run("Show only customer names from state 'CA' who ordered products in the year 2003 and sort the results by sales amount, with the highest sales at the top. Use CUSTOMERNAME column")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include customers from CA who ordered in 2003, then sort by sales
Action: python_repl_ast
Action Input: df[(df['STATE'] == 'CA') & (df['YEAR_ID'] == 2003)].sort_values(by='SALES', ascending=False)['CUSTOMERNAME'][0m
Observation: [36;1m[1;3m319     Mini Gifts Distributors Ltd.
243                Toys4GrownUps.com
165         Corporate Gift Ideas Co.
88                   Mini Wheels Co.
189     Mini Gifts Distributors Ltd.
                    ...             
2200               Toys4GrownUps.com
2027        Signal Collectibles Ltd.
1543        Corporate Gift Ideas Co.
2030    Mini Gifts Distributors Ltd.
1794    Mini Gifts Distributors Ltd.
Name: CUSTOMERNAME, Length: 163, dtype: object[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Mini Gifts Distributors Ltd., Toys4GrownUps.com, Corporate Gift Ideas Co., Mini Wheels Co., Signal Collectibles Ltd.[0m

[1m

'Mini Gifts Distributors Ltd., Toys4GrownUps.com, Corporate Gift Ideas Co., Mini Wheels Co., Signal Collectibles Ltd.'

In [None]:
#Q1
agent.run("For the product code starting with 'S18_', retrieve the total sales amount when quantity ordered >= 50.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include rows with product code starting with 'S18_' and quantity ordered >= 50.
Action: python_repl_ast
Action Input: df[(df['PRODUCTCODE'].str.startswith('S18_')) & (df['QUANTITYORDERED'] >= 50)].SALES.sum()[0m
Observation: [36;1m[1;3m272902.59[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: The total sales amount for product code starting with 'S18_' when quantity ordered is greater than or equal to 50 is 272902.59.[0m

[1m> Finished chain.[0m


"The total sales amount for product code starting with 'S18_' when quantity ordered is greater than or equal to 50 is 272902.59."

# Passing Custom Data

In [None]:
!pip install langchain
!pip install openai
!pip install PyPDF2
!pip install faiss-cpu
!pip install tiktoken

In [None]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS

In [None]:
import os
os.environ['OPENAI_API_KEY'] = ""

In [None]:
reader = PdfReader("/content/2304.09697.pdf")

In [None]:
raw_text = ""
for i, page in enumerate(reader.pages):
  text = page.extract_text()
  if text:
    raw_text += text

In [None]:
raw_text[:500]

'arXiv:2304.09697v1  [cs.PL]  19 Apr 2023A CALCULUS FOR SCOPED EFFECTS & HANDLERS\nROGER BOSMANa, BIRTHE VAN DEN BERGa, WENHAO TANGb,\nAND TOM SCHRIJVERSa\naKU Leuven, Celestijnenlaan 200A\ne-mail address :{roger.bosman/birthe.vandenberg/tom.schrijvers }@kuleuven.be\nbThe University of Edinburgh, 10 Crichton Street\ne-mail address : wenhao.tang@ed.ac.uk\nAbstract. Algebraic eﬀects & handlers have become a standard approach for side-eﬀects\nin functional programming. Their modular composition with other e'

In [None]:
text_splitter = CharacterTextSplitter(
    separator = "\n",
    chunk_size = 1000,
    chunk_overlap = 200,
    length_function = len,
)

texts = text_splitter.split_text(raw_text)


In [None]:
len(texts)

213

In [None]:
texts[0]

'arXiv:2304.09697v1  [cs.PL]  19 Apr 2023A CALCULUS FOR SCOPED EFFECTS & HANDLERS\nROGER BOSMANa, BIRTHE VAN DEN BERGa, WENHAO TANGb,\nAND TOM SCHRIJVERSa\naKU Leuven, Celestijnenlaan 200A\ne-mail address :{roger.bosman/birthe.vandenberg/tom.schrijvers }@kuleuven.be\nbThe University of Edinburgh, 10 Crichton Street\ne-mail address : wenhao.tang@ed.ac.uk\nAbstract. Algebraic eﬀects & handlers have become a standard approach for side-eﬀects\nin functional programming. Their modular composition with other eﬀects and clean sepa-\nration of syntax and semantics make them attractive to a wide audience. However, not all\neﬀects can be classiﬁed as algebraic; some need a more sophis ticated handling. In partic-\nular, eﬀects that have or create a delimited scope need speci al care, as their continuation\nconsists of two parts—in and out of the scope—and their modul ar composition introduces\nadditional complexity. These eﬀects are called scopedand have gained attention by their'

In [None]:
embeddings = OpenAIEmbeddings()

In [None]:
docsearch = FAISS.from_texts(texts, embeddings)

In [None]:
docsearch

<langchain.vectorstores.faiss.FAISS at 0x7f71a112e580>

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

In [None]:
chain = load_qa_chain(OpenAI(), chain_type = "stuff")

In [None]:
query = "explain calculus" #"give a summary of the paper"
docs = docsearch.similarity_search(query)
chain.run(input_documents = docs, question=query)

' Calculus is a branch of mathematics that is used to study the behavior of functions through the use of limits, derivatives, integrals, and other mathematical tools. It is used in many areas of science and engineering, such as physics, engineering, economics, and finance. It is also used to solve problems in mathematics and computer science.'

# Test

In [128]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain import OpenAI, SerpAPIWrapper, LLMChain
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish
from langchain.agents.agent_toolkits import NLAToolkit
from langchain.tools.plugin import AIPlugin
import re

In [None]:
agent = LLMSingleActionAgent(
    llm_chain=llm_chain, 
    output_parser=output_parser,
    stop=["\nObservation:"], 
    allowed_tools=tool_names
)

In [129]:
llm = OpenAI(temperature=0)

In [132]:
# LLM chain consisting of the LLM and a prompt
llm_chain = LLMChain(llm=llm, prompt = ""
                     )

ValidationError: ignored