# Parsing SQL Data

In [None]:
!pip install langchain
!pip install openai

In [None]:
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

In [None]:
from urllib.parse import quote_plus

# Connection settings -- fill these in with the raw (un-encoded) values before
# running. Avoid committing real credentials with the notebook.
Server = ""
Database = ""
Driver = ""
Username = ""
Password = ""

# Percent-encode the credentials so reserved URL characters in them
# (e.g. "@", ":" or "/" in a password) do not corrupt the connection URI.
db_conn = (
    f"mssql://{quote_plus(Username)}:{quote_plus(Password)}"
    f"@{Server}/{Database}?driver={Driver}"
)

db = SQLDatabase.from_uri(db_conn)
llm = OpenAI(temperature=0)

In [None]:
# Combine the LLM and the database connection created above into one chain.
db_chain = SQLDatabaseChain(
    llm=llm,
    database=db,
    verbose=True,
)

In [None]:
# Question to send through the chain; left empty here as a placeholder to fill in.
query = ""
db_chain.run(query)

# Analyzing Tabular Data

In [None]:
!pip install langchain
!pip install openai
!pip install chromadb
!pip install tiktoken

In [None]:
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

In [None]:
import os
from getpass import getpass

# Keep a key that is already set in the environment; otherwise prompt for it
# without echoing, so the secret is never stored in the notebook itself.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
!wget https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv

--2023-04-23 07:37:22--  https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60302 (59K) [text/plain]
Saving to: ‘titanic.csv’


2023-04-23 07:37:23 (8.21 MB/s) - ‘titanic.csv’ saved [60302/60302]



In [None]:
# Loader for the titanic.csv file fetched by the wget cell.
loader = CSVLoader(file_path='titanic.csv')

In [None]:
# Build an index from the CSV loader; the creator is constructed with no
# arguments, so whatever defaults it ships with are used.
index_creator = VectorstoreIndexCreator()
doc_search = index_creator.from_loaders([loader])



In [None]:
# Question-answering chain (chain_type "stuff") backed by the index's retriever.
# input_key="question" means callers pass the query as {"question": ...}.
chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=doc_search.vectorstore.as_retriever(),
    input_key="question",
    verbose=True,
)

In [None]:
# Other questions to try:
#   "how many rows and columns are present in this dataset"
#   "run df.info() on the entire data and write your observations"
query = "impute average age for the missing values in the Age columnn and tell the new average age"

# The chain takes its input under the "question" key and returns a mapping
# whose "result" entry holds the answer text.
response = chain({"question": query})
print(response["result"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
 The average age for the missing values can be calculated by taking the average of the ages of the passengers that were given. The average age of the passengers provided is 27.5, so the imputed average age for the missing values in the Age column is 27.5.


# OR: analyze the same CSV with a pandas DataFrame agent





In [None]:
from langchain.llms import OpenAI

In [None]:
# pandas is not imported anywhere else in this notebook, so import it here.
import pandas as pd

# Load the Titanic CSV into a DataFrame.
df = pd.read_csv('titanic.csv')

In [None]:
# The agent factory is not imported anywhere else in this notebook, so import it here.
from langchain.agents import create_pandas_dataframe_agent

# Create an agent over the DataFrame, using an OpenAI LLM with temperature=0.
agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df, verbose=True)

In [None]:
# Ask the DataFrame agent about the dataset's dimensions.
agent.run("how many rows and columns are present?")

In [None]:
# Ask the DataFrame agent a question that requires filtering and counting rows.
agent.run("how many people have more than 3 siblings?")

# Passing Custom Data

In [None]:
!pip install langchain
!pip install openai
!pip install PyPDF2
!pip install faiss-cpu
!pip install tiktoken

In [None]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS

In [None]:
import os
from getpass import getpass

# Keep a key that is already set in the environment; otherwise prompt for it
# without echoing, so the secret is never stored in the notebook itself.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Open the PDF to be queried.
# NOTE(review): absolute path under /content -- looks like a Colab upload
# location; adjust it for the environment this runs in.
reader = PdfReader("/content/2304.09697.pdf")

In [None]:
# Concatenate the text of every page, skipping pages for which extract_text()
# returns nothing. A single join avoids repeated string concatenation, and the
# unused enumerate index is dropped.
page_texts = (page.extract_text() for page in reader.pages)
raw_text = "".join(page_text for page_text in page_texts if page_text)

In [None]:
# Preview the first 500 characters of the extracted text.
raw_text[:500]

'arXiv:2304.09697v1  [cs.PL]  19 Apr 2023A CALCULUS FOR SCOPED EFFECTS & HANDLERS\nROGER BOSMANa, BIRTHE VAN DEN BERGa, WENHAO TANGb,\nAND TOM SCHRIJVERSa\naKU Leuven, Celestijnenlaan 200A\ne-mail address :{roger.bosman/birthe.vandenberg/tom.schrijvers }@kuleuven.be\nbThe University of Edinburgh, 10 Crichton Street\ne-mail address : wenhao.tang@ed.ac.uk\nAbstract. Algebraic eﬀects & handlers have become a standard approach for side-eﬀects\nin functional programming. Their modular composition with other e'

In [None]:
# Splitter settings: newline separator, chunk_size 1000, chunk_overlap 200,
# with lengths measured by the built-in len.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
# Break the extracted PDF text into chunks.
texts = text_splitter.split_text(raw_text)


In [None]:
# Number of chunks produced by the splitter.
len(texts)

213

In [None]:
# Inspect the first chunk.
texts[0]

'arXiv:2304.09697v1  [cs.PL]  19 Apr 2023A CALCULUS FOR SCOPED EFFECTS & HANDLERS\nROGER BOSMANa, BIRTHE VAN DEN BERGa, WENHAO TANGb,\nAND TOM SCHRIJVERSa\naKU Leuven, Celestijnenlaan 200A\ne-mail address :{roger.bosman/birthe.vandenberg/tom.schrijvers }@kuleuven.be\nbThe University of Edinburgh, 10 Crichton Street\ne-mail address : wenhao.tang@ed.ac.uk\nAbstract. Algebraic eﬀects & handlers have become a standard approach for side-eﬀects\nin functional programming. Their modular composition with other eﬀects and clean sepa-\nration of syntax and semantics make them attractive to a wide audience. However, not all\neﬀects can be classiﬁed as algebraic; some need a more sophis ticated handling. In partic-\nular, eﬀects that have or create a delimited scope need speci al care, as their continuation\nconsists of two parts—in and out of the scope—and their modul ar composition introduces\nadditional complexity. These eﬀects are called scopedand have gained attention by their'

In [None]:
# Embeddings object (constructed with no arguments) passed to the vector store below.
embeddings = OpenAIEmbeddings()

In [None]:
# Build a FAISS vector store from the text chunks and the embeddings object.
docsearch = FAISS.from_texts(texts, embeddings)

In [None]:
# Display the vector store object.
docsearch

<langchain.vectorstores.faiss.FAISS at 0x7f71a112e580>

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

In [None]:
# Question-answering chain of type "stuff".
# Note: this rebinds `chain`, which the tabular-data section also assigns.
chain = load_qa_chain(OpenAI(), chain_type="stuff")

In [None]:
# Alternative question to try: "give a summary of the paper"
query = "explain calculus"

# Look up the chunks most similar to the question, then pass them to the chain
# as input documents together with the question.
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)

' Calculus is a branch of mathematics that is used to study the behavior of functions through the use of limits, derivatives, integrals, and other mathematical tools. It is used in many areas of science and engineering, such as physics, engineering, economics, and finance. It is also used to solve problems in mathematics and computer science.'