In [12]:
import os
from langchain.schema import Document
import fitz  
from pptx import Presentation

In [13]:
def load_pdf(file_path):
    """Extract the plain text of every page of a PDF.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        str: The text of all pages concatenated in page order, with no
        separator added between pages.
    """
    # The context manager closes the document (and its underlying file
    # handle) even if text extraction raises; previously it was never closed.
    with fitz.open(file_path) as doc:
        # join avoids re-allocating a growing string once per page.
        return "".join(doc[page_num].get_text() for page_num in range(len(doc)))

In [14]:
def load_ppt(file_path):
    """Collect the text of every text-bearing shape in a PowerPoint deck.

    Slides and their shapes are visited in order. Each shape that exposes a
    ``text`` attribute contributes its text followed by a newline; shapes
    without that attribute are ignored.

    Args:
        file_path: Path of the presentation to open.

    Returns:
        str: The newline-terminated shape texts concatenated together.
    """
    deck = Presentation(file_path)
    pieces = [
        shape.text + "\n"
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "".join(pieces)

In [15]:
def load_documents_from_directory(directory_path):
    """Load and concatenate the text of every supported file in a directory.

    Files ending in ``.pdf`` are read with ``load_pdf`` and files ending in
    ``.pptx`` with ``load_ppt`` (extension match is case-insensitive). Any
    other entry is reported on stdout and skipped.

    Args:
        directory_path: Directory whose direct entries are scanned.

    Returns:
        str: The text of each loaded file followed by a newline, concatenated
        in sorted filename order. Empty string if nothing was loaded.
    """
    parts = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # combined text (and the chunks later built from it) reproducible.
    for filename in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, filename)
        # Lower-case only for the comparison so "REPORT.PDF" is not skipped.
        lowered = filename.lower()
        if lowered.endswith('.pdf'):
            text = load_pdf(file_path)
        elif lowered.endswith('.pptx'):
            text = load_ppt(file_path)
        else:
            print(f"Skipping unsupported file format: {filename}")
            continue

        parts.append(text + "\n")
    return "".join(parts)



In [11]:
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [16]:
# Folder scanned for input files (a path relative to the working directory).
directory_path = "./multi"

# Combined text of all supported (.pdf / .pptx) files found in that folder.
docs = load_documents_from_directory(directory_path)

# Preview only the first 500 characters of the combined text.
print(docs[:500])

APRIL 2023
100 Practical Applications and 
Use Cases of Generative AI
1	
Breakthrough Technology 
and Promising Prospects 
3	
What Is Artificial 
Intelligence (AI)? What is 
Generative AI? 
4	
The Importance of 
Protecting Data Privacy
5	
Insufficient Arabic 
Language Applications 
for Natural Language 
Processing
6	
Platforms at Risk of Being 
Sold or Dying Out
7	
Difficulties Associated with 
the Utilization of Generative 
AI Technologies
8	
Are Inputs and Outputs 
Always Reliable Despite 
Qua


In [17]:
def split_text_into_chunks(text, chunk_size=1000, chunk_overlap=200):
    """Split ``text`` into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Forwarded to the splitter as ``chunk_size`` (default 1000).
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``
            (default 200).

    Returns:
        The result of the splitter's ``split_text(text)`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

In [18]:
# Split the combined text using the function's default chunk size and overlap.
chunks = split_text_into_chunks(docs)

In [19]:
# Show the first two chunks, numbered from 1, each followed by a 40-dash rule.
for i, chunk in enumerate(chunks[:2], 1):
    print(f"Chunk {i}:\n{chunk}\n{'-'*40}")

Chunk 1:
APRIL 2023
100 Practical Applications and 
Use Cases of Generative AI
1	
Breakthrough Technology 
and Promising Prospects 
3	
What Is Artificial 
Intelligence (AI)? What is 
Generative AI? 
4	
The Importance of 
Protecting Data Privacy
5	
Insufficient Arabic 
Language Applications 
for Natural Language 
Processing
6	
Platforms at Risk of Being 
Sold or Dying Out
7	
Difficulties Associated with 
the Utilization of Generative 
AI Technologies
8	
Are Inputs and Outputs 
Always Reliable Despite 
Quality-Related Concerns?
9	
How Can You Engage with 
and Obtain Information 
from Generative AI? 
11	
Reinforcement Learning 
from Human Feedback 
(RLHF)
Preface
Applications and Use Cases
1	
ChatGPT 
3	
Useful Guidelines 
and Techniques for 
ChatGPT
12	
Use Cases for New 
Businesses 
27	 Use Cases for 
Students
42	 Use Cases for Fresh 
Graduates, Job 
Seekers and New 
Employees 
57	 Use Cases for 
Employees 
72	 Midjourney
76	 Other Platforms
76	 Jasper
77	 Syhthesia
78	 DALL·E 2
79	 Tom

In [20]:
from langchain.embeddings import HuggingFaceEmbeddings


In [21]:
# Embedding model wrapper used to vectorize the chunks below. The same name is
# rebound to an identically configured instance in the vector-store cell.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  warn_deprecated(
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [23]:
# Embed all chunks with a single embed_documents call. That is the
# document-side API of the embeddings interface; embed_query is intended for
# search queries and was previously invoked once per chunk in a Python loop.
chunk_embeddings = embeddings.embed_documents(chunks)

print(f"First chunk embedding:\n{chunk_embeddings[0]}")


First chunk embedding:
[-0.0742826834321022, -0.029410449787974358, 0.06696932017803192, -0.02286062203347683, -0.02098800614476204, 0.052969690412282944, 0.02592637948691845, 0.032274942845106125, -0.028028879314661026, 0.016584163531661034, -0.03299710527062416, -0.003927039913833141, 0.006547821220010519, -0.06945374608039856, 0.04272918403148651, 0.034846287220716476, 0.055202048271894455, -0.038294725120067596, -0.04937104135751724, -0.10260594636201859, 0.01504532527178526, 0.0471486821770668, -0.005048910155892372, -0.0006805926677770913, -0.012040045112371445, 0.036506570875644684, 0.018806664273142815, -0.10860075801610947, 0.048296790570020676, -0.0352928452193737, -0.007574012503027916, 0.11731439083814621, -0.002956823445856571, 0.0224103145301342, -0.06010154262185097, 0.0809231773018837, -0.08818709850311279, -0.020694583654403687, 0.0716385617852211, -0.008787315338850021, -0.07235170900821686, -0.1435173898935318, -0.011138818226754665, -0.04209935665130615, 0.100695535

In [24]:
from langchain_astradb import AstraDBVectorStore
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader

# Connection settings are read from the environment so that credentials never
# live in the notebook; a missing variable raises KeyError immediately.
# SECURITY: the application token that was previously committed in this cell
# must be treated as compromised and revoked/rotated in Astra.
ASTRA_DB_API_ENDPOINT = os.environ["ASTRA_DB_API_ENDPOINT"]
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
ASTRA_DB_NAMESPACE = 'your_namespace'  # placeholder; not passed to the store below

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Vector store bound to the "db" collection; `embedding` is the model handed
# to the store for vectorizing text.
vector_store = AstraDBVectorStore(
    collection_name="db",
    embedding=embeddings,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN
)


In [30]:
# Wrap each chunk in a Document. The embedding vector is deliberately NOT
# copied into metadata: the vector store is constructed with its own embedding
# model, so a metadata copy only duplicates the whole vector in every record.
# The chunk's position is kept instead so results can be traced back.
documents_to_insert = [
    Document(
        page_content=chunk,
        metadata={"chunk_index": index}
    )
    for index, chunk in enumerate(chunks)
]


In [31]:
# Insert the prepared documents into the vector store; the cell displays the
# return value of add_documents (a list of ID strings in the saved output).
# NOTE(review): re-running this cell presumably inserts the same chunks again
# under fresh IDs, duplicating them in the collection — confirm before re-executing.
vector_store.add_documents(documents_to_insert)

['43ef3e8d17c3400f8a30ef672b0f72d6',
 '4f6a27a2a8554cf0a63c5af0ff2a6cdd',
 '18780902694b4cd9b07834eec11979e4',
 'b226eec180b946ca9a9dc1049e5755b0',
 'ed8ae32b583f49868d9d764b4b928fe0',
 'f7551a03cedc4a5da97bd4b65c241722',
 '74f6389dd4e540988417d5718c1459c1',
 '6bbc23fc235b4d2b9073ba1b9fdeb7cc',
 '5bbf81510f404223afc5b4f4cf0e7529',
 '45bf5c0c916544a595fd51f4809c1e15',
 'df9dfae136764bf29ff24a27c61a3133',
 'b1b69b2f55c8468989309bd5d5079981',
 '0e843cffc4a94f35ad19cd9384d4ce4a',
 '5e2c5077fc26406bbd1cf350deceb681',
 '57022fe8328b4f3f8cc5524bbbdc171e',
 'a4b097e38a7f471abfc4cbaf73e565dc',
 'a0e6bdf4faaa4d498032a6ce6216373c',
 '95ae4850e4ff40a8b52df53eedbe91cf',
 '18a31e0f83b5431aa168984d07700e7b',
 '4fc2174b9e3d4cdd96654a70752261ca',
 '9d521f2ea6a94b94af40ebaf006fd2bf',
 'b126e5a258cd4397a77c4538ecb14e3c',
 'e5b94b3046c947df833a1ede978bdaac',
 '2754abb3976540dc8a4cf2009705c242',
 '23321099e69546e082f8c71f01bd65ea',
 'bfcfc4932f9747f086ca3de9ca8f53b5',
 '2395bcba46ba4abfb25958fa1fec62df',
 

In [32]:
from langchain import PromptTemplate, LLMChain


In [44]:
# Prompt text for the answer-generation chain. The {user_query} and
# {relevant_documents} placeholders are the keys supplied by
# search_and_generate_answer when it runs the chain.
prompt_template = """
You are a helpful assistant with access to a large database of documents. Please provide a comprehensive and detailed answer to the user's query based only on the relevant information provided below. 


User Query: {user_query}

Relevant Information:
{relevant_documents}

Answer:
"""


In [34]:
from langchain.llms import Ollama

# LLM handle for the "llama3" model served through Ollama.
llama = Ollama(model="llama3")
# Chain that fills prompt_template and sends the result to the model.
# NOTE(review): the saved output of this cell shows a deprecation warning —
# presumably for LLMChain; confirm and plan a migration.
llm_chain = LLMChain(
    llm=llama,
    prompt=PromptTemplate.from_template(prompt_template)
)

  warn_deprecated(


In [35]:
def search_and_generate_answer(user_query, k=3):
    """Answer a query using the ``k`` most similar stored chunks as context.

    Args:
        user_query: The question to answer; also used as the query for the
            vector store's similarity search.
        k: Number of chunks to retrieve. Defaults to 3, the value that was
            previously hard-coded, so existing one-argument calls are unchanged.

    Returns:
        The value returned by ``llm_chain.run`` for the filled-in prompt.
    """
    results = vector_store.similarity_search(query=user_query, k=k)

    # The retrieved chunk texts are newline-joined into one context block.
    relevant_documents = "\n".join(doc.page_content for doc in results)

    return llm_chain.run({
        "user_query": user_query,
        "relevant_documents": relevant_documents
    })

In [45]:
# Example query: retrieve matching chunks, generate an answer, and print it.
user_query = "Give some applications of generative ai?"
answer = search_and_generate_answer(user_query)
print(answer)

Based on the provided information, here are some applications of Generative AI:

1. **Art and Design**: Generative AI can be used to create new and original artistic content, such as images, music, or even videos.
2. **Content Creation**: This technology can generate new content, including text, images, or audio files, which can be used in various industries, such as entertainment, marketing, or education.
3. **Chatbots and Virtual Assistants**: Generative AI can be used to develop more advanced chatbots and virtual assistants that can understand and respond to user queries in a more natural and human-like way.
4. **Language Models**: Large-scale language models like OpenAI's GPT-3 can be trained on vast amounts of data to generate new text content, summarize long documents, or even assist with writing tasks.
5. **Image Generation**: Generative AI models can create realistic images based on a given prompt or input, as demonstrated by the example provided from Midjourney.

These applica

In [46]:
# Second example query; rebinds user_query and answer from the previous cell.
user_query = "Is it possible to make snake game using reinforcement learning?"
answer = search_and_generate_answer(user_query)
print(answer)