In [6]:
import os
from pptx import Presentation

In [7]:
# 3. Function to convert a single pptx file to txt
def pptx_to_txt(pptx_path, txt_output_path):
    """Write the text of every text-bearing shape in a presentation to a file.

    Args:
        pptx_path: Path of the .pptx presentation to read.
        txt_output_path: Path of the UTF-8 text file to create or overwrite.

    Shapes are visited slide by slide; each shape that exposes a ``text``
    attribute contributes its text followed by a newline. Shapes without
    that attribute are skipped.
    """
    presentation = Presentation(pptx_path)

    # Collect one newline-terminated entry per shape, preserving slide order.
    pieces = [
        shape.text + "\n"
        for slide in presentation.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]

    with open(txt_output_path, "w", encoding="utf-8") as out_file:
        out_file.write("".join(pieces))

# 4. Convert all .pptx files in a folder
def convert_all_pptx_to_txt(pptx_folder_path, output_folder_path):
    """Convert every PowerPoint file in a folder to a same-named .txt file.

    Args:
        pptx_folder_path: Folder that is scanned (non-recursively) for
            presentations.
        output_folder_path: Folder that receives the generated .txt files;
            it is created if it does not exist.

    A ``Converted <source> -> <target>`` line is printed for each file that
    is handed to ``pptx_to_txt``.
    """
    pptx_suffix = ".pptx"

    # Make sure output folder exists
    os.makedirs(output_folder_path, exist_ok=True)

    # Sorted so the processing order (and the printed log) is reproducible;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(pptx_folder_path)):
        # Case-insensitive match so "Deck.PPTX" is picked up as well.
        if not filename.lower().endswith(pptx_suffix):
            continue
        # "~$name.pptx" are lock files PowerPoint leaves next to an open
        # presentation; they are not real presentations.
        if filename.startswith("~$"):
            continue

        pptx_path = os.path.join(pptx_folder_path, filename)
        # A directory whose name happens to end in .pptx cannot be converted.
        if not os.path.isfile(pptx_path):
            continue

        # Strip only the trailing extension: str.replace would also rewrite
        # ".pptx" occurring earlier in the name ("a.pptx.v2.pptx").
        txt_filename = filename[:-len(pptx_suffix)] + ".txt"
        txt_output_path = os.path.join(output_folder_path, txt_filename)
        pptx_to_txt(pptx_path, txt_output_path)
        print(f"Converted {filename} -> {txt_filename}")

In [8]:

# 5. Input and output locations, relative to the notebook's working directory
output_folder = "./converted_txt"  # generated .txt files are written here
pptx_folder = "./"  # folder scanned for .pptx files (the notebook's own folder)

In [9]:
# 6. Batch-convert every presentation from pptx_folder into output_folder
convert_all_pptx_to_txt(
    pptx_folder_path=pptx_folder,
    output_folder_path=output_folder,
)

print("✅ All PPTX files converted to TXT files successfully!")

Converted AWS User Groups Colombo - Introduction to AWS Cloud Platform.pptx -> AWS User Groups Colombo - Introduction to AWS Cloud Platform.txt
Converted CAP Theorem.pptx -> CAP Theorem.txt
Converted Cloud Computing 101.pptx -> Cloud Computing 101.txt
Converted Containers 101.pptx -> Containers 101.txt
Converted Intro to DevOps and Beyond.pptx -> Intro to DevOps and Beyond.txt
Converted Lecture 2 - Part 1.pptx -> Lecture 2 - Part 1.txt
Converted Lecture 2 - Part 2.pptx -> Lecture 2 - Part 2.txt
✅ All PPTX files converted to TXT files successfully!


In [10]:
# Combine all converted txt files into a single file
def combine_txt_files(input_folder, output_file):
    """Concatenate every .txt file in a folder into one UTF-8 text file.

    Args:
        input_folder: Folder that is scanned (non-recursively) for files
            whose name ends with ``.txt``.
        output_file: Path of the combined file; created or overwritten.

    Files are appended in sorted filename order, each followed by a blank
    line. If ``output_file`` itself lives inside ``input_folder`` it is
    skipped, so the function never reads the file it is writing.
    """
    with open(output_file, "w", encoding="utf-8") as outfile:
        # Sorted because os.listdir order is arbitrary; this keeps the
        # combined document identical from run to run.
        for filename in sorted(os.listdir(input_folder)):
            if not filename.endswith(".txt"):
                continue
            file_path = os.path.join(input_folder, filename)
            # The output file already exists at this point (opened above),
            # so it shows up in the listing when it is in the same folder.
            if os.path.samefile(file_path, output_file):
                continue
            with open(file_path, "r", encoding="utf-8") as infile:
                outfile.write(infile.read())
                outfile.write("\n\n")  # Add some space between documents

# Merge the converted slide text into the single notes file used for retrieval
combine_txt_files(
    input_folder="./converted_txt",
    output_file="ctse_notes.txt",
)
print("✅ All TXT files combined into ctse_notes.txt!")

✅ All TXT files combined into ctse_notes.txt!


In [60]:
# 1. Import Libraries
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline



In [61]:
# Read the combined CTSE lecture notes file through LangChain's TextLoader
loader = TextLoader(
    "ctse_notes.txt",
    encoding="utf-8",
)
documents = loader.load()


In [62]:
# Break the loaded notes into overlapping chunks for embedding and retrieval
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
docs = text_splitter.split_documents(documents)


In [63]:
# Embedding model used to vectorise the chunks
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [64]:
# Build the FAISS vector store from the chunks and the embedding model
vectorstore = FAISS.from_documents(
    documents=docs,
    embedding=embedding_model,
)

In [65]:

# Load the tokenizer and the seq2seq model for the chosen Hugging Face checkpoint
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
)

In [66]:
# Wrap the model and tokenizer in a transformers text2text-generation pipeline
text2text_generator = pipeline(
    task="text2text-generation",  # task name matching the seq2seq model
    model=model,
    tokenizer=tokenizer,
    device=-1,  # -1 means CPU
)

Device set to use cpu


In [67]:
# Expose the local transformers pipeline to LangChain as an LLM
# NOTE(review): the captured output for this cell shows a warning, and it is
# unclear whether "temperature" in model_kwargs reaches generation when a
# ready-made pipeline is passed in — confirm against the LangChain docs.
llm = HuggingFacePipeline(
    pipeline=text2text_generator,
    model_kwargs={"temperature": 0},
)

  llm = HuggingFacePipeline(pipeline=text2text_generator, model_kwargs={"temperature":0})


In [68]:
# Build the RetrievalQA chain: the vector store retrieves, the local LLM answers
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
)

In [69]:
# Function to ask questions
def answer_query(query):
    """Send ``query`` through the notebook-level ``qa_chain`` and print the answer.

    Args:
        query: The question text, passed to the chain under the "query" key.

    Prints a blank line, then the question and the chain's "result" value.
    Nothing is returned.
    """
    chain_output = qa_chain.invoke({"query": query})
    answer_text = chain_output["result"]
    print(f"\nQuestion: {query}\nAnswer: {answer_text}")

In [71]:
# Example usage: ask one sample question against the lecture notes
if __name__ == "__main__":
    user_query = "What is container?"
    answer_query(query=user_query)


Question: What is container?
Answer: a platform to manage lifecycle of your containers, Develop your apps and supporting components using containers
