# Imports

In [1]:
from IPython.display import clear_output

In [2]:
!pip install -q chromadb google-generativeai python-pptx PyPDF2 pyTelegramBotAPI qdrant-client
clear_output()

In [3]:
import chromadb
from google.colab import userdata
import google.generativeai as genai
import csv
import json
import telebot
from telebot import types
import os
import getpass
import PyPDF2
import re
import time
from sentence_transformers import SentenceTransformer
from qdrant_client import models, QdrantClient

In [4]:
# Read the Gemini API key through Colab `userdata` and register it with the SDK.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [5]:
# Connection settings for the Qdrant instance; the API key is read through Colab `userdata`.
QDRANT_URL = "https://2f1c4ffe-c4fe-4f27-a1ad-85acebc21ffe.us-east4-0.gcp.cloud.qdrant.io:6333"
QDRANT_API = userdata.get('QDRANT-API')

# Single client object used by every function and handler in this notebook.
qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API)

# Presentation creation

In [6]:
# SentenceTransformers model used to embed document texts and search queries.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Gemini model that serves every generate_content() request.
GENERATION_MODEL_NAME = 'gemini-1.5-flash'
model = genai.GenerativeModel(GENERATION_MODEL_NAME)

# Clear whatever output this cell produced while the models were being set up.
clear_output()

In [7]:
def generate_content(prompt: str) -> str:
  """Send ``prompt`` to the module-level ``model`` and return the reply as text.

  The function never raises: every failure is reported through the returned
  string, and callers detect failures by searching for these messages.

  Args:
    prompt: Full prompt text passed to ``model.generate_content``.

  Returns:
    ``response.text`` when the response has a ``text`` attribute,
    ``"Sorry, I couldn't generate a response."`` when it has none, or
    ``"There was an error generating the response: <error>"`` when an
    exception is raised while generating or reading the response.
  """
  try:
    response = model.generate_content(prompt)
    if hasattr(response, 'text'):
      return response.text
    return "Sorry, I couldn't generate a response."
  except Exception as e:
    # Callers search for this exact wording to decide whether to retry.
    return f"There was an error generating the response: {str(e)}"


def generate_prompt_for_db(text):
  """Ask the language model to clean up ``text`` and return its raw reply.

  The prompt requests a reply made of ``Title:: ...`` / ``Text:: ...`` lines.
  Whatever ``generate_content`` returns (including its error messages) is
  passed back unchanged.

  Args:
    text: Text to embed in the prompt.

  Returns:
    The string returned by ``generate_content`` for the built prompt.
  """
  cleanup_prompt = f"""
You are an assistant with artificial intelligence who corrects this text, preprocesses it so that the related words are separated by a space, turns it into a coherent text, shortens it a little and outputs it convenient for explaining the topic in the presentation.
Text:
{text}
Please provide output only in this format without anything else.
Format example:
Title:: Some title
Text:: Some text from given text
"""
  return generate_content(cleanup_prompt)


In [8]:
def database_replenishment(content, documents):
  """Parse ``Title:: ... / Text:: ...`` records from ``content`` into ``documents``.

  Each record starts with ``'Title:: '`` and its body starts at the first
  following ``'\\nText:: '``. Text that precedes the first title marker is
  ignored. When no complete record can be parsed, the whole ``content`` is
  stored as one document titled ``"Some text"`` so the page is not lost.

  Args:
    content: Text to parse (model reply or raw page text).
    documents: List of ``{"title": ..., "text": ...}`` dicts; extended in place.

  Returns:
    The same ``documents`` list that was passed in.
  """
  title_marker = 'Title:: '
  text_marker = '\nText:: '

  parsed = []
  # Split on the marker itself instead of slicing off a fixed 8 characters:
  # the first piece is whatever precedes the first title (empty for a
  # well-formed reply) and is skipped.
  for record in content.split(title_marker)[1:]:
    title, found, text = record.partition(text_marker)
    if found:
      parsed.append({"title": title, "text": text})

  if parsed:
    documents.extend(parsed)
  else:
    # No marker at all, or markers without a text part: keep the raw content.
    documents.append({"title": "Some text", "text": content})
  return documents


def extract_text_from_pdf(pdf_path, message, msg, COLLECTION_NAME):
  """Turn each page of a PDF into documents and upload them to Qdrant.

  For every page with extractable text, non-word characters are replaced by
  spaces and the page is sent to ``generate_prompt_for_db`` (up to five
  attempts). A successful reply is parsed by ``database_replenishment``; if
  every attempt returns an error message, or an exception is raised, the
  cleaned page text is stored instead. Progress is written into the Telegram
  message ``msg`` every tenth page and once more when the upload is done.

  Args:
    pdf_path: Path of the PDF file to read.
    message: Telegram message; ``message.chat.id`` identifies the chat to update.
    msg: Telegram message whose text is edited to show progress.
    COLLECTION_NAME: Name of the Qdrant collection that receives the points.

  Returns:
    List with the title of every uploaded document, in upload order.
  """
  # Matches every character that is not a word character (this includes
  # whitespace and punctuation); each match becomes a single space.
  pattern = r"[^\w]"
  max_attempts = 5
  documents = []

  with open(pdf_path, 'rb') as pdf_file:
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    page_count = len(pdf_reader.pages)

    for page_index, page in enumerate(pdf_reader.pages):
      text = page.extract_text()

      if page_index % 10 == 0:
        bot.edit_message_text(chat_id = message.chat.id, message_id = msg.message_id, text = f"Please wait while the file is being written to the database. It may take some time.\nDone...{round(page_index/page_count*100, 2)}%")

      if not text:
        continue

      text = re.sub(pattern, " ", text)

      try:
        # The retry counter is deliberately not named like the page index.
        for _ in range(max_attempts):
          content = generate_prompt_for_db(text)

          if 'error generating the response:' not in content and "couldn't generate a response."  not in content:
            documents = database_replenishment(str(content), documents)
            break
        else:
          # Every attempt returned an error message: keep the cleaned page text.
          documents = database_replenishment(str(text), documents)

      except Exception as e:
        print('Error:', e)
        documents = database_replenishment(str(text), documents)

  # Point ids are the positions of the documents in the list.
  qdrant_client.upload_points(
      collection_name=COLLECTION_NAME,
      points=[
          models.PointStruct(
              id=idx, vector=embedder.encode(doc["text"]).tolist(), payload=doc
          )
          for idx, doc in enumerate(documents)
      ],
  )
  bot.edit_message_text(chat_id = message.chat.id, message_id = msg.message_id, text = "Please wait while the file is being written to the database. It may take some time.\nDone!")
  print('Successful')
  return [doc['title'] for doc in documents]


In [9]:
def generate_quastion_for_PP_code(topics, text, name):
  """Ask the language model for python-pptx code that builds one presentation.

  The reply is expected to be wrapped in a Markdown code fence. The fence is
  located with a regular expression rather than fixed character offsets, so
  leading prose, a different language tag or a trailing newline no longer
  corrupt the code. A reply without a fence (for example one of the error
  messages from ``generate_content``) is returned unchanged apart from
  surrounding whitespace, so callers can still recognise it.

  Args:
    topics: Topics to cover; interpolated into the prompt.
    text: Source texts for the slides; interpolated into the prompt.
    name: File name the generated code is asked to save the presentation as.

  Returns:
    The code found inside the first fence, or the stripped reply when the
    reply contains no fence.
  """
  prompt_input = f"""
You are an assistant with artificial intelligence who generates a code for creation a PowerPoint presentation using python-pptx library based on given text. Name the pptx file like this:{name}
Please provide output only in code format without anything else. Note that 'SlideShapes' object has no attribute 'subtitle'. Please note that the topics and texts must be spelled out explicitly (for example, text = 'Some text...')
List of topics:
{topics}
List of texts from book:
{text}
"""

  content = generate_content(prompt_input)

  # Opening fence line (with optional language tag), then everything up to the
  # closing fence; an unterminated fence runs to the end of the reply.
  fenced = re.search(r"```[^\n]*\n(.*?)(?:```|\Z)", content, re.DOTALL)
  if fenced:
    # Only newlines are trimmed so the indentation of the first line survives.
    return fenced.group(1).strip("\n")
  return content.strip()


def generate_topics_for_section(text, titles):
  """Ask the language model which topics a course section should cover.

  The reply is expected to list one topic per line, usually inside a Markdown
  code fence. The fence is located with a regular expression rather than
  fixed character offsets, and blank lines are dropped. A reply without a
  fence (for example an error message from ``generate_content``) is split
  into lines as it is.

  Args:
    text: Description of the section; interpolated into the prompt.
    titles: Titles available in the database; interpolated into the prompt.

  Returns:
    List of non-empty, stripped lines from the reply.
  """
  prompt_input = f"""
You are an artificial intelligence assistant who sets the topics that should be included in this section of the course. Create themes based on existing titles extracted from the database.
Please provide output only in code format without anything else.
Section:
{text}
Titles from database:
{titles}

Expected output format:
topic1
topic2
...
"""

  content = generate_content(prompt_input)

  # Opening fence line (with optional language tag), then everything up to the
  # closing fence; an unterminated fence runs to the end of the reply.
  fenced = re.search(r"```[^\n]*\n(.*?)(?:```|\Z)", content, re.DOTALL)
  if fenced:
    content = fenced.group(1)

  return [line.strip() for line in content.splitlines() if line.strip()]


def extract_section_topics_from_qdrant(section, COLLECTION_NAME, titles):
  """Generate topics for a section and fetch the best-matching text for each.

  Topics come from ``generate_topics_for_section`` (up to five attempts). If
  every attempt yields an error message or no topics, the section itself is
  used as the only topic. Each topic is then embedded and used to query the
  Qdrant collection for its single closest document.

  Args:
    section: One line of the lecture plan describing the section.
    COLLECTION_NAME: Name of the Qdrant collection to query.
    titles: Document titles passed on to the topic generator.

  Returns:
    Tuple ``(output, topics)``: ``output`` holds the ``'text'`` payload of the
    best match for every topic that produced a hit (topics without a hit, or
    whose query failed, contribute nothing); ``topics`` is the list of topics.
  """
  print('Section:', section)

  max_attempts = 5
  for _ in range(max_attempts):
    topics = generate_topics_for_section(section, titles)
    # `topics` is a list, so the error wording has to be searched in the
    # joined text; a plain `in` on the list would only compare whole items.
    reply = '\n'.join(topics)
    if topics and 'error generating the response:' not in reply and "couldn't generate a response."  not in reply:
      break
  else:
    # Bounded retries: fall back to the section title, not an endless loop.
    print('Error:', 'could not generate topics, using the section itself')
    topics = [section]

  print('Topics generated:', topics)

  output = []
  # Query once per generated topic (iterating `section` would walk over the
  # individual characters of the string).
  for query in topics:

    # Retrieve the most relevant document from Qdrant
    try:
      results = qdrant_client.query_points(
          collection_name=COLLECTION_NAME,
          query=embedder.encode(query).tolist(),
          limit=1,
      ).points
    except Exception as e:
      print(f"Error occured in data extraction from db: {e}")
      results = []

    # Only append when this query returned something, so a miss neither
    # raises nor repeats the text retrieved for the previous topic.
    if results:
      output.append(results[0].payload['text'])

  return output, topics



def topics2code(sections, docs, max_attempts=10):
  """Generate and run python-pptx code for every section; return the files made.

  For section ``i`` the model is asked (via ``generate_quastion_for_PP_code``)
  for code that saves ``Lecture{i+1}.pptx``. The code is executed, and the
  attempt counts as a success only if it ran without an exception and the
  file exists afterwards. After ``max_attempts`` failed attempts the section
  is skipped, so a persistently failing model cannot block the bot forever.

  Args:
    sections: One list of topics per lecture.
    docs: Source texts per lecture, indexed like ``sections``.
    max_attempts: Maximum number of generate-and-run attempts per lecture.

  Returns:
    File names of the presentations that were created, in lecture order.
  """
  file_pptx = []

  for i, section in enumerate(sections):
    filename = f"Lecture{i+1}.pptx"
    print('Topics:', section)
    print('Docs:', docs[i])

    # A file left over from an earlier run must not be mistaken for a result.
    if os.path.exists(filename):
      os.remove(filename)

    for _ in range(max_attempts):
      code = generate_quastion_for_PP_code(section, docs[i], filename)

      if 'error generating the response' in code:
        continue

      try:
        # SECURITY: this executes model-generated code with the notebook's
        # privileges. Review/sandbox before using outside a throwaway runtime.
        # A single fresh namespace is passed so that functions defined by the
        # generated code can see its own imports and variables, and so the
        # code cannot overwrite the notebook's globals.
        exec(code, {"__name__": "__main__"})
      except Exception as e:
        print('Error:', e)
        continue

      if os.path.exists(filename):
        file_pptx.append(filename)
        break
      print('Error:', f"generated code did not create {filename}")
    else:
      print('Error:', f"giving up on {filename} after {max_attempts} attempts")

  return file_pptx



In [10]:
# Create the Telegram bot; the token is read through Colab `userdata`.
TELEGRAM_API = userdata.get('pptx_telegram_API')
bot = telebot.TeleBot(TELEGRAM_API)

# Module-level state read by the message handlers. Names assigned at module
# level are already global, so no `global` statement is needed here.
titles = []
# Stays None when Qdrant holds no collection yet, so the name is always bound.
COLLECTION_NAME = None

# Reuse a collection that already exists in Qdrant; if several exist, the last
# one listed is used.
for collection in qdrant_client.get_collections().collections:
  COLLECTION_NAME = collection.name


@bot.message_handler(commands=['start', 'help'])
def send_welcome(message):
    """Answer /start and /help with instructions that match the database state.

    A different greeting is sent depending on whether Qdrant currently lists
    any collection.

    Args:
        message: Incoming Telegram message; the reply goes to ``message.chat.id``.
    """
    has_collection = len(qdrant_client.get_collections().collections) != 0

    if has_collection:
        greeting = "Hi, this is a pptx_creator_bot for creating presentations based on the pdf version of the course book and lecture plan.\nYou already have a pdf file in the database.\nTo continue working with the file, send topics for creating presentations, and to replace the file with another one, send a new pdf file with course materials, but the data from the previous file will be deleted."
    else:
        greeting = "Hi, this is a pptx_creator_bot for creating presentations based on the pdf version of the course book and lecture plan.\nTo start creating presentations, send a pdf file with the course materials."

    bot.send_message(message.chat.id, greeting)



@bot.message_handler(content_types=['document'])
def handle_pdf(message):
    """Replace the stored book with the PDF attached to ``message``.

    The file is downloaded first; only then are the existing Qdrant
    collections deleted and a new collection (named after the Telegram file
    id) created and filled by ``extract_text_from_pdf``. The module-level
    ``COLLECTION_NAME`` and ``titles`` are updated so the text handler works
    on the new book. Any exception is reported to the user as ``Error: ...``,
    and the temporary ``book.pdf`` is removed in every case.

    Args:
        message: Incoming Telegram message carrying the document.
    """
    # Without this declaration the assignments below would create locals:
    # the handlers would never see the new values, and reading
    # COLLECTION_NAME before its assignment would raise UnboundLocalError.
    global titles, COLLECTION_NAME

    pdf_path = 'book.pdf'
    try:
        # Download the PDF file
        file_info = bot.get_file(message.document.file_id)
        downloaded_file = bot.download_file(file_info.file_path)

        # Drop the previous data only once the new file is in hand.
        for collection in qdrant_client.get_collections().collections:
            qdrant_client.delete_collection(collection_name=collection.name)

        COLLECTION_NAME = str(message.document.file_id)

        qdrant_client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=models.VectorParams(
                size=embedder.get_sentence_embedding_dimension(),
                distance=models.Distance.COSINE,
            ),
        )

        # Save the PDF file temporarily
        with open(pdf_path, 'wb') as new_file:
            new_file.write(downloaded_file)

        # Extract text from the PDF
        msg = bot.send_message(message.chat.id, "Please wait while the file is being written to the database. It may take some time.")

        titles = extract_text_from_pdf(pdf_path, message, msg, COLLECTION_NAME)

        # Wait for the user to send text
        bot.reply_to(message, f"{len(titles)} documents have been successfully saved in the database.")
        bot.reply_to(message, "Please provide me the course content by highlighting each lecture in a separate line so that I can include it in pptx.")

    except Exception as e:
        bot.reply_to(message, f"Error: {e}")

    finally:
        # Remove the temporary copy on failure as well as on success.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)


@bot.message_handler(content_types=['text'])
def handle_text(message):
    """Build one presentation per line of ``message.text`` and send them back.

    Every non-blank line is treated as one lecture: topics and supporting
    texts are collected with ``extract_section_topics_from_qdrant``, the
    presentations are produced by ``topics2code`` and sent to the chat. Any
    exception is reported to the user as ``Error: ...``. Files returned by
    ``topics2code`` are deleted afterwards, including when sending fails.

    Args:
        message: Incoming Telegram text message containing the lecture plan.
    """
    if len(qdrant_client.get_collections().collections)==0:
        bot.reply_to(message, "The database does not contain data about the book, so first send the pdf file, and then enter the topics of the lectures.")
        return

    presents = []
    try:
        bot.reply_to(message, "Please wait until the bot sends you the pptx files.")

        # One lecture per line; blank lines would otherwise become lectures.
        sections = [line.strip() for line in message.text.split('\n') if line.strip()]

        topics = []
        docs = []

        for section in sections:
            outputs, queries = extract_section_topics_from_qdrant(section, COLLECTION_NAME, titles)
            topics.append(queries)
            docs.append(outputs)

        # Create the PPTX files
        presents = topics2code(topics, docs)

        # Send the PPTX files back to the user
        for path in presents:
            with open(path, 'rb') as pptx_file:
                bot.send_document(message.chat.id, pptx_file)

    except Exception as e:
        bot.reply_to(message, f"Error: {e}")

    finally:
        # Clean up even when sending failed part-way through.
        for path in presents:
            if os.path.exists(path):
                os.remove(path)


# Run the bot: start polling Telegram for updates with the registered handlers.
bot.polling(interval=0, non_stop=True)

Section: Representation of images and videos (Computer representation, Rescaling/manipulating images)
Topics generated: ['Image Representation', 'Image Data Structures', 'Pixel Manipulation', 'Image Rescaling', 'Image Filtering', 'Image Enhancement', 'Image Compression', 'Video Representation', 'Video Data Structures', 'Frame Processing', 'Video Compression']
Section: Image Classification (Loss Functions, Backpropagation)
Topics generated: ['Image Classification Fundamentals', 'Loss Functions for Image Classification', 'Backpropagation in Image Classification']
Section: Neural Networks (Training)
Topics generated: ['Backpropagation', 'Gradient Descent', 'Optimization Algorithms', 'Regularization', 'Hyperparameter Tuning', 'Early Stopping', 'Learning Rate Scheduling', 'Batch Normalization', 'Dropout', 'Data Augmentation', 'Overfitting and Underfitting', 'Model Evaluation']
Section: Convolutional Neural Networks  (Training, Architectures)
Topics generated: ['Convolutional Neural Network 



Successful
Section: Representation of images and videos (Computer representation, Rescaling/manipulating images)
Topics generated: ['Image Representation', 'Pixel-Based Representation', 'Color Models', 'Image Compression', 'Image Rescaling', 'Image Manipulation', 'Geometric Transformations', 'Filtering and Enhancement', 'Video Representation', 'Video Compression']
Section: Image Classification (Loss Functions, Backpropagation)
Topics generated: ['Image Classification Fundamentals', 'Loss Functions in Image Classification', 'Backpropagation for Image Classification']
Section: Neural Networks (Training)
Topics generated: ['Training Neural Networks', 'Backpropagation Algorithm', 'Gradient Descent and its Variants', 'Optimization Techniques', 'Learning Rate and its Impact', 'Regularization Methods', 'Overfitting and Underfitting', 'Hyperparameter Tuning', 'Early Stopping', 'Data Augmentation', 'Batch Normalization', 'Transfer Learning']
Section: Convolutional Neural Networks  (Training, Ar