In [2]:
import os
import google.generativeai as genai
from dotenv import load_dotenv

load_dotenv()

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [2]:
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
from PyPDF2 import PdfReader
# provide the path of  pdf file/files.
pdfreader = PdfReader('TheHundred-pageMachineLearning.pdf')
from typing_extensions import Concatenate
# read text from pdf
text = ''
for i, page in enumerate(pdfreader.pages):
    content = page.extract_text()
    if content:
        text += content

In [4]:
llm = ChatGoogleGenerativeAI(temperature=0, model='gemini-pro',max_output_tokens=4096)

In [5]:
llm.get_num_tokens(text)

70428

In [7]:
## Splittting the text
text_splitter = RecursiveCharacterTextSplitter(chunk_size=50000, chunk_overlap=500)
chunks = text_splitter.create_documents([text])

In [8]:
len(chunks)

6

In [9]:
chain = load_summarize_chain(
    llm,
    chain_type='map_reduce',
    verbose=False
)
summary = chain.run(chunks)

  warn_deprecated(


In [19]:
summary

'This book provides an introduction to machine learning, defining it as finding mathematical formulas that produce desired outputs from input data. It covers different types of learning, mathematical notation, and fundamental algorithms like linear regression, logistic regression, and support vector machines. The book emphasizes the limitations and ethical considerations of machine learning models. It also discusses advanced topics such as neural networks, sequence modeling, class imbalance handling, ensemble algorithms, regularization techniques, and transfer learning.'

In [10]:
chunks_prompt="""
Please summarize the below book:
book:`{text}'
Summary:
"""
map_prompt_template=PromptTemplate(input_variables=['text'],
                                    template=chunks_prompt)

In [11]:
final_combine_prompt='''
Provide a final summary of the entire book with these important points.
Add a Generic Motivational Title,
Start the precise summary with an introduction and provide the
summary in number points and the ouput in maximum 150 words.
book: `{text}`
'''
final_combine_prompt_template=PromptTemplate(input_variables=['text'],
                                             template=final_combine_prompt)

In [26]:
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    map_prompt=map_prompt_template,
    combine_prompt=final_combine_prompt_template,
    verbose=False
)
output = summary_chain.run(chunks)

In [13]:
output

'**Unlock the Power of Machine Learning**\n\nThis comprehensive guide to machine learning empowers you with:\n\n1. **Types of Learning:** Supervised, unsupervised, metric learning, and learning to rank.\n2. **Algorithms:** Linear regression, logistic regression, decision trees, support vector machines, neural networks, clustering, and dimensionality reduction.\n3. **Practical Applications:** Real-world examples and code snippets for implementing solutions.\n4. **Advanced Techniques:** Learning to rank, learning to recommend, self-supervised learning, and probabilistic graphical models.\n5. **Practical Value:** Emphasis on understanding principles and applying them effectively.\n6. **"Read First, Buy Later" Principle:** Evaluate content before purchasing.\n7. **Detailed Explanations and Examples:** Clear and concise explanations with supporting examples.\n8. **QR Codes for Additional Resources:** Access to online materials for further exploration.\n9. **Suitable for Beginners and Practi

In [27]:
chain = load_summarize_chain(
    llm=llm,
    chain_type='refine',
    verbose=False
)
output_summary = chain.run(chunks)

In [32]:
output_summary

'This book provides a concise introduction to machine learning, defining it as the process of finding mathematical formulas that generate desired outputs when applied to a collection of inputs (training data). These formulas can also generate correct outputs for most other inputs from the same or a similar statistical distribution as the training data.\n\nThe book emphasizes that machines do not learn in the same way as animals. If a machine is trained to play a video game by looking straight at the screen, it will not be able to play the game on a rotated screen unless it was also trained to recognize rotation.\n\nThe term "machine learning" is used to refer to the science and engineering of building machines capable of doing various useful things without being explicitly programmed to do so.\n\nThe book is divided into three parts:\n\n1. Introduction: This part provides an overview of machine learning, including its different types (supervised, semi-supervised, unsupervised, and rein

In [33]:
question_prompt_template = """
                  Please provide a summary of the following text.
                  TEXT: {text}
                  SUMMARY:
                  """

question_prompt = PromptTemplate(
    template=question_prompt_template, input_variables=["text"]
)

In [34]:
refine_prompt_template = """
              Write a concise summary of the following text delimited by triple backquotes.
              Return your response in bullet points which covers the key points of the text.
              ```{text}```
              BULLET POINT SUMMARY:
              """

refine_prompt = PromptTemplate(
    template=refine_prompt_template, input_variables=["text"]
)

In [37]:
refine_chain = load_summarize_chain(
    llm=llm,
    chain_type="refine",
    question_prompt=question_prompt,
    refine_prompt=refine_prompt,
    verbose=False
)
output = refine_chain.run(chunks)

In [38]:
output

"- **Learning to Rank:**\n    - Supervised learning problem used to optimize search results.\n    - Three approaches: pointwise, pairwise, and listwise.\n    - Listwise approach uses metrics like MAP to optimize ranking directly.\n    - LambdaMART implements a pairwise approach and uses gradient boosting to train the ranking function.\n\n- **Learning to Recommend:**\n    - Content-based filtering: Recommends items based on user's consumption history.\n    - Collaborative filtering: Recommends items based on preferences of similar users.\n    - Hybrid approach combines both methods.\n    - Factorization machines and denoising autoencoders are effective collaborative filtering algorithms.\n\n- **Self-Supervised Learning: Word Embeddings:**\n    - Word2vec's skip-gram model learns word embeddings by predicting context words from a central word.\n    - Embeddings capture semantic relationships between words.\n\n- **Other Topics Not Covered:**\n    - Topic modeling (Latent Dirichlet Allocat