In [1]:
from google.cloud import bigquery
import json
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""

In [2]:
# FUNCTION CODE
# from google.cloud import secretmanager
import base64
from transformers import pipeline
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
import torch
from PIL import Image


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import requests
from bs4 import BeautifulSoup
import re

def get_patent_pdf_url(publication_id):
    base_url = 'https://patents.google.com/patent/'

    # Function to find PDF URL
    def try_find_pdf_url(url):
        response = requests.get(url)

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')

            pdf_link = None
            for link in soup.find_all('a'):
                href = link.get('href')
                if href and href.endswith('.pdf'):
                    pdf_link = href
                    break

            return pdf_link
        else:
            return None

    url = f"{base_url}{publication_id}"
    pdf_url = try_find_pdf_url(url)
    if not pdf_url:
        # If the find fails, modify the publication ID and try again
        match = re.match(r'(\D+)(\d{4})(\d*)(\D+)', publication_id)
        if match:
            new_publication_id = f"{match.group(1)}{match.group(2)}0{match.group(3)}"
            new_url = f"{base_url}{new_publication_id}"
            pdf_url = try_find_pdf_url(new_url)
            if not pdf_url:
                # If still fails, remove the end of the publication ID and try again
                new_publication_id = f"{match.group(1)}{match.group(2)}0{match.group(3)}"
                new_url = f"{base_url}{new_publication_id}"
                pdf_url = try_find_pdf_url(new_url)
                if not pdf_url:
                    print("Failed to find the PDF URL with the modified publication ID.")
            else:
                print("Failed to find the PDF URL with the original publication ID.")

    return pdf_url

In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.llms import OpenAI
from langchain import SerpAPIWrapper
from langchain.agents.tools import Tool
from langchain import (
    LLMMathChain,
    OpenAI,
    SerpAPIWrapper,
    SQLDatabase,
    SQLDatabaseChain,
)
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.tools import tool
import os
import re

my_idea="portable refrigerator for drinks"

os.environ['OPENAI_API_KEY'] = ""

llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")

@tool
def search_patents(sql_query: str) -> str:
    """Gets relevant patents based on a query. Your input should just be the BigQuery SQL Query. Your query should begin with 
        SELECT
            publication_number,
            title_localized.text AS title
        FROM
            `patents-public-data.patents.publications`,
            UNNEST(title_localized) AS title_localized"""
    client = bigquery.Client(project="patent-search-390320")
    
    
    # Execute the query
    query_job = client.query(sql_query)
    
    # Store the results
    results = []
    for row in query_job:
        results.append({'publication_number': row.publication_number, 'title': row.title})

    return json.dumps(results)

tools = [
    Tool(
        name="SearchPatents",
        func=search_patents.run,
        description="""Gets relevant patents based on a query. Your input should just be the BigQuery SQL Query. Your query should begin with SELECT
            publication_number,
            title_localized.text AS title
        FROM
            `patents-public-data.patents.publications`,
            UNNEST(title_localized) AS title_localized

        Only search for 10 patents at a time.
        """
    )
]

mrkl = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)
output = mrkl.run(
    "Search patents related to my idea of {}. Search multiple related queries if no patents found. Be specific enough in your queries, but not too specific that you won't find anything. You should find at least 5 related patents, going a bit more broad if nothing is found.".format(my_idea)
)

pattern = re.compile(r'\b[A-Z]{2}-\d{4,}-[A-Z0-9]{1,}\b')

patent_ids = pattern.findall(output)

def remove_dashes(publication_id):
    return publication_id.replace('-', '')

patent_urls = []

for patent in patent_ids:
    patent_urls.append(get_patent_pdf_url(remove_dashes(patent)))



[1m> Entering new  chain...[0m
[32;1m[1;3m
Invoking: `SearchPatents` with `SELECT publication_number, title_localized.text AS title FROM `patents-public-data.patents.publications`, UNNEST(title_localized) AS title_localized WHERE title_localized.text LIKE '%portable refrigerator%' AND title_localized.text LIKE '%drinks%' LIMIT 5`


[0m[36;1m[1;3m[][0m[32;1m[1;3m
Invoking: `SearchPatents` with `SELECT publication_number, title_localized.text AS title FROM `patents-public-data.patents.publications`, UNNEST(title_localized) AS title_localized WHERE title_localized.text LIKE '%portable refrigerator%' LIMIT 5`


[0m[36;1m[1;3m[{"publication_number": "WO-0026591-A1", "title": "Transportable refrigerator for bottles and cardboard packages"}, {"publication_number": "CN-109237853-A", "title": "A kind of detachable portable refrigerator"}, {"publication_number": "CN-107782063-A", "title": "A kind of new portable refrigerator"}, {"publication_number": "ES-238228-U", "title": "Condi

In [6]:
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.pinecone import Pinecone
import pinecone

embeddings = OpenAIEmbeddings()
pinecone.init(api_key="")

pages = []
for patent_id in patent_ids:
    pdf_url = get_patent_pdf_url(remove_dashes(patent))
    if pdf_url:
        loader = PyPDFLoader(pdf_url)
        pages += loader.load_and_split()
vectordb = Pinecone.from_documents(pages, embeddings, index_name="patents")

In [20]:
search = vectordb.similarity_search("Invensas")
print(len(search))

4


In [27]:
# first step: use gpt 3.5 turbo to generate a list of vector search queries
query_gen_llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")

from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

from langchain.schema import (
    SystemMessage,
    HumanMessage
)
from concurrent.futures import ThreadPoolExecutor, as_completed

from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, SQLDatabase, SQLDatabaseChain
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
import json

llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")

messages = [
    SystemMessage(content=f"Based on the following idea, generate a list of vector search queries focusing on different aspects of the idea that could be used to search for similar patents, then return them as a json list. \n\n{my_idea}"),
    HumanMessage(content=my_idea)
]

queries = json.loads(llm(messages).content)

def search_vectors(queries: list[str]) -> list[str]:
    """Gets information about patents related to the search query. Your input should be a list of vector search queries focusing on different aspects of the idea."""

    patents = []
    with ThreadPoolExecutor() as executor:
        futures = {executor.submit(vectordb.similarity_search, query, 1): query for query in queries}
        for future in as_completed(futures):
            patents.extend([patent.page_content for patent in future.result()])
    return patents

patents = search_vectors(queries)
print(len(patents))
print(patents)

10
['一种保温或保鲜的冰箱杯\n技术领域\n[0001] 本实用新型属于冰箱杯领域， 具体涉及一种顶部制冷且便于携带的小型保温或保\n鲜的冰箱杯。\n背景技术\n[0002] 目前， 针对小型冰箱， 尤其是车载或户外使用的冰箱， 其制冷效果将决定其实用\n性。\n[0003] 然而， 这种小型冰箱基本上满足了使用需求了， 但是， 其制冷效果一般， 主要原因\n是位于箱体内的冷、 热气流无法对流， 造成能耗大， 制冷效率低， 尤其在户外使用时， 其可使\n用的时间往往达到使用者的要求； 再加上特殊场合中， 不可能随身携带一个小型冰箱， 因\n此， 本申请所涉及的冰箱杯， 其不仅能够快速制冷实现保温， 而且体积更小， 便于携带。\n发明内容\n[0004] 本实用新型所要解决的技术问题是克服现有技术的不足， 提供一种全新的顶部制\n冷且便于携带的小型保温或保鲜的冰箱杯， 其将制冷部件置顶， 使得冷气流在自重下自动\n下沉， 热气流自动上升， 从而在杯体内形成对流， 进而提高制冷效率， 降低能耗； 同时体积\n小， 更方便携带。\n[0005] 为解决以上技术问题， 本实用新型采用如下技术方案：\n[0006] 一种保温或保鲜的冰箱杯， 其包括杯体和杯盖， 其中杯体内部能够放置待保温或\n保鲜物品， 在杯盖上设有散热孔， 冰箱杯还包括设置在杯盖内部的制冷部件， 该制冷部件包\n括将杯体顶部开口闭合且能够形成冷气流的制冷单元、 设置在制冷单元上方且用于对制冷\n单元散热的散热单元， 其中冷气流自杯体顶部向底部自流、 并与杯体内的热气流进行对流\n换热实现快速制冷； 所产生热气流自散热孔散出。\n[0007] 优选地， 制冷单元包括半导体制冷器件； 散热单元包括设置在半导体制冷器件散\n热端面并将热气流导向散热孔的风扇。\n[0008] 根据本实用新型的一个具体实施和优选方面， 制冷部件还包括设置在半导体制冷\n器件制冷端面的内部换热片、 设置在半导体制冷器件散热端面且处于半导体制冷器件与风\n扇之间的外部散热片。 由内部换热片提高制冷效率和效果， 而外部散热片能够实现半导体\n制冷器件的快速散热， 进而提高半导体制冷器件的制冷效果。\n[0009] 进一步的， 制冷部件还包括套设在半导体制冷器件外周且用于将半导体制冷器件\n与杯盖内壁隔开设置的隔热垫。 由隔热垫的设置，

In [7]:
from langchain.agents import Tool, AgentExecutor, ZeroShotAgent
from langchain.tools import HumanInputRun
from langchain.memory import ConversationBufferMemory
from langchain import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator

new_llm = ChatOpenAI(model='gpt-4')

class SleuthResponse(BaseModel):
    score: str = Field(description="")
    analysis: str = Field(description="answer to resolve the joke")
    suggestions: str = Field(description="answer to resolve the joke")

tools = [
    Tool(
        name="Relevant Patent Search",
        func=vectordb.similarity_search,
        description=f"Search the 5 most relevant patents in a vector database. Input should be a search query."
    ),
]

parser = PydanticOutputParser(pydantic_object=SleuthResponse)

prefix = """You are a lawyer giving an inventor advice on how patentable their idea is. You have access to the following tools:"""
suffix = """You must return 3 pieces of information:
1. Give an at least two-paragraph-long detailed analysis on the IP landscape surrounding my idea of '{my_idea}' and relevant patents and how my idea is similar/different. Go into specifics about what the existing patents have to offer. 
2. Give a score from 0 to 100 summarizing how patentable my idea is, with 0 being that the patent would definitely not be approved, 100 that it would be approved with no revisions, and 50 that it the patent could potentially be approved but would definitely require revisions.
3. Give at least 3 suggestions on how to improve my idea to make it more patentable. Write a paragraph for each suggestion.
{agent_scratchpad}"""

prompt = ZeroShotAgent.create_prompt(
    tools, 
    prefix=prefix, 
    suffix=suffix, 
    input_variables=["my_idea", "agent_scratchpad"]
)

llm_chain = LLMChain(llm=new_llm, prompt=prompt)
agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools)
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, max_iterations=10)
detailed_analysis = agent_executor.run(my_idea)
print(detailed_analysis)



[1m> Entering new  chain...[0m
[32;1m[1;3mThought: I need to search for relevant patents to analyze the IP landscape.
Action: Relevant Patent Search
Action Input: portable refrigerator for drinks[0m
Observation: [36;1m[1;3m[Document(page_content='一种保温或保鲜的冰箱杯\n技术领域\n[0001] 本实用新型属于冰箱杯领域， 具体涉及一种顶部制冷且便于携带的小型保温或保\n鲜的冰箱杯。\n背景技术\n[0002] 目前， 针对小型冰箱， 尤其是车载或户外使用的冰箱， 其制冷效果将决定其实用\n性。\n[0003] 然而， 这种小型冰箱基本上满足了使用需求了， 但是， 其制冷效果一般， 主要原因\n是位于箱体内的冷、 热气流无法对流， 造成能耗大， 制冷效率低， 尤其在户外使用时， 其可使\n用的时间往往达到使用者的要求； 再加上特殊场合中， 不可能随身携带一个小型冰箱， 因\n此， 本申请所涉及的冰箱杯， 其不仅能够快速制冷实现保温， 而且体积更小， 便于携带。\n发明内容\n[0004] 本实用新型所要解决的技术问题是克服现有技术的不足， 提供一种全新的顶部制\n冷且便于携带的小型保温或保鲜的冰箱杯， 其将制冷部件置顶， 使得冷气流在自重下自动\n下沉， 热气流自动上升， 从而在杯体内形成对流， 进而提高制冷效率， 降低能耗； 同时体积\n小， 更方便携带。\n[0005] 为解决以上技术问题， 本实用新型采用如下技术方案：\n[0006] 一种保温或保鲜的冰箱杯， 其包括杯体和杯盖， 其中杯体内部能够放置待保温或\n保鲜物品， 在杯盖上设有散热孔， 冰箱杯还包括设置在杯盖内部的制冷部件， 该制冷部件包\n括将杯体顶部开口闭合且能够形成冷气流的制冷单元、 设置在制冷单元上方且用于对制冷\n单元散热的散热单元， 其中冷气流自杯体顶部向底部自流、 并与杯体内的热气流进行对流\n换热实现快速制冷； 所产生热气流自散热孔散出。\n[0007] 优选地， 制冷单元包括半导体制冷器件； 散热单元包括设置在半导体制冷器

OutputParserException: Could not parse LLM output: `I found several relevant patents related to portable refrigerators for drinks. The first patent (CN 208463549 U) is for a thermally insulated refrigerator cup that has a cooling component placed at the top of the cup. The cooling component includes a cooling unit that can generate a cold air flow and a heat dissipation unit. The cold air flow moves from the top to the bottom of the cup, while the hot air flow moves upward, forming convection within the cup, thus improving cooling efficiency and reducing energy consumption. The compact size of the invention makes it easy to carry.

The second patent (CN 109237853 A) is for a detachable portable refrigerator, which includes a cooling machine connected to a thermally insulated box through a heat pipe. The connection between the cooling machine and the heat pipe is a threaded connection, ensuring a reliable structure and easy disassembly. This invention addresses shortcomings in existing small refrigerators by creating a portable refrigerator that can be easily transported and maintained.

Based on these existing patents, I would give your idea of a portable refrigerator for drinks a patentability score of 60. This means that your idea has potential for being patented, but may require revisions to differentiate it from existing patents.

Here are three suggestions to improve your idea and make it more patentable:

1. Integrate a temperature control system that allows users to set a specific temperature for their drinks. This feature could provide more customization options and differentiate your portable refrigerator from existing patents.

2. Incorporate a solar-powered charging system, which would allow the portable refrigerator to charge and operate using solar energy. This environmentally friendly feature could appeal to eco-conscious consumers and set your invention apart from competitors.

3. Design a modular system that allows users to expand the refrigerator's capacity by connecting multiple portable refrigerator units. This feature could give users the flexibility to adapt the refrigerator's size based on their needs, making it more versatile and unique compared to existing patents.`

In [13]:
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"score": {"title": "Score", "type": "string"}, "analysis": {"title": "Analysis", "description": "answer to resolve the joke", "type": "string"}, "suggestions": {"title": "Suggestions", "description": "answer to resolve the joke", "type": "string"}}, "required": ["score", "analysis", "suggestions"]}
```


In [None]:
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak, Flowable
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch

class CustomDivider(Flowable):
    def __init__(self, width=1, color=colors.lightgrey):
        Flowable.__init__(self)
        self.width = width
        self.color = color
    
    def wrap(self, availWidth, availHeight):
        return (availWidth, self.width)
    
    def draw(self):
        self.canv.saveState()
        self.canv.setStrokeColor(self.color)
        self.canv.setLineWidth(self.width)
        self.canv.line(-80, 0, letter[0] - 80, 0)  # Starts from -80 pixels
        self.canv.restoreState()

def draw_top_bar(canvas, doc, score_color):
    canvas.saveState()
    canvas.setFillColor(score_color)
    canvas.rect(0, letter[1] - 0.5*inch, letter[0], 0.5*inch, fill=1, stroke=0)
    canvas.restoreState()

def on_first_page(canvas, doc):
    draw_top_bar(canvas, doc, score_color)

    box_size = 40
    padding = 5
    radius = 8
    border_thickness = 2
    box_top = letter[1] - 0.5*inch - 10
    canvas.saveState()
    canvas.setStrokeColor(score_color)
    canvas.setLineWidth(border_thickness)
    canvas.roundRect(20, box_top - box_size, box_size, box_size, radius)
    canvas.drawImage("emoji.png", 20 + padding, box_top - box_size + padding + border_thickness/2, width=box_size - 2*padding - border_thickness, height=box_size - 2*padding - border_thickness, mask='auto')
    canvas.restoreState()

    score_box_width = 60
    score_box_height = 40
    radius = 8
    score_box_left = letter[0] - 80  # position the score box on the right
    canvas.saveState()
    canvas.setStrokeColor(score_color)
    canvas.setFillColor(score_color)
    canvas.roundRect(score_box_left, box_top - score_box_height, score_box_width, score_box_height, radius, fill=1, stroke=1)
    
    score_text = str(idea_sleuth_score)
    text_width = canvas.stringWidth(score_text, "Helvetica-Bold", 18)
    text_x = score_box_left + (score_box_width - text_width) / 2
    text_y = box_top - score_box_height + (score_box_height - 18) / 2
    canvas.setFillColor(colors.white)
    canvas.setFont("Helvetica-Bold", 18)
    canvas.drawString(text_x, text_y, score_text)
    canvas.restoreState()

def on_later_pages(canvas, doc):
    draw_top_bar(canvas, doc, score_color)

output = 'IdeaReport.pdf'
doc = SimpleDocTemplate(output, pagesize=letter)

idea_sleuth_score = 75

if idea_sleuth_score >= 80:
    score_color = colors.green
elif idea_sleuth_score >= 50:
    score_color = colors.orange
else:
    score_color = colors.red

styles = getSampleStyleSheet()
header_style = ParagraphStyle('HeaderStyle', fontName='Helvetica-Bold', fontSize=20, fontWeight=900, alignment=0, spaceAfter=6, leftIndent=-60)  # alignment 0 is for left alignment
subtitle_style = ParagraphStyle('SubtitleStyle', fontSize=12, alignment=0, spaceAfter=12, leftIndent=-60)  # alignment 0 is for left alignment
idea_style = ParagraphStyle('IdeaStyle', fontName='Helvetica-Bold', fontSize=12, alignment=0, spaceAfter=6, leftIndent=-60)  # alignment 0 is for left alignment
text_style = ParagraphStyle('TextStyle', fontSize=10, alignment=0, spaceAfter=6, leftIndent=-60)

related_patents = ""

for i, patent_id in enumerate(patent_ids):
    related_patents += "- " + patent_id + ": " + "https://patent.google.com/123" + "<br/>"

print(related_patents)
    
content = [
    Spacer(1, 0.5*inch),
    Paragraph('Your Idea Report', header_style),
    Spacer(1, 0.1*inch),
    Paragraph('By IdeaSleuth', subtitle_style),
    Spacer(1, 0.3*inch),
    Paragraph('YOUR IDEA', idea_style),
    Paragraph(my_idea, text_style),
    Spacer(1, 0.2*inch),
    CustomDivider(),
    Spacer(1, 0.3*inch),
    Paragraph('RELATED INTELLECTUAL PROPERTY', idea_style),
    Paragraph(related_patents, text_style),
    Spacer(1, 0.2*inch),
    CustomDivider(),
    Spacer(1, 0.3*inch),
    Paragraph('DETAILED ANALYSIS', idea_style),
    Paragraph(detailed_analysis.replace("\n", "<br/>"), text_style),
    Spacer(1, 0.2*inch),
    CustomDivider(),
    Spacer(1, 0.3*inch),
    Paragraph('SUGGESTIONS TO IMPROVE SCORE', idea_style),
    Paragraph('This is where your idea content will be displayed...', text_style),
    Spacer(1, 0.2*inch),
    CustomDivider()
]

doc.build(content, onFirstPage=on_first_page, onLaterPages=on_later_pages)
