From 2d01743a3b200210182f5f359757725a056a43c8 Mon Sep 17 00:00:00 2001 From: Francisco Ingham <24279597+fpingham@users.noreply.github.com> Date: Tue, 16 Jan 2024 15:06:10 -0300 Subject: [PATCH 1/5] competitors matrix example for image extraction --- examples/vision/competitors.py | 257 +++++++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 examples/vision/competitors.py diff --git a/examples/vision/competitors.py b/examples/vision/competitors.py new file mode 100644 index 000000000..64b4ca3a9 --- /dev/null +++ b/examples/vision/competitors.py @@ -0,0 +1,257 @@ +import json +import logging +import os +import sys +from typing import Dict, List, Optional + +import instructor +from dotenv import find_dotenv, load_dotenv +from openai import OpenAI +from pydantic import BaseModel, Field +from rich import print as rprint + +load_dotenv(find_dotenv()) + +# Add logger +logging.basicConfig() +logger = logging.getLogger("app") +logger.setLevel("INFO") + +class Competitor(BaseModel): + name: str + features: Optional[List[str]] + + +# Define models +class Industry(BaseModel): + """ + Represents competitors from a specific industry extracted from an image using AI. + """ + + name: str = Field( + description="the name of the industry for these competitors" + ) + competitor_list: List[Competitor] = Field( + description="A dict of competitors where each key is an industry" + ) + +class Competition(BaseModel): + """ + Represents competitors extracted from an image using AI. + + This class serves as a structured representation of + competitors and their qualities. 
+ """ + + industry_list: List[Industry] = Field( + description="A list of industries and their competitors" + ) + +# Define clients +client_image = instructor.patch( + OpenAI(api_key=os.getenv("OPENAI_API_KEY")), mode=instructor.Mode.MD_JSON +) + +# Define functions +def read_images(image_urls: List[str]) -> Competition: + """ + Given a list of image URLs, identify the competitors in the images. + """ + + logger.info(f"Identifying competitors in images... {len(image_urls)} images") + + return client_image.chat.completions.create( + model="gpt-4-vision-preview", + response_model=Competition, + max_tokens=2048, + temperature=0, + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Identify competitors and generate key features for each competitor.", + }, + *[ + {"type": "image_url", "image_url": {"url": url}} + for url in image_urls + ], + ], + } + ], + ) + + + +def run(images: List[str]) -> Competition: + """ + Given a list of images, identify the industries and the competitors in the images. 
+ """ + + competitors: Competition = read_images(images) + + return competitors + + +if __name__ == "__main__": + # Run logger + logger.info("Starting app...") + + if len(sys.argv) != 2: + print("Usage: python app.py ") + sys.exit(1) + + image_file = sys.argv[1] + with open(image_file, "r") as file: + logger.info(f"Reading images from file: {image_file}") + try: + image_list = file.read().splitlines() + logger.info(f"{len(image_list)} images read from file: {image_file}") + except Exception as e: + logger.error(f"Error reading images from file: {image_file}") + logger.error(e) + sys.exit(1) + + competitors = run(image_list) + + rprint(f"[green]{len(competitors.industry_list)} industries identified:[/green]") + for industry in competitors.industry_list: + rprint(f"[green]{industry.name}[/green]") + rprint(f"[blue]Features: {industry.competitor_list}[/blue]") + + logger.info("Writing results to file...") + + with open("results.json", "w") as f: + json.dump( + { + "competitors": competitors.model_dump(), + }, + f, + indent=4, + ) + +""" +Example output: +{ + "competitors": { + "industry_list": [ + { + "name": "Accommodation and Hospitality", + "competitor_list": [ + { + "name": "craigslist", + "features": [ + "Transactions Offline", + "Inexpensive" + ] + }, + { + "name": "couchsurfing", + "features": [ + "Transactions Offline", + "Inexpensive" + ] + }, + { + "name": "BedandBreakfast.com", + "features": [ + "Transactions Offline", + "Inexpensive" + ] + }, + { + "name": "airbnb", + "features": [ + "Transactions Online", + "Inexpensive" + ] + }, + { + "name": "HOSTELS.com", + "features": [ + "Transactions Online", + "Inexpensive" + ] + }, + { + "name": "VRBO", + "features": [ + "Transactions Offline", + "Costly" + ] + }, + { + "name": "Rentahome", + "features": [ + "Transactions Online", + "Costly" + ] + }, + { + "name": "Orbitz", + "features": [ + "Transactions Online", + "Costly" + ] + }, + { + "name": "Hotels.com", + "features": [ + "Transactions Online", + "Costly" + 
] + } + ] + }, + { + "name": "E-commerce Wine Retailers", + "competitor_list": [ + { + "name": "winesimple", + "features": [ + "Ecommerce Retailers", + "True Personalized Selections", + "Brand Name Wine", + "No Inventory Cost", + "Target Mass Market" + ] + }, + { + "name": "nakedwines.com", + "features": [ + "Ecommerce Retailers", + "Target Mass Market" + ] + }, + { + "name": "Club W", + "features": [ + "Ecommerce Retailers", + "Brand Name Wine", + "Target Mass Market" + ] + }, + { + "name": "Tasting Room", + "features": [ + "Ecommerce Retailers", + "True Personalized Selections", + "Brand Name Wine" + ] + }, + { + "name": "hellovino", + "features": [ + "Ecommerce Retailers", + "True Personalized Selections", + "No Inventory Cost", + "Target Mass Market" + ] + } + ] + } + ] + } +} +""" \ No newline at end of file From 3ecdddfe2712a27039aaa84d9aaa130265c6207a Mon Sep 17 00:00:00 2001 From: Francisco Ingham <24279597+fpingham@users.noreply.github.com> Date: Tue, 16 Jan 2024 15:56:34 -0300 Subject: [PATCH 2/5] fixed bug in field description --- examples/vision/competitors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/vision/competitors.py b/examples/vision/competitors.py index 64b4ca3a9..96142fa87 100644 --- a/examples/vision/competitors.py +++ b/examples/vision/competitors.py @@ -29,10 +29,10 @@ class Industry(BaseModel): """ name: str = Field( - description="the name of the industry for these competitors" + description="The name of the industry" ) competitor_list: List[Competitor] = Field( - description="A dict of competitors where each key is an industry" + description="A list of competitors for this industry" ) class Competition(BaseModel): From ba521968668e9bed651c72983f7fd4fc2911afd2 Mon Sep 17 00:00:00 2001 From: Francisco Ingham <24279597+fpingham@users.noreply.github.com> Date: Mon, 29 Jan 2024 09:03:31 -0300 Subject: [PATCH 3/5] added cookbook for extracting slides --- docs/examples/extract_slides.md | 113 
++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 docs/examples/extract_slides.md diff --git a/docs/examples/extract_slides.md b/docs/examples/extract_slides.md new file mode 100644 index 000000000..d086fe871 --- /dev/null +++ b/docs/examples/extract_slides.md @@ -0,0 +1,113 @@ +# Data extraction from slides + +In this guide, we demonstrate how to extract data from slides. + +!!! tips "Motivation" + + When we want to translate key information from slides into structured data, simply isolating the text and running extraction might not be enough. Sometimes the important data is in the images on the slides, so we should consider including them in our extraction pipeline. + +## Defining the necessary Data Structures + +Let's say we want to extract the competitors from various presentations and categorize them according to their respective industries. + +Our data model will have `Industry`, which holds the industry name and a list of `Competitor` objects for that industry, and `Competition`, which aggregates the competitors across all industries. + +```python +from openai import OpenAI +from pydantic import BaseModel, Field +from typing import Optional, List + +class Competitor(BaseModel): + name: str + features: Optional[List[str]] + + +# Define models +class Industry(BaseModel): + """ + Represents competitors from a specific industry extracted from an image using AI. + """ + + name: str = Field( + description="The name of the industry" + ) + competitor_list: List[Competitor] = Field( + description="A list of competitors for this industry" + ) + +class Competition(BaseModel): + """ + This class serves as a structured representation of + competitors and their qualities. 
+ """ + + industry_list: List[Industry] = Field( + description="A list of industries and their competitors" + ) +``` + +## Competitors extraction + +To extract competitors from slides we will define a function which will read images from URLs and extract the relevant information from them. + +```python +import instructor +from openai import OpenAI + +# Apply the patch to the OpenAI client +# enables response_model keyword +client = instructor.patch( + OpenAI(), mode=instructor.Mode.MD_JSON +) + +# Define functions +def read_images(image_urls: List[str]) -> Competition: + """ + Given a list of image URLs, identify the competitors in the images. + """ + return client.chat.completions.create( + model="gpt-4-vision-preview", + response_model=Competition, + max_tokens=2048, + temperature=0, + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Identify competitors and generate key features for each competitor.", + }, + *[ + {"type": "image_url", "image_url": {"url": url}} + for url in image_urls + ], + ], + } + ], + ) +``` + +## Execution + +Finally, we will run the previous function with a few sample slides to see the data extractor in action. + +As we can see, our model extracted the relevant information for each competitor regardless of how this information was formatted in the original presentations. 
+ +```python +url = [ + 'https://miro.medium.com/v2/resize:fit:1276/0*h1Rsv-fZWzQUyOkt', + 'https://earlygame.vc/wp-content/uploads/2020/06/startup-pitch-deck-5.jpg' + ] +model = read_images(url) +print(model) +``` + industry_list=[ + + Industry(name='Accommodation and Hospitality', competitor_list=[Competitor(name='CouchSurfing', features=['Affordable', 'Online Transaction']), Competitor(name='Craigslist', features=['Affordable', 'Offline Transaction']), Competitor(name='BedandBreakfast.com', features=['Affordable', 'Offline Transaction']), Competitor(name='AirBed&Breakfast', features=['Affordable', 'Online Transaction']), Competitor(name='Hostels.com', features=['Affordable', 'Online Transaction']), Competitor(name='VRBO', features=['Expensive', 'Offline Transaction']), Competitor(name='Rentahome', features=['Expensive', 'Online Transaction']), Competitor(name='Orbitz', features=['Expensive', 'Online Transaction']), Competitor(name='Hotels.com', features=['Expensive', 'Online Transaction'])]), + + Industry(name='Wine E-commerce', competitor_list=[Competitor(name='WineSimple', features=['Ecommerce Retailers', 'True Personalized Selections', 'Brand Name Wine', 'No Inventory Cost', 'Target Mass Market']), Competitor(name='NakedWines', features=['Ecommerce Retailers', 'Target Mass Market']), Competitor(name='Club W', features=['Ecommerce Retailers', 'Brand Name Wine', 'Target Mass Market']), Competitor(name='Tasting Room', features=['Ecommerce Retailers', 'True Personalized Selections', 'Brand Name Wine']), Competitor(name='Drync', features=['Ecommerce Retailers', 'True Personalized Selections', 'No Inventory Cost']), Competitor(name='Hello Vino', features=['Ecommerce Retailers', 'Brand Name Wine', 'Target Mass Market'])]) + + ] +``` +``` \ No newline at end of file From 3295467eea7b9af741803badc7b71217baa66821 Mon Sep 17 00:00:00 2001 From: Francisco Ingham <24279597+fpingham@users.noreply.github.com> Date: Mon, 29 Jan 2024 09:07:02 -0300 Subject: [PATCH 4/5] added 
extract slides cookbook to mkdocs --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index bf2b9987d..f7eba4da4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -151,6 +151,7 @@ nav: - Text Classification: 'examples/classification.md' - LLM Self Critique: 'examples/self_critique.md' - Extracting Tables with GPT-V: 'examples/extracting_tables.md' + - Extracting From Slides with GPT-V: 'examples/extract_slides.md' - Content Moderation: 'examples/moderation.md' - Citing Sources (RAG): 'examples/exact_citations.md' - Extracting Knowledge Graphs: 'examples/knowledge_graph.md' From 83eebf219da8890fa79da9f755315a4fe7d342ae Mon Sep 17 00:00:00 2001 From: Francisco Ingham <24279597+fpingham@users.noreply.github.com> Date: Fri, 9 Feb 2024 19:47:25 -0300 Subject: [PATCH 5/5] slides examples uses typer cli --- examples/vision/{competitors.py => slides.py} | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) rename examples/vision/{competitors.py => slides.py} (93%) diff --git a/examples/vision/competitors.py b/examples/vision/slides.py similarity index 93% rename from examples/vision/competitors.py rename to examples/vision/slides.py index 96142fa87..37219eafd 100644 --- a/examples/vision/competitors.py +++ b/examples/vision/slides.py @@ -4,12 +4,14 @@ import sys from typing import Dict, List, Optional -import instructor +import typer from dotenv import find_dotenv, load_dotenv from openai import OpenAI from pydantic import BaseModel, Field from rich import print as rprint +import instructor + load_dotenv(find_dotenv()) # Add logger @@ -93,25 +95,24 @@ def run(images: List[str]) -> Competition: return competitors +import typer -if __name__ == "__main__": - # Run logger - logger.info("Starting app...") - if len(sys.argv) != 2: - print("Usage: python app.py ") - sys.exit(1) +def main(image_file: str = typer.Argument(..., help="Path to the image list file")): + """ + Main function to process the image list file and identify 
competitors. + """ + logger.info("Starting app...") - image_file = sys.argv[1] - with open(image_file, "r") as file: - logger.info(f"Reading images from file: {image_file}") - try: + try: + with open(image_file, "r") as file: + logger.info(f"Reading images from file: {image_file}") image_list = file.read().splitlines() logger.info(f"{len(image_list)} images read from file: {image_file}") - except Exception as e: - logger.error(f"Error reading images from file: {image_file}") - logger.error(e) - sys.exit(1) + except Exception as e: + logger.error(f"Error reading images from file: {image_file}") + logger.error(e) + raise typer.Exit(code=1) competitors = run(image_list) @@ -131,6 +132,9 @@ def run(images: List[str]) -> Competition: indent=4, ) +if __name__ == "__main__": + typer.run(main) + """ Example output: {