In [4]:
# One-time setup: uncomment the next line if the `openai` package is not installed.
# %pip install openai

In [5]:
import requests
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Optional
from pprint import pprint

# Define structured models
class Ingredient(BaseModel):
    # One ingredient entry of a recipe; instances are collected in Recipe.ingredients.
    # The Field(description=...) text is attached to each field's metadata.
    # NOTE(review): presumably these descriptions are what guides the model when this
    # schema is passed as `text_format` — confirm against the OpenAI SDK docs.

    # Numeric quantity. Optional: the annotation allows None
    # (presumably for ingredients listed without an amount, e.g. "salt to taste").
    amount: Optional[float] = Field(description="Quantity of the ingredient")
    # Unit of measurement as free text. Optional: the annotation allows None
    # (presumably for unit-less items such as "2 eggs").
    unit: Optional[str] = Field(description="Unit of measurement (e.g., cup, tbsp, oz)")
    # Ingredient name; the only field whose annotation does not allow None.
    name: str = Field(description="Name of the ingredient")

class Recipe(BaseModel):
    # Top-level structured recipe: a title, a list of Ingredient entries, and the
    # preparation steps. This is the type passed as `text_format` in
    # get_recipe_from_text and the type that function is annotated to return.

    # Human-readable recipe name.
    title: str = Field(description="Name of the recipe")
    # Ingredient entries, each an Ingredient model.
    ingredients: List[Ingredient] = Field(description="List of ingredients needed for the recipe")
    # Preparation steps, one string per step.
    instructions: List[str] = Field(description="Step-by-step instructions to prepare the recipe")

def get_recipe_from_text(recipe_text: str, model: str = "gpt-4o-mini-2024-07-18") -> Recipe:
    """
    Convert recipe text into a structured Recipe object using OpenAI structured output.

    Args:
        recipe_text: Free-form recipe text to convert.
        model: OpenAI model identifier used for the request. Defaults to the
            snapshot this notebook was written against, so existing callers
            are unaffected.

    Returns:
        The Recipe instance parsed from the model's response.

    Raises:
        ValueError: If ``recipe_text`` is empty or whitespace-only, or if the
            response carries no parsed output (``output_parsed`` is None).
    """
    # Fail before spending an API call on input that cannot contain a recipe.
    if not recipe_text or not recipe_text.strip():
        raise ValueError("recipe_text is empty; nothing to convert")

    # No credentials are passed here, so the client relies on its own defaults
    # (the OPENAI_API_KEY environment variable, per the OpenAI SDK documentation).
    client = OpenAI()

    response = client.responses.parse(
        model=model,
        input=[{"role": "user", "content": f"Convert this recipe into the specified format:\n\n{recipe_text}"}],
        text_format=Recipe
    )

    # output_parsed can be None; returning it unchecked would break the declared
    # `-> Recipe` contract and push the failure to an unrelated line in the caller.
    recipe = response.output_parsed
    if recipe is None:
        raise ValueError("Model response did not contain a parsed Recipe")

    return recipe

# ---- Load recipe text from GitHub ----
url = "https://raw.githubusercontent.com/section4-ed/mmba-ai-programmers/section/week_2/recipe_ingredients/solution/mac_and_cheese_recipe.txt"

# requests has no default timeout: without one, a stalled connection would
# block this cell (and the kernel) indefinitely.
response = requests.get(url, timeout=30)
# Turn an HTTP error status into an exception rather than parsing an error page.
response.raise_for_status()
recipe_text = response.text

# ---- Parse with LLM ----
recipe = get_recipe_from_text(recipe_text)

# ---- Display Results ----
pprint(recipe)


Recipe(title='Cheesy Macaroni and Cheese', ingredients=[Ingredient(amount=16.0, unit='oz', name='macaroni pasta'), Ingredient(amount=0.5, unit='C', name='unsalted butter'), Ingredient(amount=1.0, unit='tsp', name='salt'), Ingredient(amount=0.5, unit='tsp', name='pepper'), Ingredient(amount=0.5, unit='C', name='flour'), Ingredient(amount=3.5, unit='C', name='milk, warmed'), Ingredient(amount=0.25, unit='C', name='chicken broth'), Ingredient(amount=0.5, unit='lb', name='Velveeta cheese, cubed'), Ingredient(amount=0.5, unit='lb', name='cheddar cheese, cubed'), Ingredient(amount=0.5, unit='C', name='panko bread crumbs or Ritz crackers'), Ingredient(amount=1.0, unit='Tbl', name='parsley'), Ingredient(amount=2.0, unit='Tbl', name='butter, melted')], instructions=['Preheat oven to 350¬∞F.', 'Grease a 9x13 glass dish (or an 8x8 or round casserole dish if making half the recipe).', 'Cook the macaroni pasta for about 2-3 minutes less than directed on the package, drain, and set aside.', 'In a sa

In [7]:
# One-time setup: uncomment the next line if the `numpy` package is not installed.
# %pip install numpy

In [9]:
import requests, zipfile, io, os, geopandas as gpd

# Download Census ZCTA shapefile (2023)
url = "https://www2.census.gov/geo/tiger/TIGER2023/ZCTA520/tl_2023_us_zcta520.zip"
target_dir = "zcta_shapefile"
os.makedirs(target_dir, exist_ok=True)

print("üì¶ Downloading ZIP...")
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces an HTTP error immediately instead of letting an
# error page reach ZipFile and fail there as a confusing BadZipFile.
resp = requests.get(url, timeout=60)
resp.raise_for_status()
# The archive is held in memory and unpacked straight into target_dir.
with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
    zf.extractall(target_dir)

# Find the .shp file
# Reset first: otherwise a run that finds no .shp would either raise NameError
# (fresh kernel) or silently reuse a stale path left over from an earlier run.
shp_path = None
for f in os.listdir(target_dir):
    if f.endswith(".shp"):
        shp_path = os.path.join(target_dir, f)
        print("‚úÖ Shapefile found:", shp_path)

if shp_path is None:
    raise FileNotFoundError(f"No .shp file found in {target_dir!r} after extracting {url}")

# Load into GeoPandas
# Reproject to EPSG:4326, then derive ZIP3 as the first three characters of
# the ZCTA5CE20 code column.
zip_gdf = gpd.read_file(shp_path).to_crs(epsg=4326)
zip_gdf["ZIP3"] = zip_gdf["ZCTA5CE20"].str[:3]

print("Total ZIPs loaded:", len(zip_gdf))
zip_gdf.head()

üì¶ Downloading ZIP...
‚úÖ Shapefile found: zcta_shapefile\tl_2023_us_zcta520.shp
Total ZIPs loaded: 33791


Unnamed: 0,ZCTA5CE20,GEOID20,GEOIDFQ20,CLASSFP20,MTFCC20,FUNCSTAT20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,geometry,ZIP3
0,47236,47236,860Z200US47236,B5,G6350,S,1029063,0,39.1517426,-85.7252769,"POLYGON ((-85.7341 39.15597, -85.72794 39.1561...",472
1,47870,47870,860Z200US47870,B5,G6350,S,8830,0,39.3701518,-87.4735141,"POLYGON ((-87.47414 39.37016, -87.47409 39.370...",478
2,47851,47851,860Z200US47851,B5,G6350,S,53326,0,39.5735839,-87.2459559,"POLYGON ((-87.24769 39.5745, -87.24711 39.5744...",478
3,47337,47337,860Z200US47337,B5,G6350,S,303089,0,39.8027537,-85.437285,"POLYGON ((-85.44357 39.80328, -85.44346 39.803...",473
4,47435,47435,860Z200US47435,B5,G6350,S,13302,0,39.2657557,-86.2951577,"POLYGON ((-86.29592 39.26547, -86.29592 39.266...",474


In [4]:
pip install pytesseract
pip install pdf2image
pip install pillow

SyntaxError: invalid syntax (3309320229.py, line 1)

In [7]:
import os
import pytesseract
from pdf2image import convert_from_path
from PIL import Image

# --- CONFIG ---
PDF_PATH = r"C:\Users\smapo\OneDrive\Desktop\AI API Class\RVIA\RV Report Aug-25_Sum.pdf"
# Single source of truth for the page to OCR; it is used for both first_page
# and last_page below, for the output filename and for the messages.
PAGE_NUMBER = 3
OUTPUT_TXT = rf"C:\Users\smapo\OneDrive\Desktop\AI API Class\RVIA\RV_Report_Aug25_Page{PAGE_NUMBER}.txt"
TESSERACT_PATH = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# --- Set up path to Tesseract manually ---
# Override only when the binary really exists at the configured location;
# otherwise keep pytesseract's default command so a Tesseract available on
# PATH still works on machines without this Windows install path.
if os.path.isfile(TESSERACT_PATH):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH

# Fail early with a clear message instead of an error from inside the PDF converter.
if not os.path.isfile(PDF_PATH):
    raise FileNotFoundError(f"PDF not found: {PDF_PATH}")

# --- Step 1: Convert the selected page of the PDF to an image ---
print(f"üìÑ Converting page {PAGE_NUMBER} to image...")
images = convert_from_path(PDF_PATH, dpi=300, first_page=PAGE_NUMBER, last_page=PAGE_NUMBER)

# An empty list means the requested page produced no image.
if not images:
    raise ValueError(f"‚ùå Page {PAGE_NUMBER} not found in PDF!")

page_image = images[0]

# --- Step 2: OCR the image ---
print(f"üîç Running OCR on page {PAGE_NUMBER}...")
text = pytesseract.image_to_string(page_image)

# --- Step 3: Output results ---
print("‚úÖ OCR complete! Preview:\n")
print(text[:800])  # show first 800 chars

# Save to file
with open(OUTPUT_TXT, "w", encoding="utf-8") as f:
    f.write(text)

print(f"\n‚úÖ Text saved to: {OUTPUT_TXT}")

üìÑ Converting page 3 to image...
üîç Running OCR on page 3...
‚úÖ OCR complete! Preview:

August 2025 PMRV Destination Summary

Aska | 0] 0.00%
Hawai |__| 0.00%

Nevada | ~‚Äî‚Äî~‚Äî-O| 0.00%
NewMexico | | 0.00%

lowa | Of 0.00%
Missour_-| | 0.00%
Nebraska | 00.00%
No.Dakota | | __0.00%

Total Units = 400

Arkansas |] _(0.00%
Louisiana ‚Äî‚Äî‚Äî~¬´‚Äî~S~¬´ =i
Oklahoma_‚Äî_‚Äî+| | 0.00%

a
Michigan ‚Äî‚Äî=‚Äî¬´dYSSCSC¬´*YC¬´*
Ohio ‚ÄîS=~dSC*~C~*~‚Äò YSC*t TG

Mississippi ‚Äî‚Äî=¬´t~SC¬´tY~S=¬´

Asia |] (0.00%
Europe ‚Äî‚ÄîS*dYSC*C~*~*~¬´iYSC*¬´tl

* Destination totals might not match shipment totals
‚Ñ¢ Canadian shipment totals removed from shipment total

Delaware |] 2.18%
Florida | ‚Äî-9i|__2.18%
Georgia ‚Äî~| SC C=it 7G
WestVirginia | | 0.00%

NewJersey | 4] 0.97%
NewYork | __-9|__‚Äî-2.18%

Connecticut | 8] 1.94%
Maine | _‚Äî‚Äî-8| 1.94%

INDUSTRY
ASSOCIATION

WN


‚úÖ Text saved to: C:\Users\smapo\OneDrive\Desktop\AI API Class\RVIA\RV_Report_Aug25_Page3.txt
