In [None]:
import os

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = ""
os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGSMITH_PROJECT"] = "default"
os.environ["OPENAI_API_KEY"] = ""
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, SystemMessage

model = init_chat_model("gpt-4o-mini", model_provider="openai")


## Example Translation

In [42]:
messages = [
    SystemMessage("Translate the following from English into french"),
    HumanMessage("bring me a beer immediately"),
]

model.invoke(messages)

from langchain_core.prompts import ChatPromptTemplate

system_template = "Translate the following from English into {language}"

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{text}")]
)
prompt = prompt_template.invoke({"language": "Italian", "text": "hi!"})
response = model.invoke(prompt)
print(response.content)

Ciao!


## Create custom patent analyst

## 2 criteria simpler

In [77]:
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import Optional

# Define the structured output schema
class PatentScore(BaseModel):
    """Structured output for patent scoring results."""
    score: float = Field(
        description="Final score between 0-1. Use 0 for FAILED patents, 0.01-1 for PASSED patents based on scoring criteria.",
        ge=0.0,
        le=1.0
    )
    passed_initial_triage: bool = Field(
        description="Whether the patent passed the initial triage (both criteria: big pain and platform potential)"
    )
    total_criteria_score: int = Field(
        description="Total score from the 10 criteria (0-20 points)",
        ge=0,
        le=20
    )
    primary_sector: str = Field(
        description="Primary technology sector identified (Biotech/Medtech, AI/ICT/Chips, Clean Energy/Storage, Advanced Materials, Quantum/Space, or Other)"
    )
    reasoning: str = Field(
        description="Detailed reasoning for the score, including which criteria were met and why"
    )
    key_strengths: list[str] = Field(
        description="List of key strengths identified in the patent"
    )
    key_weaknesses: list[str] = Field(
        description="List of key weaknesses or concerns identified in the patent"
    )

# Create the system message template with comprehensive scoring criteria
#
# system_template = """
# You are an intellectual property and technology analyst scoring university patents for venture formation.
# Decide strictly from title/abstract-level text.

# PASS GATE — all three must be true:
# 1) Big pain implied (disease, energy density, latency, yield, cost, safety, security).
# 2) Step-change language present (“orders-of-magnitude”, “real-time”, “low-cost scalable”, “room-temperature”, “in vivo”, “edge/on-device”, “field-deployable”).
# 3) Platform potential (platform/architecture/family; multiple indications or industries).

# If any of the three are missing → FAIL (score=0.0). If all present → compute 10 quick subscores (0/1/2 each):
# 1 Market gravity (regulated / mission-critical buyer)
# 2 Magnitude (≥2× improvement implied)
# 3 Moat hint (mechanism terms: ADC, LNP, AAV, photonic, ZK, solid-state, etc.)
# 4 Platform scope (broad / multi-use)
# 5 Deployability (scalable, CMOS-compatible, room-temp, drop-in)
# 6 Data hint (prototype / preclinical / field test)
# 7 Regulatory pull (Dx, safety-critical, compliance, privacy-preserving)
# 8 Cost lever (low-cost, mass-manufacturable, scarce-material-free)
# 9 Time-to-market (nearer path vs long cycle)
# 10 Buzz → substance (low hype, clear mechanism/result)

# Total = sum(subscores) in [0,20]; shortlist if Total ≥ 14.
# Final score rule:
# - If PASS gate fails: score = 0.0
# - Else: score = round(Total/20, 4)

# Sector green-word cues to reward when present (examples):
# - Biotech/Medtech: bispecific, ADC, LNP, AAV variant, non-viral delivery, CAR-T/NK, point-of-care, multiplex, LoD/sensitivity/specificity %.
# - AI/ICT/Chips: edge/on-device, MoE/sparse, ZK/FHE/SMPC, secure enclave, neuromorphic/photonic accelerator, 6G, beamforming/RIS, latency/throughput metrics, CMOS-compatible.
# - Clean Energy/Storage: Li-S, Na-ion, solid-state, anode-free; cycle life, Coulombic efficiency, dendrite suppression; DAC kJ/mol; PGM-free catalyst.
# - Advanced Materials: printable/solution-processed, wide-bandgap, κ, OTR/WVTR, fracture toughness, thermal conductivity, optical loss dB/cm.
# - Quantum/Space: fidelity %, T1/T2, error-correction/mitigation, cryo-CMOS, quantum-safe crypto; SWaP, SAR resolution, radiation-hard.

# Return ONLY the structured object; do not include prose outside fields.
# """
system_template = """You are an intellectual property and technology analyst working in the biotech and biochemistry sectors. You are assessing university-origin patent descriptions to determine their commercial potential for venture formation.

## SCORING CRITERIA

### INITIAL TRIAGE (Must pass BOTH criteria to proceed):
1. **Big pain implied** – significant unmet need in health, manufacturing, diagnostics, or bio-process efficiency (e.g., disease burden, yield, toxicity, cold-chain limitations, sensitivity/specificity).
2. **Platform potential** – compositional/methodological platform or architecture with multi-target, multi-indication, or multi-pathway applicability.

### DETAILED SCORING (0-2 points each, max 20):
1. **Market gravity** – presence of a regulated or mission-critical buyer (e.g., hospital, lab, payer, bioprocess operator).
2. **Magnitude** – ≥2× improvement implied (e.g., LoD, titer, sensitivity, yield, IC50, KD, AUROC).
3. **Moat hint** – mentions of defensible mechanism terms (e.g., ADC, bispecifics, LNP, AAV, directed evolution, engineered CAR, synthetic enzymes).
4. **Platform scope** – broad multi-use or modular application (e.g., adaptable vector or multiplex assay).
5. **Deployability** – scalable, GMP/CMC friendly, room-temperature stable, drop-in to existing workflows.
6. **Data hint** – evidence of a prototype, in vitro data, preclinical model, or functional testing beyond in silico.
7. **Regulatory pull** – Dx relevance, safety-critical claims, compliance or privacy-preserving architecture, or known regulatory pathway (e.g., IVD, CLIA, BLA).
8. **Cost lever** – low-cost inputs, simple manufacturing, cold-chain independence, fewer unit ops.
9. **Time-to-market** – enables pilot data within 6–12 months (Dx preferred), or plausible path for therapeutics.
10. **Buzz → substance** – specific mechanism and result over vague hype ("AI-designed biology" must have data).

### SECTOR-SPECIFIC ANALYSIS:

**Biotech/Medtech Green Words:** bispecific/ADC, LNP, AAV variant, non-viral delivery, CAR-T/NK, point-of-care, multiplex, IC50/KD, LoD, sensitivity/specificity %, stability @ 25°C, yield/titer, endotoxin, aggregation, on-target/off-tumor

**Biotech/Medtech Red Flags:** in silico only; biomarker with no diagnostic utility; wellness language; claims with no effect size or reproducibility; unclear ownership of components

**Bio-Manufacturing Green Words:** scalable fermentation, synthetic biology platform, directed evolution, enzyme cascade, serum-free, single-use, cold-chain elimination, simplified purification

**Bio-Manufacturing Red Flags:** exotic precursors, non-scalable chemistries, energy-intensive steps without cost/throughput data

### SCORING RULES:
- **FAIL (score = 0)**: If patent doesn't pass initial triage OR is hype-only, method-only with easy workarounds, or shows no path to decision-grade data
- **PASS (score = 0.01–1.0)**: Convert total criteria score (0–20) to 0.01–1 scale (rounded to 4 decimal places). If PASS gate fails, force score = 0.0
- **Decision Rule**: GO if platform language + mechanism term + quantitative delta, OR clear regulated pain + deployability cue

Analyze the provided patent description thoroughly and provide a structured assessment."""
#  system_template = """You are an intellectual property and technology analyst working in the banking sector looking to form a company. You are assessing patent descriptions to determine their commercial potential.

# ## SCORING CRITERIA

# ### INITIAL TRIAGE (Must pass BOTH criteria to proceed):
# 1. **Big pain implied** - disease, energy density, latency, yield, cost, safety, security
# 2. **Platform potential** - platform/architecture/family; multiple indications or industries

# ### DETAILED SCORING (0-2 points each, max 20):
# 1. **Market gravity** - regulated/mission-critical buyer
# 2. **Magnitude** - ≥2× improvement implied
# 3. **Moat hint** - mechanism terms (ADC, LNP, AAV, photonic, ZK, solid-state, etc.)
# 4. **Platform scope** - broad/multi-use
# 5. **Deployability** - scalable, CMOS-compatible, room-temp, drop-in
# 6. **Data hint** - prototype/preclinical/field test
# 7. **Regulatory pull** - Dx, safety-critical, compliance, privacy-preserving
# 8. **Cost lever** - low-cost, mass-manufacturable, scarce-material-free
# 9. **Time-to-market** - nearer path vs long cycle
# 10. **Buzz → substance** - low hype, clear mechanism/result

# ### SECTOR-SPECIFIC ANALYSIS:

# **Biotech/Medtech Green Words:** bispecific/ADC, LNP, AAV variant, non-viral delivery, CAR-T/NK, point-of-care, multiplex, LoD/sensitivity/specificity %
# **Biotech/Medtech Red Flags:** in silico only; biomarker with no Dx utility; vague wellness

# **AI/ICT/Chips Green Words:** edge/on-device, MoE/sparse, ZK/FHE/SMPC, secure enclave, neuromorphic/photonic accelerator, 6G, beamforming/RIS, latency/throughput numbers, CMOS-compatible
# **AI/ICT/Chips Red Flags:** "uses AI to..." with no architecture or metrics

# **Clean Energy/Storage Green Words:** Li-S, Na-ion, solid-state, anode-free; cycle life, Coulombic efficiency, dendrite suppression; DAC kJ/mol; PGM-free catalyst
# **Clean Energy/Storage Red Flags:** "novel catalyst" without turnover numbers/energy balance

# **Advanced Materials Green Words:** printable/solution-processed, wide-bandgap, κ, OTR/WVTR, fracture toughness, thermal conductivity, optical loss dB/cm
# **Advanced Materials Red Flags:** nano-claims without manufacturability or stability; exotic precursors

# **Quantum/Space Green Words:** fidelity %, T1/T2, error-mitigation/codes, cryo-CMOS, quantum-safe crypto; SWaP, SAR resolution, radiation-hard
# **Quantum/Space Red Flags:** "quantum advantage" with no metric; payload with no mass/power

# ## SCORING RULES:
# - **FAIL (score = 0)**: If patent doesn't pass initial triage OR is hype-only, method-only with easy workarounds, or no path to decision-grade data
# - **PASS (score = 0.01-1)**: Convert criteria score (0-20) to 0.01-1 scale proportionally
# - **Decision Rule**: GO if platform language + mechanism term + quantitative delta, OR clear regulated pain + deployability cue

# Analyze the provided patent description thoroughly and provide a structured assessment."""

# Create the human message template
human_template = """Patent Description:
{patent_text}

Please analyze this patent according to the scoring criteria and provide a structured assessment."""

# Create the chat prompt template
chat_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template(human_template)
])

# Function to create the scoring model
def create_patent_scorer(model_name="gpt-4o-mini", temperature=0):
    """
    Create a patent scoring model with structured output.
    
    Args:
        model_name: The OpenAI model to use
        temperature: Model temperature (0 for consistent scoring)
    
    Returns:
        Model with structured output capability
    """
    # Initialize the chat model
    model = ChatOpenAI(model=model_name, temperature=temperature)
    
    # Bind the structured output schema
    model_with_structure = model.with_structured_output(PatentScore)
    
    return model_with_structure, chat_prompt

# Function to score a patent
def score_patent(patent_text: str, model_name="gpt-4o-mini", temperature=0):
    """
    Score a patent using the structured scoring system.
    
    Args:
        patent_text: The patent description text
        model_name: The OpenAI model to use
        temperature: Model temperature
    
    Returns:
        PatentScore object with structured results
    """
    # Create the model and prompt
    model_with_structure, prompt = create_patent_scorer(model_name, temperature)
    
    # Format the prompt with the patent text
    formatted_prompt = prompt.format_messages(patent_text=patent_text)
    
    # Get the structured output
    result = model_with_structure.invoke(formatted_prompt)
    
    return result


### 2 criterion + DEFENSE!!!

In [50]:
# from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
# from langchain_openai import ChatOpenAI
# from pydantic import BaseModel, Field
# from typing import Optional

# # Define the structured output schema
# class PatentScore(BaseModel):
#     """Structured output for patent scoring results."""
#     score: float = Field(
#         description="Final score between 0-1. Use 0 for FAILED patents, 0.01-1 for PASSED patents based on scoring criteria. Defense applications receive scoring bonuses.",
#         ge=0.0,
#         le=1.0
#     )
#     passed_initial_triage: bool = Field(
#         description="Whether the patent passed the initial triage (both criteria: big pain and platform potential)"
#     )
#     total_criteria_score: int = Field(
#         description="Total score from the 10 criteria (0-20 points) plus defense bonus points",
#         ge=0,
#         le=25
#     )
#     defense_bonus: int = Field(
#         description="Additional points awarded for defense/military applications (0-5 points)",
#         ge=0,
#         le=5
#     )
#     primary_sector: str = Field(
#         description="Primary technology sector identified (Biotech/Medtech, AI/ICT/Chips, Clean Energy/Storage, Advanced Materials, Quantum/Space, Defense/Military, or Other)"
#     )
#     defense_applications: list[str] = Field(
#         description="List of identified defense or military applications"
#     )
#     reasoning: str = Field(
#         description="Detailed reasoning for the score, including which criteria were met and why, with emphasis on defense relevance"
#     )
#     key_strengths: list[str] = Field(
#         description="List of key strengths identified in the patent, highlighting defense advantages"
#     )
#     key_weaknesses: list[str] = Field(
#         description="List of key weaknesses or concerns identified in the patent"
#     )

# # Create the system message template with comprehensive scoring criteria
# system_template = """You are an intellectual property and technology analyst working in the banking sector looking to form a company with a focus on defense and military applications. You are assessing patent descriptions to determine their commercial potential, with particular emphasis on defense-related technologies.

# ## SCORING CRITERIA

# ### INITIAL TRIAGE (Must pass BOTH criteria to proceed):
# 1. **Big pain implied** - disease, energy density, latency, yield, cost, safety, security, national security, battlefield advantage
# 2. **Platform potential** - platform/architecture/family; multiple indications or industries, dual-use applications

# ### DETAILED SCORING (0-2 points each, max 20):
# 1. **Market gravity** - regulated/mission-critical buyer, defense contractors, government agencies
# 2. **Magnitude** - ≥2× improvement implied, especially in defense contexts
# 3. **Moat hint** - mechanism terms (ADC, LNP, AAV, photonic, ZK, solid-state, etc.)
# 4. **Platform scope** - broad/multi-use, dual-use potential
# 5. **Deployability** - scalable, CMOS-compatible, room-temp, drop-in, field-deployable
# 6. **Data hint** - prototype/preclinical/field test, military testing
# 7. **Regulatory pull** - Dx, safety-critical, compliance, privacy-preserving, export controls
# 8. **Cost lever** - low-cost, mass-manufacturable, scarce-material-free
# 9. **Time-to-market** - nearer path vs long cycle, rapid deployment capability
# 10. **Buzz → substance** - low hype, clear mechanism/result

# ### DEFENSE BONUS SCORING (0-5 additional points):
# - **+1 point**: Any mention of military, defense, national security, or government applications
# - **+1 point**: Dual-use technology (civilian + military applications)
# - **+1 point**: Battlefield advantage, tactical superiority, or strategic importance
# - **+1 point**: Export control considerations or ITAR/EAR compliance
# - **+1 point**: Integration with existing military systems or platforms

# ### SECTOR-SPECIFIC ANALYSIS:

# **Defense/Military Green Words:** autonomous systems, cybersecurity, electronic warfare, hypersonic, stealth, countermeasures, battlefield, tactical, strategic, command & control, ISR (intelligence, surveillance, reconnaissance), precision strike, force protection, logistics, supply chain security
# **Defense/Military Red Flags:** purely civilian applications with no dual-use potential

# **Biotech/Medtech Green Words:** bispecific/ADC, LNP, AAV variant, non-viral delivery, CAR-T/NK, point-of-care, multiplex, LoD/sensitivity/specificity %, biodefense, medical countermeasures, battlefield medicine, rapid diagnostics
# **Biotech/Medtech Red Flags:** in silico only; biomarker with no Dx utility; vague wellness

# **AI/ICT/Chips Green Words:** edge/on-device, MoE/sparse, ZK/FHE/SMPC, secure enclave, neuromorphic/photonic accelerator, 6G, beamforming/RIS, latency/throughput numbers, CMOS-compatible, AI/ML for defense, autonomous decision-making, threat detection
# **AI/ICT/Chips Red Flags:** "uses AI to..." with no architecture or metrics

# **Clean Energy/Storage Green Words:** Li-S, Na-ion, solid-state, anode-free; cycle life, Coulombic efficiency, dendrite suppression; DAC kJ/mol; PGM-free catalyst, portable power, energy density for military applications
# **Clean Energy/Storage Red Flags:** "novel catalyst" without turnover numbers/energy balance

# **Advanced Materials Green Words:** printable/solution-processed, wide-bandgap, κ, OTR/WVTR, fracture toughness, thermal conductivity, optical loss dB/cm, armor materials, stealth coatings, high-temperature materials
# **Advanced Materials Red Flags:** nano-claims without manufacturability or stability; exotic precursors

# **Quantum/Space Green Words:** fidelity %, T1/T2, error-mitigation/codes, cryo-CMOS, quantum-safe crypto; SWaP, SAR resolution, radiation-hard, quantum sensors, space-based systems, satellite communications
# **Quantum/Space Red Flags:** "quantum advantage" with no metric; payload with no mass/power

# ## SCORING RULES:
# - **FAIL (score = 0)**: If patent doesn't pass initial triage OR is hype-only, method-only with easy workarounds, or no path to decision-grade data
# - **PASS (score = 0.01-1)**: Convert criteria score (0-25 including defense bonus) to 0.01-1 scale proportionally
# - **Defense Bonus**: Add 0-5 points for defense applications, then scale to 0.01-1
# - **Decision Rule**: GO if platform language + mechanism term + quantitative delta + defense relevance, OR clear regulated pain + deployability cue + military application

# Analyze the provided patent description thoroughly and provide a structured assessment with emphasis on defense and military applications."""

# # Create the human message template
# human_template = """Patent Description:
# {patent_text}

# Please analyze this patent according to the scoring criteria and provide a structured assessment. Pay special attention to any defense or military applications."""

# # Create the chat prompt template
# chat_prompt = ChatPromptTemplate.from_messages([
#     SystemMessagePromptTemplate.from_template(system_template),
#     HumanMessagePromptTemplate.from_template(human_template)
# ])

# # Function to create the scoring model
# def create_patent_scorer(model_name="gpt-4o-mini", temperature=0):
#     """
#     Create a patent scoring model with structured output, optimized for defense applications.
    
#     Args:
#         model_name: The OpenAI model to use
#         temperature: Model temperature (0 for consistent scoring)
    
#     Returns:
#         Model with structured output capability
#     """
#     # Initialize the chat model
#     model = ChatOpenAI(model=model_name, temperature=temperature)
    
#     # Bind the structured output schema
#     model_with_structure = model.with_structured_output(PatentScore)
    
#     return model_with_structure, chat_prompt

# # Function to score a patent
# def score_patent(patent_text: str, model_name="gpt-4o-mini", temperature=0):
#     """
#     Score a patent using the structured scoring system with defense focus.
    
#     Args:
#         patent_text: The patent description text
#         model_name: The OpenAI model to use
#         temperature: Model temperature
    
#     Returns:
#         PatentScore object with structured results including defense analysis
#     """
#     # Create the model and prompt
#     model_with_structure, prompt = create_patent_scorer(model_name, temperature)
    
#     # Format the prompt with the patent text
#     formatted_prompt = prompt.format_messages(patent_text=patent_text)
    
#     # Get the structured output
#     result = model_with_structure.invoke(formatted_prompt)
    
#     return result


## score a known good patent


In [79]:
# abcellera patent
# patent_id="US10087408B2"
# patent_text = "Microfluidic devices and methods for, perfusing a cell with perfusion fluid are provided herein, wherein the gravitational forces acting on the cell to keep the cell at or near a retainer or a retaining position exceed the hydrodynamic forces acting on the cell to move it toward an outlet."
patent_text="The present disclosure relates to systems and methods for the amplification of nucleic acids, including, but not limited to, the amplification of nucleic acid libraries and whole genome amplification."
result = score_patent(patent_text, model_name="gpt-4o", temperature=0)
print(result)

score=0.0 passed_initial_triage=False total_criteria_score=0 primary_sector='Biotech/Medtech' reasoning='The patent description provided is very brief and lacks specific details that would allow for a thorough assessment. It mentions systems and methods for nucleic acid amplification, which is a well-established area in biotechnology. However, the description does not specify any novel mechanism, platform potential, or significant improvement over existing technologies. Without details on how this method is different or better than current methods, it is difficult to assess its commercial potential or platform applicability. Additionally, there is no mention of any specific unmet need or pain point that this technology addresses, nor is there any indication of data or evidence supporting its efficacy or advantages.' key_strengths=['Potential relevance to nucleic acid amplification, a critical process in biotechnology'] key_weaknesses=['Lack of detail on novelty or improvement over exis

In [52]:
infn="/Users/mingay/coding/market_research/agents/patent_list_w_descriptions.csv"
import pandas as pd
import random

# Load the data
df = pd.read_csv(infn, header=None, names=['patent_name', 'patent_id', 'patent_holder', 'patent_type', 
                                           'year_applied', 'date_published', 'link_isde', 'description'])


In [46]:
# Create a copy of df with the first 500 rows
# df_500 = df.head(n=500).copy()

df_500 = df.copy()


In [48]:

# # Loop through the first 500 rows in chunks of 50
# # for i in range(0, 500, 50):
# for i in range(500, 2000, 50):
#     # Initialize lists to store the results for each chunk
#     scores = []
#     passed_initial_triages = []
#     total_criteria_scores = []
#     primary_sectors = []
#     reasonings = []
#     key_strengths_list = []
#     key_weaknesses_list = []

#     # Process each patent in the current chunk
#     for j, patent_text in enumerate(df_500['description'][i:i+50], start=i):
#         result = score_patent(patent_text)
#         scores.append(result.score)
#         passed_initial_triages.append(result.passed_initial_triage)
#         total_criteria_scores.append(result.total_criteria_score)
#         primary_sectors.append(result.primary_sector)
#         reasonings.append(result.reasoning)
#         key_strengths_list.append(result.key_strengths)
#         key_weaknesses_list.append(result.key_weaknesses)

#         # Print the counter if divisible by 25
#         if j % 25 == 0:
#             print(j)

#     # Create a DataFrame for the current chunk
#     df_chunk = df_500.iloc[i:i+50].copy()
#     df_chunk['score'] = scores
#     df_chunk['passed_initial_triage'] = passed_initial_triages
#     df_chunk['total_criteria_score'] = total_criteria_scores
#     df_chunk['primary_sector'] = primary_sectors
#     df_chunk['reasoning'] = reasonings
#     df_chunk['key_strengths'] = key_strengths_list
#     df_chunk['key_weaknesses'] = key_weaknesses_list

#     # Save the current chunk to a CSV file
#     df_chunk.to_csv(f"/Users/mingay/coding/market_research/agents/patent_groups/patent_list_w_descriptions_LLMscored_{i}_{i+50}.csv", index=False)

In [None]:
indir = "/Users/mingay/coding/market_research/agents/patent_groups/"
master_df = pd.DataFrame()

for file in os.listdir(indir):
    df = pd.read_csv(indir + file)
    master_df = pd.concat([master_df, df], ignore_index=True)

print(master_df.head())

                                         patent_name     patent_id  \
0  System for monitoring deflection of turbine bl...  US2022235742   
1  Infrared scene projector has conversion chip t...     CA2946474   
2  Treating or preventing itch caused by microbia...  WO2024151802   
3  Method and Apparatus for Reducing Solar Cell D...  US2024178793   
4  Increased Drone Detection Range with Asymmetri...  WO2024105618   

                      patent_holder patent_type  year_applied date_published  \
0                               INO      Patent        2016.0     2024-07-24   
1                               INO      Patent        2016.0     2024-07-24   
2                Western University      Patent        2023.0     2024-07-22   
3  National Research Council Canada      Patent        2022.0     2024-06-05   
4  National Research Council Canada      Patent        2022.0     2024-06-05   

                                           link_isde  \
0  https://ised-isde.canada.ca/ipm-mcpi/pa

In [None]:
master_df.to_csv("/Users/mingay/coding/market_research/agents/patent_list_w_descriptions_LLMscored_top500_gpt40mini.csv", index=False)

In [None]:
mdf = master_df.sort_values(by='score', ascending=False).reset_index(drop=True)
mdf.head(10)

# mdf.to_csv("/Users/mingay/coding/market_research/agents/patent_list_w_descriptions_LLMscored_sorted.csv", index=False)

Unnamed: 0,patent_name,patent_id,patent_holder,patent_type,year_applied,date_published,link_isde,description,score,passed_initial_triage,total_criteria_score,primary_sector,reasoning,key_strengths,key_weaknesses
0,Method for detection and synthetic aperture (S...,CA3089990,INO,Patent,2019.0,2024-07-24,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,A method and a system for detection and synthe...,0.76,True,15,AI/ICT/Chips,The patent describes a method and system for d...,['Addresses critical military needs for target...,['Potential challenges in real-time processing...
1,Opto-Electronic Detection of Multi-Copters Usi...,WO2024105622,National Research Council Canada,Patent,2022.0,2024-06-05,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,A method and apparatus for the detection and t...,0.76,True,15,AI/ICT/Chips,The patent describes a method and apparatus fo...,['Addresses critical national security concern...,"['Lack of specific deployment details', 'Cost ..."
2,Method for synthetic aperture (SA) imaging of ...,CA2968794,INO,Patent,2017.0,2024-07-24,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,Synthetic aperture (SA) imaging methods and sy...,0.68,True,14,AI/ICT/Chips,The patent describes a method for enhancing sy...,['Enhances imaging capabilities for military a...,['Potential competition from existing imaging ...
3,Optical Dispersive Element for Use With Neurom...,WO2024218684,National Research Council Canada,Patent,2023.0,2024-11-27,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,A target located method and apparatus for the ...,0.68,True,13,AI/ICT/Chips,The patent describes a method and apparatus fo...,['Addresses critical defense needs for laser d...,['Lack of specific metrics on performance impr...
4,Neuromorphic Camera in a Laser Warning Systems...,WO2023248190,National Research Council Canada,Patent,2022.0,2024-03-13,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,A target located method and apparatus for the ...,0.68,True,13,AI/ICT/Chips,The patent describes a method and apparatus fo...,['Improved detection capabilities for laser th...,['Lack of specific data on prototype or field ...
5,Method and System for Target Detection and Cla...,WO2025003954,National Research Council Canada,Patent,2023.0,2025-04-28,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,A model-based artificial intelligence (Al) sys...,0.68,True,14,AI/ICT/Chips,The patent describes a model-based AI system d...,['Addresses critical military need for accurat...,['Lacks detailed information on scalability an...
6,SYSTEM AND METHOD FOR PROCESSING FIBER-REINFOR...,WO2022120468,Innovate Calgary,Patent,2020.0,2025-04-09,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,A method is provided for producing a fiber-rei...,0.65,True,13,Advanced Materials,The patent describes a method for producing fi...,['Addresses critical issues in composite manuf...,['Specific military applications are not expli...
7,Multi-Channel Analog-Signal-Input to Digital-S...,WO2023159320,National Research Council Canada,Patent,2022.0,2023-09-19,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,Methods and systems for down-conversion of hig...,0.6,True,12,AI/ICT/Chips,The patent describes methods and systems for d...,['Addresses critical communication needs in mi...,['Specific metrics on performance improvements...
8,Encryption/decryption using key encapsulation/...,US11601260,Innovate Calgary,Patent,2021.0,2025-04-09,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,Systems and methods relating to the encryption...,0.6,True,12,AI/ICT/Chips,The patent describes a system for secure encry...,['Addresses critical need for secure communica...,['Lacks specific data on prototype testing or ...
9,SNAPSHOT GNSS RECEIVER AND METHOD USING SUPER-...,US2024295663,Innovate Calgary,Patent,2023.0,2025-04-09,https://ised-isde.canada.ca/ipm-mcpi/patent-br...,Snapshot receiver that comprises a correlator ...,0.6,True,12,AI/ICT/Chips,The patent describes a snapshot receiver that ...,['Enhances GNSS signal processing for better a...,['Dependence on GNSS signals which may be jamm...


In [None]:
df_sample.to_csv("/Users/mingay/coding/market_research/agents/patent_list_w_descriptions_LLMscored_random50.csv", index=False)

In [None]:
df_sample[df_sample.columns[-7:]]

Unnamed: 0,score,passed_initial_triage,total_criteria_score,primary_sector,reasoning,key_strengths,key_weaknesses
2037,0.5,True,10,Other,The patent describes a system for detecting co...,[Addresses a significant pain point in infrast...,"[Magnitude of improvement not clearly defined,..."
1978,0.01,False,0,Other,The patent does not pass the initial triage be...,[],[Does not imply a significant pain point in or...
855,0.0,False,0,Other,The patent does not imply a significant pain p...,[],[Lacks indication of a significant pain point ...
1719,0.0,False,0,Other,"The patent description provided is 'nan', whic...",[],"[No information provided for assessment, Fails..."
2019,0.01,False,5,Biotech/Medtech,The patent does not pass the initial triage be...,"[Innovative method for hydrogel coating, Poten...","[Does not address a significant pain point, Li..."
134,0.01,False,0,AI/ICT/Chips,The patent does not clearly address a signific...,[],[Lacks clear indication of a significant pain ...
1375,0.4,True,8,AI/ICT/Chips,The patent describes an electronic device for ...,[Addresses a significant pain point in analyte...,"[Lacks prototype or testing data, No clear reg..."
1711,0.65,True,13,Clean Energy/Storage,The patent describes an integrated microgrid m...,[Addresses significant pain in energy manageme...,[Lacks explicit data on prototype testing or v...
1289,0.55,True,11,Biotech/Medtech,The patent describes a gene therapy approach u...,[Addresses a significant medical need for Frag...,[Lacks specific quantitative improvement metri...
1068,0.65,True,13,Biotech/Medtech,The patent describes a radiation dosimeter tha...,[Addresses a critical need for accurate radiat...,[Limited information on scalability and mass-m...


#### 3 criteria

In [None]:
# from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
# from langchain_openai import ChatOpenAI
# from pydantic import BaseModel, Field
# from typing import Optional

# # Define the structured output schema
# class PatentScore(BaseModel):
#     """Structured output for patent scoring results."""
#     score: float = Field(
#         description="Final score between 0-1. Use 0 for FAILED patents, 0.01-1 for PASSED patents based on scoring criteria.",
#         ge=0.0,
#         le=1.0
#     )
#     passed_initial_triage: bool = Field(
#         description="Whether the patent passed the initial triage (all three criteria: big pain, step-change language, platform potential)"
#     )
#     total_criteria_score: int = Field(
#         description="Total score from the 10 criteria (0-20 points)",
#         ge=0,
#         le=20
#     )
#     primary_sector: str = Field(
#         description="Primary technology sector identified (Biotech/Medtech, AI/ICT/Chips, Clean Energy/Storage, Advanced Materials, Quantum/Space, or Other)"
#     )
#     reasoning: str = Field(
#         description="Detailed reasoning for the score, including which criteria were met and why"
#     )
#     key_strengths: list[str] = Field(
#         description="List of key strengths identified in the patent"
#     )
#     key_weaknesses: list[str] = Field(
#         description="List of key weaknesses or concerns identified in the patent"
#     )

# # Create the system message template with comprehensive scoring criteria
# system_template = """You are an intellectual property and technology analyst working in the banking sector looking to form a company. You are assessing patent titles and abstracts to determine their commercial potential.

# ## SCORING CRITERIA

# ### INITIAL TRIAGE (Must pass ALL THREE to proceed):
# 1. **Big pain implied** - disease, energy density, latency, yield, cost, safety, security
# 2. **Step-change language** - "orders-of-magnitude", "real-time", "low-cost scalable", "room-temperature", "in vivo", "edge/on-device", "field-deployable"
# 3. **Platform potential** - platform/architecture/family; multiple indications or industries

# ### DETAILED SCORING (0-2 points each, max 20):
# 1. **Market gravity** - regulated/mission-critical buyer
# 2. **Magnitude** - ≥2× improvement implied
# 3. **Moat hint** - mechanism terms (ADC, LNP, AAV, photonic, ZK, solid-state, etc.)
# 4. **Platform scope** - broad/multi-use
# 5. **Deployability** - scalable, CMOS-compatible, room-temp, drop-in
# 6. **Data hint** - prototype/preclinical/field test
# 7. **Regulatory pull** - Dx, safety-critical, compliance, privacy-preserving
# 8. **Cost lever** - low-cost, mass-manufacturable, scarce-material-free
# 9. **Time-to-market** - nearer path vs long cycle
# 10. **Buzz → substance** - low hype, clear mechanism/result

# ### SECTOR-SPECIFIC ANALYSIS:

# **Biotech/Medtech Green Words:** bispecific/ADC, LNP, AAV variant, non-viral delivery, CAR-T/NK, point-of-care, multiplex, LoD/sensitivity/specificity %
# **Biotech/Medtech Red Flags:** in silico only; biomarker with no Dx utility; vague wellness

# **AI/ICT/Chips Green Words:** edge/on-device, MoE/sparse, ZK/FHE/SMPC, secure enclave, neuromorphic/photonic accelerator, 6G, beamforming/RIS, latency/throughput numbers, CMOS-compatible
# **AI/ICT/Chips Red Flags:** "uses AI to..." with no architecture or metrics

# **Clean Energy/Storage Green Words:** Li-S, Na-ion, solid-state, anode-free; cycle life, Coulombic efficiency, dendrite suppression; DAC kJ/mol; PGM-free catalyst
# **Clean Energy/Storage Red Flags:** "novel catalyst" without turnover numbers/energy balance

# **Advanced Materials Green Words:** printable/solution-processed, wide-bandgap, κ, OTR/WVTR, fracture toughness, thermal conductivity, optical loss dB/cm
# **Advanced Materials Red Flags:** nano-claims without manufacturability or stability; exotic precursors

# **Quantum/Space Green Words:** fidelity %, T1/T2, error-mitigation/codes, cryo-CMOS, quantum-safe crypto; SWaP, SAR resolution, radiation-hard
# **Quantum/Space Red Flags:** "quantum advantage" with no metric; payload with no mass/power

# ## SCORING RULES:
# - **FAIL (score = 0)**: If patent doesn't pass initial triage OR is hype-only, method-only with easy workarounds, or no path to decision-grade data
# - **PASS (score = 0.01-1)**: Convert criteria score (0-20) to 0.01-1 scale proportionally
# - **Decision Rule**: GO if platform language + mechanism term + quantitative delta, OR clear regulated pain + deployability cue

# Analyze the provided patent text thoroughly and provide a structured assessment."""

# # Create the human message template
# human_template = """Patent Text:
# {patent_text}

# Please analyze this patent according to the scoring criteria and provide a structured assessment."""

# # Create the chat prompt template
# chat_prompt = ChatPromptTemplate.from_messages([
#     SystemMessagePromptTemplate.from_template(system_template),
#     HumanMessagePromptTemplate.from_template(human_template)
# ])

# # Function to create the scoring model
# def create_patent_scorer(model_name="gpt-4o", temperature=0):
#     """
#     Create a patent scoring model with structured output.
    
#     Args:
#         model_name: The OpenAI model to use
#         temperature: Model temperature (0 for consistent scoring)
    
#     Returns:
#         Model with structured output capability
#     """
#     # Initialize the chat model
#     model = ChatOpenAI(model=model_name, temperature=temperature)
    
#     # Bind the structured output schema
#     model_with_structure = model.with_structured_output(PatentScore)
    
#     return model_with_structure, chat_prompt

# # Function to score a patent
# def score_patent(patent_text: str, model_name="gpt-4o", temperature=0):
#     """
#     Score a patent using the structured scoring system.
    
#     Args:
#         patent_text: The patent title and/or abstract text
#         model_name: The OpenAI model to use
#         temperature: Model temperature
    
#     Returns:
#         PatentScore object with structured results
#     """
#     # Create the model and prompt
#     model_with_structure, prompt = create_patent_scorer(model_name, temperature)
    
#     # Format the prompt with the patent text
#     formatted_prompt = prompt.format_messages(patent_text=patent_text)
    
#     # Get the structured output
#     result = model_with_structure.invoke(formatted_prompt)
    
#     return result

# # Example usage
# if __name__ == "__main__":
#     # Example patent text
#     example_patent = """
#     Title: "Real-time edge computing system for autonomous vehicle safety with 10x faster response time"
#     Abstract: "A novel edge computing architecture utilizing neuromorphic processors for real-time safety-critical decision making in autonomous vehicles. The system achieves 10x faster response times compared to cloud-based solutions while maintaining 99.9% accuracy in obstacle detection. CMOS-compatible design enables mass manufacturing at low cost."
#     """
    
#     # Score the patent
#     result = score_patent(example_patent)
    
#     print(f"Score: {result.score}")
#     print(f"Passed Initial Triage: {result.passed_initial_triage}")
#     print(f"Total Criteria Score: {result.total_criteria_score}/20")
#     print(f"Primary Sector: {result.primary_sector}")
#     print(f"Reasoning: {result.reasoning}")
#     print(f"Key Strengths: {result.key_strengths}")
#     print(f"Key Weaknesses: {result.key_weaknesses}")