In [None]:
# Install necessary libraries
!pip install -q faiss-cpu google-generativeai wordcloud matplotlib requests

In [None]:
# Read both API keys without echoing them: input() would print the typed key
# into the cell output, where it is saved with the notebook.
from getpass import getpass

serp_api_key = getpass("Enter your SerpAPI Key: ").strip()
gemini_api_key = getpass("Enter your Gemini API Key: ").strip()

Enter your SerpAPI Key: [REDACTED]
Enter your Gemini API Key: [REDACTED]


In [None]:
#  Inputs from user
import json
from datetime import datetime, timedelta


# Ask for the search scope. Answers are stripped first so that a blank or
# whitespace-only reply falls back to the default instead of being used as-is.
industry = input("Enter Industry (Default: Mobile Phone Manufacturing): ").strip() or "Mobile Phone Manufacturing"
topic = input("Enter Topic (e.g. Foldable Displays, Battery Tech, etc.): ").strip() or "Battery Technology"


Enter Industry (Default: Mobile Phone Manufacturing): 
Enter Topic (e.g. Foldable Displays, Battery Tech, etc.): 


In [None]:
# 📦 Import required libraries
import requests
import pprint
from datetime import datetime, timedelta
import json
from dateutil import parser  # Helps parse messy date strings

#  Agent 1: PatentFetcher
class PatentFetcher:
    """Fetch patents through SerpAPI's ``google_patents`` engine.

    Args:
        api_key: SerpAPI key sent with every search request.
    """

    SEARCH_URL = "https://serpapi.com/search"
    REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled request blocks the cell forever
    PREVIEW_COUNT = 3  # how many raw results are pretty-printed for structure inspection

    def __init__(self, api_key):
        self.api_key = api_key  # Save the SerpAPI key

    @staticmethod
    def _parse_publication_date(date_str):
        """Return ``date_str`` as a ``datetime.date``, or ``None`` if it is missing or unparseable."""
        if not date_str:
            return None
        try:
            return parser.parse(date_str).date()
        except (ValueError, OverflowError, TypeError):
            # One malformed date must not abort the whole fetch; the patent is
            # kept with no date and is dropped by the recency filter.
            return None

    def fetch(self, industry, topic, max_age_days=360):
        """Search for patents and return the recently published ones.

        Args:
            industry: Industry name; combined with ``topic`` to form the query.
            topic: Topic to search for; its lower-cased words are stored as
                each patent's ``keywords``.
            max_age_days: Only patents published within this many days of
                today are returned. Defaults to 360.

        Returns:
            A list of dicts with the keys ``title``, ``abstract``,
            ``assignee``, ``keywords`` and ``publication_date`` (a
            ``datetime.date``). Patents without a parseable publication date
            or older than the cutoff are excluded and reported on stdout.
        """
        params = {
            "engine": "google_patents",
            "q": f"{topic} {industry}",
            "hl": "en",
            "api_key": self.api_key,
            "num": 50  # Try to fetch up to 50 results
        }

        # Make the GET request to SerpAPI
        response = requests.get(self.SEARCH_URL, params=params, timeout=self.REQUEST_TIMEOUT)
        data = response.json()

        # Surface an "error" field from the payload instead of silently
        # returning nothing. It is printed rather than raised so that a
        # response without results still yields an empty list.
        if data.get("error"):
            print(f"SerpAPI reported: {data['error']}")

        patents = []
        for i, item in enumerate(data.get("organic_results", [])):
            # Print the first few patent items for structure inspection
            if i < self.PREVIEW_COUNT:
                print(f"--- Item {i + 1} ---")
                pprint.pprint(item)

            # Extract relevant patent info
            patents.append({
                "title": item.get("title", ""),
                "abstract": item.get("snippet", ""),
                "assignee": item.get("assignee", ""),
                "keywords": topic.lower().split(),
                "publication_date": self._parse_publication_date(item.get("publication_date")),
            })

        # Keep patents published within the last `max_age_days` days and report
        # every patent that is dropped (no date, or older than the cutoff).
        cutoff = datetime.now().date() - timedelta(days=max_age_days)
        recent_patents = []
        for patent in patents:
            published = patent["publication_date"]
            if published and published >= cutoff:
                recent_patents.append(patent)
            else:
                print(f"Skipping: {patent['title']} with date {published}")

        return recent_patents


#  Get user inputs and run fetcher
# The key is read with getpass so it is not echoed into the saved cell output.
from getpass import getpass

serp_api_key = getpass("Enter your SerpAPI Key: ").strip()
industry = input("Enter Industry: ").strip() or "Mobile Phone Manufacturing"
topic = input("Enter Topic: ").strip() or "Battery Technology"

#  Instantiate the PatentFetcher agent and fetch data
fetcher = PatentFetcher(serp_api_key)
all_patents = fetcher.fetch(industry, topic)

# Keep only patents that have a publication date and convert that date to an
# ISO string so the records are JSON-serialisable. New dicts are built, so the
# records in `all_patents` keep their date objects.
filtered_patents = [
    {**patent, "publication_date": patent["publication_date"].isoformat()}
    for patent in all_patents
    if patent["publication_date"]
]

#  Save filtered patents to JSON file
with open("patents.json", "w", encoding="utf-8") as f:
    json.dump(filtered_patents, f, indent=4)

#  Final output
print(f"{len(filtered_patents)} recent patents saved to patents.json")


Enter your SerpAPI Key: [REDACTED]
Enter Industry: 
Enter Topic: 
--- Item 1 ---
{'assignee': 'Motorola, Inc.',
 'country_status': {'US': 'NOT_ACTIVE'},
 'figures': [{'full': 'https://patentimages.storage.googleapis.com/2d/6a/7d/30e12fff104fc6/US20090023480A1-20090122-D00000.png',
              'thumbnail': 'https://patentimages.storage.googleapis.com/04/7a/a9/d787edff08a83a/US20090023480A1-20090122-D00000.png'},
             {'full': 'https://patentimages.storage.googleapis.com/15/7a/ea/951944281b8b25/US20090023480A1-20090122-D00001.png',
              'thumbnail': 'https://patentimages.storage.googleapis.com/9c/35/ad/b88920023f9056/US20090023480A1-20090122-D00001.png'},
             {'full': 'https://patentimages.storage.googleapis.com/c1/6c/e2/b9178f3dae1f3f/US20090023480A1-20090122-D00002.png',
              'thumbnail': 'https://patentimages.storage.googleapis.com/01/02/43/02d50cfb5c9b14/US20090023480A1-20090122-D00002.png'},
 

In [None]:
#  Embed with Gemini
import google.generativeai as genai
import numpy as np
import faiss
import json

# Configure Gemini
genai.configure(api_key=gemini_api_key)
embedding_model = "models/embedding-001"

#  Load patents data from the saved file
with open("patents.json", "r", encoding="utf-8") as f:
    patents = json.load(f)

# Generate document embeddings
abstracts = [p["abstract"] for p in patents]

abstract_embeddings = []
valid_patents = []  # patents whose abstract was embedded, in index order
for i, text in enumerate(abstracts):
    # Skip missing or blank abstracts; there is nothing to embed.
    if not (text or "").strip():
        continue
    try:
        emb = genai.embed_content(
            model=embedding_model,
            content=text,
            task_type="retrieval_document"
        )["embedding"]
    except Exception as e:
        # Best effort: report the failure and continue with the other abstracts.
        print(f"Embedding failed for abstract {i + 1}: {text[:50]}... | Error: {e}")
        continue
    # Appended together so row k of the index always maps to valid_patents[k].
    abstract_embeddings.append(emb)
    valid_patents.append(patents[i])

# Fail with a clear message instead of an IndexError on abstract_embeddings[0].
if not abstract_embeddings:
    raise ValueError(
        "No patent abstracts could be embedded; patents.json is empty or every embedding call failed."
    )

# FAISS indexes operate on float32 matrices, so build the array in that dtype.
abstract_embeddings = np.array(abstract_embeddings, dtype="float32")

#  Store in FAISS
dimension = abstract_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(abstract_embeddings)

print(" FAISS index created using Gemini embeddings.")

 FAISS index created using Gemini embeddings.


In [None]:
#  Query embedding
query = f"What all new technologies/innovations to focus on, related to {topic} in {industry}?"

query_embedding = genai.embed_content(
    model=embedding_model,
    content=query,
    task_type="retrieval_query"
)["embedding"]

#  Search FAISS
# Ask for at most as many neighbours as the index holds. When fewer vectors
# exist than requested, FAISS fills the remaining slots with the id -1, and
# valid_patents[-1] would silently return the last patent as a duplicate.
top_k = max(1, min(10, index.ntotal))
D, I = index.search(np.array([query_embedding], dtype="float32"), top_k)
top_patents = [valid_patents[i] for i in I[0] if i >= 0]
print(f" Retrieved {len(top_patents)} patents for trend analysis.")


 Retrieved 10 patents for trend analysis.


In [None]:
# 📦 Import and configure Gemini API
import google.generativeai as genai

genai.configure(api_key=gemini_api_key)  # Uses the gemini_api_key variable read from user input earlier in the notebook

#  Agent 2: TrendAnalyzer
class TrendAnalyzer:
    """Summarise patent trends by prompting a Gemini generative model.

    Args:
        model_name: Name of the Gemini model to instantiate. Defaults to
            ``"gemini-1.5-pro"``.
    """

    MAX_PATENTS = 5  # the prompt asks the model for a "Top 5" list
    MAX_ABSTRACT_CHARS = 500  # longer abstracts are truncated and suffixed with "..."

    def __init__(self, model_name="gemini-1.5-pro"):
        # Initialize the Gemini model
        self.model = genai.GenerativeModel(model_name=model_name)

    def _build_context(self, patents):
        """Format up to MAX_PATENTS patents that have a non-blank abstract into prompt text."""
        entries = []
        for p in patents:
            full_abstract = p.get("abstract") or ""
            # Blank abstracts give the model nothing to analyse; skipping them
            # before counting lets later patents fill the five slots.
            if not full_abstract.strip():
                continue

            abstract = full_abstract[:self.MAX_ABSTRACT_CHARS]
            if len(full_abstract) > self.MAX_ABSTRACT_CHARS:
                abstract += "..."

            title = p.get("title") or "Untitled"
            assignee = p.get("assignee") or "Unknown"
            published = p.get("publication_date") or "Unknown"

            entries.append(
                "\n"
                f"    Patent Title: {title}\n"
                f"    Assignee: {assignee}\n"
                f"    Publication Date: {published}\n"
                f"    Abstract: {abstract}\n"
                "\n"
                "    "
            )
            if len(entries) == self.MAX_PATENTS:
                break
        return "".join(entries)

    def analyze(self, patents, industry, topic):
        """Ask the model for a trend report on the given patents.

        Args:
            patents: Sequence of patent dicts; the keys ``title``,
                ``assignee``, ``publication_date`` and ``abstract`` are read.
                The first five entries with a non-blank abstract are used.
            industry: Industry name inserted into the prompt.
            topic: Topic name inserted into the prompt.

        Returns:
            The ``text`` attribute of the model's response.

        Raises:
            ValueError: If no patent has a non-blank abstract, because the
                model would otherwise be asked to analyse empty data.
        """
        context = self._build_context(patents)
        if not context:
            raise ValueError("No patents with a non-empty abstract were provided for analysis.")

        prompt = f"""
        You are a top-tier patent analyst. The following are recent patents in the '{industry}' sector related to '{topic}'.

        Analyze and return:

        1. 🔍 **Top 5 Patents** - Each with:
            - Title
            - Assignee
            - Publication Date
            - Abstract (summarized in 1-2 lines)

        2. 🚀 **Emerging Innovations (5)** - Identify and briefly describe novel technologies evident from the patents.

        3. 📊 **Trend Summary (max 100 words)** - What's the emerging direction in this domain?

        4. 🧪 **R&D Suggestions** - Suggest 3 actionable ideas for companies in the '{industry}' sector based on these findings.

        Patent Data:
        {context}
        """

        # Send the prompt to Gemini and return the response text
        response = self.model.generate_content(prompt)
        return response.text


In [None]:
# Create the analyzer agent, run it on the patents retrieved from the FAISS
# search, and print the generated report.
analyzer = TrendAnalyzer()
summary = analyzer.analyze(
    patents=top_patents,
    industry=industry,
    topic=topic,
)
print(summary)


## Mobile Phone Battery Technology Patent Analysis

**1. 🔍 Top 5 Patents:**

1. **Title:** Flexible battery and preparation method thereof
    **Assignee:** Huawei Technologies Co., Ltd.
    **Publication Date:** 2025-03-06
    **Abstract:** A flexible battery with an energy-absorbing layer between the electrochemical cell and wrapping layer for enhanced safety and durability.

2. **Title:** A band with a built-in curvable battery
    **Assignee:** 日商半導體能源研究所股份有限公司
    **Publication Date:** 2024-10-11
    **Abstract:** A bendable, high-capacity battery utilizing a periodically waved film exterior for wearables.

3. **Title:** Positive electrode active material, method for producing same, positive …
    **Assignee:** 컨템포러리 엠퍼렉스 테크놀로지 (홍콩) 리미티드
    **Publication Date:** 2024-10-21
    **Abstract:**  A novel positive electrode active material and manufacturing method for improved lithium-ion battery performance.

4. **Title:** Negative electrode material, composite negative electrode mate

In [None]:
# ✅ BONUS: Generate WordCloud from the retrieved patent abstracts
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Gather the non-blank abstracts of the retrieved patents and join them into
# one string. `top_patents` is assigned by the FAISS search cell, which has to
# run before this one.
non_blank_abstracts = [p["abstract"] for p in top_patents if p["abstract"].strip()]
text = " ".join(non_blank_abstracts)

# Configure the word cloud first, then generate it from the combined text
cloud_builder = WordCloud(
    width=1000,
    height=500,
    background_color='white',
    colormap='viridis',
)
wordcloud = cloud_builder.generate(text)

# Render the cloud as an image without axes
plt.figure(figsize=(12, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.title("Frequently Occurring Words in Patent Abstracts", fontsize=16)
plt.axis("off")
plt.show()


NameError: name 'top_patents' is not defined