Setup
Start by installing and importing the Python SDK.

In [1]:
!pip uninstall -qqy jupyterlab  # Remove unused conflicting packages
!pip install -U -q "google-genai==1.7.0"

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.7/144.7 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.9/100.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jupyterlab-lsp 3.10.2 requires jupyterlab<4.0.0a0,>=3.1.0, which is not installed.[0m[31m
[0m

In [2]:
# `genai` provides the client entry point; `types` holds the SDK's typed request/config objects.
from google import genai
from google.genai import types

# Display the imported SDK version so it can be compared with the version pinned at install time.
genai.__version__

'1.7.0'

In [3]:
from kaggle_secrets import UserSecretsClient

# Fetch the API key by its secret label so the key itself never appears in the notebook.
secrets_client = UserSecretsClient()
GOOGLE_API_KEY = secrets_client.get_secret("GOOGLE_API_KEY")

Create Deal database

In [4]:
from google import genai
from google.genai import types
from google.api_core import retry


# Initialize Gemini client


# Gemini API client authenticated with GOOGLE_API_KEY.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Few-shot prompt: an instruction followed by one worked example of the expected output.
# The example must itself be valid JSON with the same keys in every record, because the
# model imitates it and the reply is later parsed with json.loads and mapped to fixed
# columns (deal_type, companies, date, upfront, total_deal_value, indication, asset,
# deal_details).
few_shot_prompt = """You are a deal searching tool, search recent pharma/biotech licensing in/out deals, M&A deals and strategic collaboration announcement based on user requests; Fierce Pharma and BioSpace are good websites to track deals
Then you parse deal stakeholders, date, deal amount, indication, medication (or drug) into valid JSON. Do not show other information.
Example:
Give me 3 striking deals in licensing, M&A and collaboration
JSON Response:
    [{
        "deal_type": "licensing",
        "companies": ["AstraZeneca", "Daiichi Sankyo"],
        "date": "2023-07-15",
        "upfront": "$1 billion",
        "total_deal_value": "$6.9 billion",
        "indication": "oncology",
        "asset": "DS-1062 (trastuzumab deruxtecan)",
        "deal_details": "AstraZeneca licensed global rights to Daiichi Sankyo's antibody-drug conjugate for multiple cancer types."
    },
    {
        "deal_type": "M&A",
        "companies": ["Pfizer", "Seagen"],
        "date": "2023-03-12",
        "upfront": "$5 billion",
        "total_deal_value": "$43 billion",
        "indication": "oncology",
        "asset": "Multiple ADC technologies",
        "deal_details": "Pfizer acquired Seagen to enhance its cancer therapy portfolio with antibody-drug conjugate technology."
    },
    {
        "deal_type": "collaboration",
        "companies": ["Moderna", "Merck"],
        "date": "2023-05-02",
        "upfront": "$250 million",
        "total_deal_value": "N/A",
        "indication": "immuno-oncology",
        "asset": "mRNA-4157",
        "deal_details": "Strategic collaboration to develop and commercialize personalized cancer vaccines using mRNA technology."
    }]
    """

# The actual question; it is sent after the few-shot prompt in the same request.
user_input='what are the 2024 top 10 deals in oncology?'
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=[few_shot_prompt, user_input])

print(response.text)

```json
[
    {
        "deal_type": "M&A",
        "companies": ["Johnson & Johnson", "Ambrx"],
        "date": "2024-01-08",
        "upfront": null,
        "total_deal_value": "$2.0 billion",
        "indication": "Oncology",
        "asset": "ADC Platform",
        "deal_details": "Johnson & Johnson acquired Ambrx, gaining access to its ADC platform and pipeline."
    },
    {
        "deal_type": "Collaboration",
        "companies": ["Astellas", "CG Oncology"],
        "date": "2024-01-03",
        "upfront": "$450 million",
        "total_deal_value": "$1.5 billion",
        "indication": "Bladder Cancer",
        "asset": "CG0070",
        "deal_details": "Astellas and CG Oncology collaborate to develop and commercialize CG0070, an oncolytic virus therapy for bladder cancer."
    },
    {
        "deal_type": "Licensing",
        "companies": ["Merck", "Harpoon Therapeutics"],
        "date": "2024-01-08",
        "upfront": "$680 million",
        "total_deal_value": "$680 mi

Load the response into a local database

In [5]:
# Load the `sql` IPython extension so the %sql magic on the next line is available.
%load_ext sql
# Connect %sql to the SQLite file pharma_deals.db (the same file name the sqlite3 cells open).
%sql sqlite:///pharma_deals.db

In [6]:
import sqlite3
import pandas as pd
from datetime import datetime
import json
import re


def _extract_json_payload(text):
    """Return the JSON portion of a model reply with Markdown code fences removed.

    Args:
        text: Raw reply text; may be None or empty.

    Returns:
        The content of the first complete fenced block if one exists. Otherwise the
        text with any stray fence markers removed. Whitespace is trimmed in both cases,
        and an empty string is returned for None/empty input.
    """
    if not text:
        return ""
    # Prefer the inside of a complete ```json ... ``` block so that any prose the model
    # wrote before or after the fence does not break json.loads.
    fenced = re.search(r'```(?:language=json|json)?(.*?)```', text, flags=re.DOTALL)
    if fenced:
        return fenced.group(1).strip()
    # No complete fence pair (e.g. bare JSON or an unclosed fence): strip stray markers.
    return re.sub(r'```language=json|```json|```|language=json', '', text).strip()


# The LLM response is expected to contain a JSON array of deals.
# Guard against a missing text payload so the regex helpers never receive None.
response_text = response.text or ""
cleaned_text = _extract_json_payload(response_text)

try:
    deals = json.loads(cleaned_text)
except json.JSONDecodeError:
    # If the response isn't valid JSON, keep the raw text so it can still be inspected.
    print("Response is not valid JSON. Using raw text.")
    deals = [{"deal_text": response_text}]

# A single JSON object is one deal; wrap it so later cells can always iterate over a list
# of records (iterating a dict would yield its keys instead).
if isinstance(deals, dict):
    deals = [deals]
print(deals)


[{'deal_type': 'M&A', 'companies': ['Johnson & Johnson', 'Ambrx'], 'date': '2024-01-08', 'upfront': None, 'total_deal_value': '$2.0 billion', 'indication': 'Oncology', 'asset': 'ADC Platform', 'deal_details': 'Johnson & Johnson acquired Ambrx, gaining access to its ADC platform and pipeline.'}, {'deal_type': 'Collaboration', 'companies': ['Astellas', 'CG Oncology'], 'date': '2024-01-03', 'upfront': '$450 million', 'total_deal_value': '$1.5 billion', 'indication': 'Bladder Cancer', 'asset': 'CG0070', 'deal_details': 'Astellas and CG Oncology collaborate to develop and commercialize CG0070, an oncolytic virus therapy for bladder cancer.'}, {'deal_type': 'Licensing', 'companies': ['Merck', 'Harpoon Therapeutics'], 'date': '2024-01-08', 'upfront': '$680 million', 'total_deal_value': '$680 million', 'indication': 'Oncology', 'asset': 'T-cell engagers', 'deal_details': 'Merck acquired Harpoon Therapeutics for $680 million to expand its oncology pipeline with novel T-cell engagers.'}, {'deal_

In [7]:
# Open (or create) the SQLite file that stores the deals.
conn = sqlite3.connect('pharma_deals.db')
cursor = conn.cursor()

# PRAGMA table_info returns one row per column, and no rows when the table is absent.
columns = cursor.execute("PRAGMA table_info(deals)").fetchall()
column_names = [info[1] for info in columns]
print(f"Existing columns in deals table: {column_names}")

# A table that lacks the 'upfront' column is treated as an outdated schema and is
# rebuilt from scratch.
schema_is_stale = bool(columns) and 'upfront' not in column_names
if schema_is_stale:
    print("Table exists but has incorrect schema. Dropping and recreating...")
    cursor.execute("DROP TABLE deals")
    columns = []

# Reached with an empty list both when the table never existed and right after a drop.
if len(columns) == 0:
    cursor.execute('''
    CREATE TABLE deals (
        deal_id INTEGER PRIMARY KEY,
        deal_type TEXT,
        licensor TEXT,
        licensee TEXT,
        date TEXT,
        upfront TEXT,
        total_deal_value TEXT,
        indication TEXT,
        asset TEXT,
        deal_details TEXT
    )
    ''')
    print("Created new deals table with correct schema")

Existing columns in deals table: []
Created new deals table with correct schema


In [8]:
# Insert deals into the database
INSERT_DEAL_SQL = '''
    INSERT INTO deals (deal_type, licensor, licensee, date, upfront, total_deal_value, indication, asset, deal_details)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
# Keys (besides 'companies') that mark a record as an actual deal.
DEAL_TEXT_FIELDS = ('deal_type', 'date', 'upfront', 'total_deal_value', 'indication', 'asset', 'deal_details')


def _looks_like_deal(record):
    """Return True when `record` is a dict carrying at least one known deal field."""
    if not isinstance(record, dict):
        return False
    return 'companies' in record or any(field in record for field in DEAL_TEXT_FIELDS)


def _deal_to_row(deal):
    """Convert one parsed deal dict into a tuple matching the INSERT column order.

    Args:
        deal: Dict with any subset of the deal keys; 'companies' may be missing, None,
            a single string, or a list.

    Returns:
        A 9-tuple (deal_type, licensor, licensee, date, upfront, total_deal_value,
        indication, asset, deal_details). Missing text fields become '', explicit
        nulls stay None, and absent companies become None.
    """
    # Missing or null 'companies' must not raise; a bare string is one company, not a
    # sequence of characters.
    companies = deal.get('companies') or []
    if isinstance(companies, str):
        companies = [companies]
    # For simplicity, assuming first company is licensor/acquirer and second is licensee/target
    licensor = companies[0] if len(companies) > 0 else None
    licensee = companies[1] if len(companies) > 1 else None
    return (
        deal.get('deal_type', ''),
        licensor,
        licensee,
        deal.get('date', ''),
        deal.get('upfront', ''),
        deal.get('total_deal_value', ''),
        deal.get('indication', ''),
        deal.get('asset', ''),
        deal.get('deal_details', ''),
    )


# Records that are not deals (e.g. a raw-text fallback entry) are skipped rather than
# stored as empty rows.
rows = [_deal_to_row(deal) for deal in deals if _looks_like_deal(deal)]
cursor.executemany(INSERT_DEAL_SQL, rows)

skipped = len(deals) - len(rows)
if skipped:
    print(f"Skipped {skipped} record(s) that did not look like a deal.")

# Commit the changes
conn.commit()

In [9]:
# Read every stored row back and print it to confirm the insert worked.
print("Deals in the database:")
stored_deals = conn.execute("SELECT * FROM deals").fetchall()
for record in stored_deals:
    print(record)

Deals in the database:
(1, 'M&A', 'Johnson & Johnson', 'Ambrx', '2024-01-08', None, '$2.0 billion', 'Oncology', 'ADC Platform', 'Johnson & Johnson acquired Ambrx, gaining access to its ADC platform and pipeline.')
(2, 'Collaboration', 'Astellas', 'CG Oncology', '2024-01-03', '$450 million', '$1.5 billion', 'Bladder Cancer', 'CG0070', 'Astellas and CG Oncology collaborate to develop and commercialize CG0070, an oncolytic virus therapy for bladder cancer.')
(3, 'Licensing', 'Merck', 'Harpoon Therapeutics', '2024-01-08', '$680 million', '$680 million', 'Oncology', 'T-cell engagers', 'Merck acquired Harpoon Therapeutics for $680 million to expand its oncology pipeline with novel T-cell engagers.')
(4, 'M&A', 'Bristol Myers Squibb', 'RayzeBio', '2024-01-16', None, '$4.1 billion', 'Oncology', 'Radiopharmaceutical therapeutics', 'Bristol Myers Squibb acquired RayzeBio to expand its oncology pipeline with radiopharmaceutical therapeutics.')
(5, 'M&A', 'Bristol Myers Squibb', 'Mirati Therapeuti