# KI-gestützte Diagnostik: Patentanalyse für MedTech KMU
---
PATSTAT BigQuery | Stand: 2026-02-05
Kontext: WIK-Papier Nr. 535 + Hightech-Agenda Deutschland

---
IPC-Strategie:
AI-Seite:        G06N (Neural Networks/ML) + G16H (Health Informatics)
Diagnostik-Seite: A61B 5/ (Messung), A61B 6/ (Bildgebung), A61B 8/ (Ultraschall)
Ausschlüsse:     A61B 34/ (Robotik), G01N (Labor), A61B 1/ (Endoskopie)

---
REGEXP löst das Whitespace-Problem: '^A61B\s*5/' matcht "A61B 5/", "A61B  5/" und "A61B5/" (\s* = beliebig viele Whitespace-Zeichen, auch keines)

---
### QUERY A: Trend nach AI-Klasse und Diagnostik-Subklasse (FUNKTIONIERT ✅)
Ergebnis: 59 Zeilen im aktuellen Lauf (zuvor notiert: 20); G06N zeigt echten AI-Trend (15→56 bei A61B 5/, 2015–2018)
G16H dominiert volumenmäßig (265→351), ist aber breiter gefasst

In [6]:
from epo.tipdata.patstat import PatstatClient
import pandas as pd
import time

# Create the PATSTAT client for the PROD environment. timed_query() below
# reads this module-level `patstat` handle to run its SQL.
patstat = PatstatClient(env='PROD')

def timed_query(query):
    """Run a SQL query through the module-level PATSTAT client and time it.

    Args:
        query: SQL text; passed unchanged to ``patstat.sql_query`` with
            ``use_legacy_sql=False``.

    Returns:
        A ``pandas.DataFrame`` built from the client's result.

    Side effects:
        Prints the elapsed seconds and ``len()`` of the raw result.
    """
    # perf_counter() is monotonic, so the measured interval cannot jump or go
    # negative if the system clock is adjusted while the query is running.
    start = time.perf_counter()
    res = patstat.sql_query(query, use_legacy_sql=False)
    elapsed = time.perf_counter() - start
    print(f"Query took {elapsed:.2f}s ({len(res)} rows)")
    return pd.DataFrame(res)

# Query A: number of applications per filing year, AI class and diagnostic
# subclass.
#
# Population (CTE ai_diag): rows of tls201_appln with appln_auth 'DE' or 'EP'
# and appln_filing_year >= 2015 that
#   * carry at least one IPC symbol starting with G06N or G16H,
#   * carry at least one IPC symbol matching A61B 5/, A61B 6/ or A61B 8/,
#   * carry no IPC symbol matching A61B 34/, G01N or A61B 1/.
# ai_class:   'G06N' as soon as any G06N symbol is present, otherwise 'G16H'.
# diag_class: first match in the order 5/, 6/, 8/, so an application with
#             several of these symbols is counted in exactly one subclass.
# appln_auth is selected in the CTE but not used by the final SELECT.
#
# The SQL is passed as a raw string (r"""...""") because the regex patterns
# contain "\s". In a normal string literal "\s" is an invalid escape sequence,
# for which Python emits a warning; the raw prefix keeps the backslash
# literally without that warning.
df = timed_query(r"""
WITH ai_diag AS (
  SELECT DISTINCT
    a.appln_id,
    a.appln_filing_year,
    a.appln_auth,
    CASE
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^G06N')
      ) THEN 'G06N'
      ELSE 'G16H'
    END AS ai_class,
    CASE
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*5/')
      ) THEN 'A61B 5/ (Messung/Diagnose)'
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*6/')
      ) THEN 'A61B 6/ (Bildgebung)'
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*8/')
      ) THEN 'A61B 8/ (Ultraschall)'
    END AS diag_class
  FROM tls201_appln a
  WHERE a.appln_auth IN ('DE', 'EP')
    AND a.appln_filing_year >= 2015
    AND (
      EXISTS (SELECT 1 FROM tls209_appln_ipc i1
              WHERE i1.appln_id = a.appln_id
                AND REGEXP_CONTAINS(i1.ipc_class_symbol, r'^G06N'))
      OR EXISTS (SELECT 1 FROM tls209_appln_ipc i2
                 WHERE i2.appln_id = a.appln_id
                   AND REGEXP_CONTAINS(i2.ipc_class_symbol, r'^G16H'))
    )
    AND EXISTS (
      SELECT 1 FROM tls209_appln_ipc i3
      WHERE i3.appln_id = a.appln_id
        AND REGEXP_CONTAINS(i3.ipc_class_symbol, r'^A61B\s*(5|6|8)/')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex
      WHERE ex.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex.ipc_class_symbol, r'^A61B\s*34/')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex2
      WHERE ex2.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex2.ipc_class_symbol, r'^G01N')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex3
      WHERE ex3.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex3.ipc_class_symbol, r'^A61B\s*1/')
    )
)

SELECT
  appln_filing_year,
  ai_class,
  diag_class,
  COUNT(DISTINCT appln_id) AS applications
FROM ai_diag
WHERE diag_class IS NOT NULL
GROUP BY appln_filing_year, ai_class, diag_class
ORDER BY appln_filing_year, ai_class, diag_class;
""")

# Display results
df
# Optional CSV export:
# df.to_csv('ki_diagnostik_query_a.csv', index=False)


  AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*5/')


Query took 0.31s (59 rows)


Unnamed: 0,appln_filing_year,ai_class,diag_class,applications
0,2015,G06N,A61B 5/ (Messung/Diagnose),15
1,2015,G16H,A61B 5/ (Messung/Diagnose),265
2,2015,G16H,A61B 6/ (Bildgebung),21
3,2015,G16H,A61B 8/ (Ultraschall),17
4,2016,G06N,A61B 5/ (Messung/Diagnose),12
5,2016,G06N,A61B 8/ (Ultraschall),1
6,2016,G16H,A61B 5/ (Messung/Diagnose),283
7,2016,G16H,A61B 6/ (Bildgebung),21
8,2016,G16H,A61B 8/ (Ultraschall),18
9,2017,G06N,A61B 5/ (Messung/Diagnose),40


In [4]:
from epo.tipdata.patstat import PatstatClient
import pandas as pd
import time

# Create the PATSTAT client for the PROD environment. timed_query() below
# reads this module-level `patstat` handle to run its SQL.
patstat = PatstatClient(env='PROD')

def timed_query(query):
    """Run a SQL query through the module-level PATSTAT client and time it.

    Args:
        query: SQL text; passed unchanged to ``patstat.sql_query`` with
            ``use_legacy_sql=False``.

    Returns:
        A ``pandas.DataFrame`` built from the client's result.

    Side effects:
        Prints the elapsed seconds and ``len()`` of the raw result.
    """
    # perf_counter() is monotonic, so the measured interval cannot jump or go
    # negative if the system clock is adjusted while the query is running.
    start = time.perf_counter()
    res = patstat.sql_query(query, use_legacy_sql=False)
    elapsed = time.perf_counter() - start
    print(f"Query took {elapsed:.2f}s ({len(res)} rows)")
    return pd.DataFrame(res)

# Query B: applicants per AI class and diagnostic subclass.
#
# CTE ai_diag: rows of tls201_appln with appln_auth 'DE' or 'EP' and
# appln_filing_year >= 2015 that carry a G06N or G16H symbol, carry an
# A61B 5/, 6/ or 8/ symbol, and carry no A61B 34/, G01N or A61B 1/ symbol.
#   ai_class:   'G06N' as soon as any G06N symbol is present, else 'G16H'.
#   diag_class: first match in the order 5/, 6/, 8/.
# CTE applicants: joins each application to its persons and keeps only rows
# with applt_seq_nr > 0; applicant_name is han_name, falling back to
# person_name when han_name is NULL.
# Final SELECT: groups by class, name, country and sector (the same name with a
# different country or sector yields a separate row) and keeps groups with at
# least 3 distinct applications.
#
# The SQL is passed as a raw string (r"""...""") because the regex patterns
# contain "\s". In a normal string literal "\s" is an invalid escape sequence,
# for which Python emits a warning; the raw prefix keeps the backslash
# literally without that warning.
df = timed_query(r"""
WITH ai_diag AS (
  SELECT DISTINCT
    a.appln_id,
    a.appln_filing_year,
    a.appln_auth,
    CASE
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^G06N')
      ) THEN 'G06N'
      ELSE 'G16H'
    END AS ai_class,
    CASE
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*5/')
      ) THEN 'A61B 5/'
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*6/')
      ) THEN 'A61B 6/'
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*8/')
      ) THEN 'A61B 8/'
    END AS diag_class
  FROM tls201_appln a
  WHERE a.appln_auth IN ('DE', 'EP')
    AND a.appln_filing_year >= 2015
    AND (
      EXISTS (SELECT 1 FROM tls209_appln_ipc i1
              WHERE i1.appln_id = a.appln_id
                AND REGEXP_CONTAINS(i1.ipc_class_symbol, r'^G06N'))
      OR EXISTS (SELECT 1 FROM tls209_appln_ipc i2
                 WHERE i2.appln_id = a.appln_id
                   AND REGEXP_CONTAINS(i2.ipc_class_symbol, r'^G16H'))
    )
    AND EXISTS (
      SELECT 1 FROM tls209_appln_ipc i3
      WHERE i3.appln_id = a.appln_id
        AND REGEXP_CONTAINS(i3.ipc_class_symbol, r'^A61B\s*(5|6|8)/')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex
      WHERE ex.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex.ipc_class_symbol, r'^A61B\s*34/')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex2
      WHERE ex2.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex2.ipc_class_symbol, r'^G01N')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex3
      WHERE ex3.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex3.ipc_class_symbol, r'^A61B\s*1/')
    )
),

applicants AS (
  SELECT
    ad.appln_id,
    ad.appln_filing_year,
    ad.ai_class,
    ad.diag_class,
    COALESCE(p.han_name, p.person_name) AS applicant_name,
    p.person_ctry_code,
    p.psn_sector
  FROM ai_diag ad
  JOIN tls207_pers_appln pa ON ad.appln_id = pa.appln_id
  JOIN tls206_person p ON pa.person_id = p.person_id
  WHERE pa.applt_seq_nr > 0
    AND ad.diag_class IS NOT NULL
)

SELECT
  ai_class,
  diag_class,
  applicant_name,
  person_ctry_code AS country,
  psn_sector AS sector,
  COUNT(DISTINCT appln_id) AS patent_count,
  MIN(appln_filing_year) AS first_year,
  MAX(appln_filing_year) AS last_year
FROM applicants
GROUP BY ai_class, diag_class, applicant_name, person_ctry_code, psn_sector
HAVING COUNT(DISTINCT appln_id) >= 3
ORDER BY ai_class, diag_class, patent_count DESC;
""")

# Display results
df
# Optional CSV export:
# df.to_csv('ki_diagnostik_query_b.csv', index=False)


  AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*5/')


Query took 0.37s (331 rows)


### QUERY B: Top-Anmelder nach AI-Klasse und Diagnostik-Subklasse

Erweiterung: Wer sind die Spieler? han_name für harmonisierte Namen.
applt_seq_nr > 0 = nur Anmelder (keine Erfinder)

In [2]:
from epo.tipdata.patstat import PatstatClient
import pandas as pd
import time

# Create the PATSTAT client for the PROD environment. timed_query() below
# reads this module-level `patstat` handle to run its SQL.
patstat = PatstatClient(env='PROD')

def timed_query(query):
    """Run a SQL query through the module-level PATSTAT client and time it.

    Args:
        query: SQL text; passed unchanged to ``patstat.sql_query`` with
            ``use_legacy_sql=False``.

    Returns:
        A ``pandas.DataFrame`` built from the client's result.

    Side effects:
        Prints the elapsed seconds and ``len()`` of the raw result.
    """
    # perf_counter() is monotonic, so the measured interval cannot jump or go
    # negative if the system clock is adjusted while the query is running.
    start = time.perf_counter()
    res = patstat.sql_query(query, use_legacy_sql=False)
    elapsed = time.perf_counter() - start
    print(f"Query took {elapsed:.2f}s ({len(res)} rows)")
    return pd.DataFrame(res)

# Query B: applicants per AI class and diagnostic subclass.
#
# CTE ai_diag: rows of tls201_appln with appln_auth 'DE' or 'EP' and
# appln_filing_year >= 2015 that carry a G06N or G16H symbol, carry an
# A61B 5/, 6/ or 8/ symbol, and carry no A61B 34/, G01N or A61B 1/ symbol.
#   ai_class:   'G06N' as soon as any G06N symbol is present, else 'G16H'.
#   diag_class: first match in the order 5/, 6/, 8/.
# CTE applicants: joins each application to its persons and keeps only rows
# with applt_seq_nr > 0; applicant_name is han_name, falling back to
# person_name when han_name is NULL.
# Final SELECT: groups by class, name, country and sector (the same name with a
# different country or sector yields a separate row) and keeps groups with at
# least 3 distinct applications.
#
# The SQL is passed as a raw string (r"""...""") because the regex patterns
# contain "\s". In a normal string literal "\s" is an invalid escape sequence,
# for which Python emits a warning; the raw prefix keeps the backslash
# literally without that warning.
df = timed_query(r"""
WITH ai_diag AS (
  SELECT DISTINCT
    a.appln_id,
    a.appln_filing_year,
    a.appln_auth,
    CASE
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^G06N')
      ) THEN 'G06N'
      ELSE 'G16H'
    END AS ai_class,
    CASE
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*5/')
      ) THEN 'A61B 5/'
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*6/')
      ) THEN 'A61B 6/'
      WHEN EXISTS (
        SELECT 1 FROM tls209_appln_ipc x
        WHERE x.appln_id = a.appln_id
          AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*8/')
      ) THEN 'A61B 8/'
    END AS diag_class
  FROM tls201_appln a
  WHERE a.appln_auth IN ('DE', 'EP')
    AND a.appln_filing_year >= 2015
    AND (
      EXISTS (SELECT 1 FROM tls209_appln_ipc i1
              WHERE i1.appln_id = a.appln_id
                AND REGEXP_CONTAINS(i1.ipc_class_symbol, r'^G06N'))
      OR EXISTS (SELECT 1 FROM tls209_appln_ipc i2
                 WHERE i2.appln_id = a.appln_id
                   AND REGEXP_CONTAINS(i2.ipc_class_symbol, r'^G16H'))
    )
    AND EXISTS (
      SELECT 1 FROM tls209_appln_ipc i3
      WHERE i3.appln_id = a.appln_id
        AND REGEXP_CONTAINS(i3.ipc_class_symbol, r'^A61B\s*(5|6|8)/')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex
      WHERE ex.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex.ipc_class_symbol, r'^A61B\s*34/')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex2
      WHERE ex2.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex2.ipc_class_symbol, r'^G01N')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex3
      WHERE ex3.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex3.ipc_class_symbol, r'^A61B\s*1/')
    )
),

applicants AS (
  SELECT
    ad.appln_id,
    ad.appln_filing_year,
    ad.ai_class,
    ad.diag_class,
    COALESCE(p.han_name, p.person_name) AS applicant_name,
    p.person_ctry_code,
    p.psn_sector
  FROM ai_diag ad
  JOIN tls207_pers_appln pa ON ad.appln_id = pa.appln_id
  JOIN tls206_person p ON pa.person_id = p.person_id
  WHERE pa.applt_seq_nr > 0
    AND ad.diag_class IS NOT NULL
)

SELECT
  ai_class,
  diag_class,
  applicant_name,
  person_ctry_code AS country,
  psn_sector AS sector,
  COUNT(DISTINCT appln_id) AS patent_count,
  MIN(appln_filing_year) AS first_year,
  MAX(appln_filing_year) AS last_year
FROM applicants
GROUP BY ai_class, diag_class, applicant_name, person_ctry_code, psn_sector
HAVING COUNT(DISTINCT appln_id) >= 3
ORDER BY ai_class, diag_class, patent_count DESC;
""")

# Display results
df
# Optional CSV export:
# df.to_csv('ki_diagnostik_query_b.csv', index=False)

  AND REGEXP_CONTAINS(x.ipc_class_symbol, r'^A61B\s*5/')


Query took 0.41s (331 rows)


Unnamed: 0,ai_class,diag_class,applicant_name,country,sector,patent_count,first_year,last_year
0,G06N,A61B 5/,KON PHILIPS ELECT NV,NL,COMPANY,52,2016,2024
1,G06N,A61B 5/,SIEMENS HEALTHCARE GMBH,DE,COMPANY,33,2015,2022
2,G06N,A61B 5/,BAYER AG,DE,COMPANY,14,2020,2024
3,G06N,A61B 5/,SAMSUNG ELECT CO LTD,KR,COMPANY,14,2018,2023
4,G06N,A61B 5/,SIEMENS HEALTHINEERS AG,DE,UNKNOWN,11,2018,2024
...,...,...,...,...,...,...,...,...
326,G16H,A61B 8/,FUJIFILM SONOSITE INC,US,COMPANY,5,2016,2019
327,G16H,A61B 8/,BUTTERFLY NETWORK INC,US,COMPANY,5,2018,2020
328,G16H,A61B 8/,BFLY OPERATIONS INC,US,UNKNOWN,4,2020,2023
329,G16H,A61B 8/,SAMSUNG ELECT CO LTD,KR,COMPANY,4,2015,2015


### QUERY C: Top-20 Gesamtranking (über alle Klassen)
Kompakte Übersicht: Wer hat die meisten KI-Diagnostik-Patente insgesamt?


In [3]:
from epo.tipdata.patstat import PatstatClient
import pandas as pd
import time

# Create the PATSTAT client for the PROD environment. timed_query() below
# reads this module-level `patstat` handle to run its SQL.
patstat = PatstatClient(env='PROD')

def timed_query(query):
    """Run a SQL query through the module-level PATSTAT client and time it.

    Args:
        query: SQL text; passed unchanged to ``patstat.sql_query`` with
            ``use_legacy_sql=False``.

    Returns:
        A ``pandas.DataFrame`` built from the client's result.

    Side effects:
        Prints the elapsed seconds and ``len()`` of the raw result.
    """
    # perf_counter() is monotonic, so the measured interval cannot jump or go
    # negative if the system clock is adjusted while the query is running.
    start = time.perf_counter()
    res = patstat.sql_query(query, use_legacy_sql=False)
    elapsed = time.perf_counter() - start
    print(f"Query took {elapsed:.2f}s ({len(res)} rows)")
    return pd.DataFrame(res)

# Query C: top-20 applicants across all AI/diagnostic classes.
#
# CTE ai_diag: rows of tls201_appln with appln_auth 'DE' or 'EP' and
# appln_filing_year >= 2015 that carry a G06N or G16H symbol, carry an
# A61B 5/, 6/ or 8/ symbol, and carry no A61B 34/, G01N or A61B 1/ symbol.
# Final SELECT: joins to persons with applt_seq_nr > 0, groups by applicant
# name (han_name, falling back to person_name), country and sector, keeps
# groups with at least 5 distinct applications and returns the 20 largest.
#   active_years is last_year - first_year, i.e. 0 when all filings of a group
#   fall into a single year.
#   The ORDER BY carries name/country/sector as tie-breakers so that the rows
#   returned at the LIMIT 20 cutoff are deterministic when counts are equal.
#
# The SQL is passed as a raw string (r"""...""") because the regex patterns
# contain "\s". In a normal string literal "\s" is an invalid escape sequence,
# for which Python emits a warning; the raw prefix keeps the backslash
# literally without that warning.
df = timed_query(r"""
WITH ai_diag AS (
  SELECT DISTINCT
    a.appln_id,
    a.appln_filing_year
  FROM tls201_appln a
  WHERE a.appln_auth IN ('DE', 'EP')
    AND a.appln_filing_year >= 2015
    AND (
      EXISTS (SELECT 1 FROM tls209_appln_ipc i1
              WHERE i1.appln_id = a.appln_id
                AND REGEXP_CONTAINS(i1.ipc_class_symbol, r'^G06N'))
      OR EXISTS (SELECT 1 FROM tls209_appln_ipc i2
                 WHERE i2.appln_id = a.appln_id
                   AND REGEXP_CONTAINS(i2.ipc_class_symbol, r'^G16H'))
    )
    AND EXISTS (
      SELECT 1 FROM tls209_appln_ipc i3
      WHERE i3.appln_id = a.appln_id
        AND REGEXP_CONTAINS(i3.ipc_class_symbol, r'^A61B\s*(5|6|8)/')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex
      WHERE ex.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex.ipc_class_symbol, r'^A61B\s*34/')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex2
      WHERE ex2.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex2.ipc_class_symbol, r'^G01N')
    )
    AND NOT EXISTS (
      SELECT 1 FROM tls209_appln_ipc ex3
      WHERE ex3.appln_id = a.appln_id
        AND REGEXP_CONTAINS(ex3.ipc_class_symbol, r'^A61B\s*1/')
    )
)

SELECT
  COALESCE(p.han_name, p.person_name) AS applicant_name,
  p.person_ctry_code AS country,
  p.psn_sector AS sector,
  COUNT(DISTINCT ad.appln_id) AS total_patents,
  MIN(ad.appln_filing_year) AS first_year,
  MAX(ad.appln_filing_year) AS last_year,
  MAX(ad.appln_filing_year) - MIN(ad.appln_filing_year) AS active_years
FROM ai_diag ad
JOIN tls207_pers_appln pa ON ad.appln_id = pa.appln_id
JOIN tls206_person p ON pa.person_id = p.person_id
WHERE pa.applt_seq_nr > 0
GROUP BY COALESCE(p.han_name, p.person_name), p.person_ctry_code, p.psn_sector
HAVING COUNT(DISTINCT ad.appln_id) >= 5
ORDER BY total_patents DESC, applicant_name, country, sector
LIMIT 20;
""")

# Display results
df
# Optional CSV export:
# df.to_csv('ki_diagnostik_query_c.csv', index=False)

  AND REGEXP_CONTAINS(i3.ipc_class_symbol, r'^A61B\s*(5|6|8)/')


Query took 0.34s (20 rows)
