In [3]:
from epo.tipdata.patstat import PatstatClient
# Create the PATSTAT client with its default configuration. According to the
# banner it prints, the default points at a reduced test dataset (~10K
# publications); PatstatClient(env='PROD') selects the complete dataset.
patstat = PatstatClient()
# Handle that later cells use to run SQL via db.execute(...).
db = patstat.orm()

This client instance is currently configured to use a test dataset with reduced number of publications (~10K).
Use PatstatClient(env='PROD') to use the complete PATSTAT dataset (>140M publications).



In [4]:
# Yearly number of distinct applications that carry at least one IPC symbol
# starting with 'H02S', restricted to filing dates from 2000-01-01 onwards.
# This is the only definition of `query` in this cell: a previous upper-case
# draft assigned to the same name was overwritten before it could be used and
# has been dropped.
query = """
SELECT
    EXTRACT(YEAR FROM a.appln_filing_date) AS filing_year,
    COUNT(DISTINCT a.appln_id) AS patent_count
FROM tls201_appln a
JOIN tls209_appln_ipc i
  ON a.appln_id = i.appln_id
WHERE
    i.ipc_class_symbol LIKE 'H02S%'
    AND a.appln_filing_date >= DATE '2000-01-01'
GROUP BY
    filing_year
ORDER BY
    filing_year
"""


In [5]:
# Run the yearly H02S count query through the client's sql_query helper.
# NOTE(review): `result` is rebound by a db.execute(...) call in a later cell
# before anything reads it, so this call looks exploratory.
# use_legacy_sql=False presumably selects the standard SQL dialect — confirm
# against the client documentation.
result = patstat.sql_query(query, use_legacy_sql=False)

In [6]:
import pandas as pd
from sqlalchemy import text
from epo.tipdata.patstat import PatstatClient
# NOTE(review): this re-creates the client and the `db` handle that the first
# cell already set up (and prints the dataset banner a second time).
patstat = PatstatClient()
db = patstat.orm()

# Yearly count of distinct applications with an IPC symbol starting 'H02S',
# filed on or after 2000-01-01.
# NOTE(review): same SQL text as the `query` assigned in the earlier cell.
query = """
SELECT
    EXTRACT(YEAR FROM a.appln_filing_date) AS filing_year,
    COUNT(DISTINCT a.appln_id) AS patent_count
FROM tls201_appln a
JOIN tls209_appln_ipc i
  ON a.appln_id = i.appln_id
WHERE
    i.ipc_class_symbol LIKE 'H02S%'
    AND a.appln_filing_date >= DATE '2000-01-01'
GROUP BY
    filing_year
ORDER BY
    filing_year
"""
 
# Execute the statement through the `db` handle and materialise every row into
# a DataFrame whose column labels are taken from the result's keys.
result = db.execute(text(query))
df = pd.DataFrame(result.fetchall(), columns=result.keys())
 
# Preview the first rows (filing_year, patent_count).
df.head()

This client instance is currently configured to use a test dataset with reduced number of publications (~10K).
Use PatstatClient(env='PROD') to use the complete PATSTAT dataset (>140M publications).



Unnamed: 0,filing_year,patent_count
0,2000,5
1,2001,7
2,2002,5
3,2003,5
4,2004,6


In [7]:
# Row count of the result: the query groups by filing year, so this is the
# number of distinct filing years (2000 onwards) with at least one match.
len(df)

25

In [None]:
# Catalogue of named PATSTAT queries. Each entry is a dict with three keys:
#   "query"       - the SQL text
#   "name"        - a short title
#   "description" - a longer explanation of what the query compares
# The list is rebuilt from scratch here, so re-running the cell does not
# accumulate duplicate entries.
CEU_sqls = [
    {
        # Per filing year (2000 onwards): distinct applications with an IPC
        # symbol starting 'H02S', 'H10F' and 'H01L  31' respectively.
        "query": """SELECT
    EXTRACT(YEAR FROM t1.appln_filing_date) AS filing_year,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H02S%' THEN t1.appln_id
        END
    ) AS h02s_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H10F%' THEN t1.appln_id
        END
    ) AS h10f_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H01L  31%' THEN t1.appln_id
        END
    ) AS h01l31_count
FROM tls201_appln AS t1
JOIN tls209_appln_ipc AS t2
    ON t1.appln_id = t2.appln_id
WHERE
    t1.appln_filing_date >= DATE '2000-01-01'
    AND (
        t2.ipc_class_symbol LIKE 'H02S%'
        OR t2.ipc_class_symbol LIKE 'H10F%'
        OR t2.ipc_class_symbol LIKE 'H01L  31%'
    )
GROUP BY
    EXTRACT(YEAR FROM t1.appln_filing_date)
ORDER BY
    filing_year;
""",
        "name": "comparison of PV installation mechanics vs tech in semiconductors",
        "description": "this compares the patstat data classified under H02S which is the part of PV tech dealing with mechanics of installations vs the technology in semiconductors technology itself",
    },
]

# Second catalogue entry. Per filing year (2000 onwards): distinct applications
# with an IPC symbol starting 'H01L  31' versus 'H10K  30'.
# The description had three misspellings ("applciations", "clasical",
# "ornaic") that would have been persisted with the catalogue; they are
# corrected here.
q = {
    "query": """SELECT
    EXTRACT(YEAR FROM t1.appln_filing_date) AS filing_year,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H01L  31%' THEN t1.appln_id
        END
    ) AS h01l31_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H10K  30%' THEN t1.appln_id
        END
    ) AS h10k30_count
FROM tls201_appln AS t1
JOIN tls209_appln_ipc AS t2
    ON t1.appln_id = t2.appln_id
WHERE
    t1.appln_filing_date >= DATE '2000-01-01'
    AND (
        t2.ipc_class_symbol LIKE 'H01L  31%'
        OR t2.ipc_class_symbol LIKE 'H10K  30%'
    )
GROUP BY
    EXTRACT(YEAR FROM t1.appln_filing_date)
ORDER BY
    filing_year;
""",
    "name": "comparing pv based on semiconductors technology vs organic based materials",
    "description": "This data compares the applications of pv modules based on inorganic materials, i.e. classical semiconductors materials, versus organic based materials",
}
CEU_sqls.append(q)

In [21]:
# Display the catalogue built above to check its contents before the next cell
# writes it to disk.
CEU_sqls

[{'query': "SELECT\n    EXTRACT(YEAR FROM t1.appln_filing_date) AS filing_year,\n    COUNT(\n        DISTINCT CASE\n            WHEN t2.ipc_class_symbol LIKE 'H02S%' THEN t1.appln_id\n        END\n    ) AS h02s_count,\n    COUNT(\n        DISTINCT CASE\n            WHEN t2.ipc_class_symbol LIKE 'H10F%' THEN t1.appln_id\n        END\n    ) AS h10f_count,\n    COUNT(\n        DISTINCT CASE\n            WHEN t2.ipc_class_symbol LIKE 'H01L  31%' THEN t1.appln_id\n        END\n    ) AS h01l31_count\nFROM tls201_appln AS t1\nJOIN tls209_appln_ipc AS t2\n    ON t1.appln_id = t2.appln_id\nWHERE\n    t1.appln_filing_date >= DATE '2000-01-01'\n    AND (\n        t2.ipc_class_symbol LIKE 'H02S%'\n        OR t2.ipc_class_symbol LIKE 'H10F%'\n        OR t2.ipc_class_symbol LIKE 'H01L  31%'\n    )\nGROUP BY\n    EXTRACT(YEAR FROM t1.appln_filing_date)\nORDER BY\n    filing_year;\n",
  'name': 'comparison of PV installation mechanics vs tech in semiconductors',
  'description': 'this compares the pat

In [22]:
import json

# Persist the query catalogue as pretty-printed UTF-8 JSON. The document is
# rendered to text and written in a single call; non-ASCII characters are kept
# as-is instead of being escaped.
with open("queries.json", mode="w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(CEU_sqls, ensure_ascii=False, indent=2))
    

In [26]:
# Read the saved catalogue back from disk and parse the JSON text.
with open("queries.json", mode="r", encoding="utf-8") as in_file:
    data = json.loads(in_file.read())

# Round-trip check: `data` is a list of dicts again, so print the SQL text of
# the first entry.
print(data[0]["query"])

SELECT
    EXTRACT(YEAR FROM t1.appln_filing_date) AS filing_year,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H02S%' THEN t1.appln_id
        END
    ) AS h02s_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H10F%' THEN t1.appln_id
        END
    ) AS h10f_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H01L  31%' THEN t1.appln_id
        END
    ) AS h01l31_count
FROM tls201_appln AS t1
JOIN tls209_appln_ipc AS t2
    ON t1.appln_id = t2.appln_id
WHERE
    t1.appln_filing_date >= DATE '2000-01-01'
    AND (
        t2.ipc_class_symbol LIKE 'H02S%'
        OR t2.ipc_class_symbol LIKE 'H10F%'
        OR t2.ipc_class_symbol LIKE 'H01L  31%'
    )
GROUP BY
    EXTRACT(YEAR FROM t1.appln_filing_date)
ORDER BY
    filing_year;



In [9]:
# Plain-string copy of the three-way IPC comparison: per filing year (2000
# onwards), distinct applications with a symbol starting 'H02S', 'H10F' and
# 'H01L  31'.
# NOTE(review): this rebinds `q`, which an earlier cell uses for a dict entry
# of CEU_sqls, to a str; the SQL text appears identical to the "query" value of
# the first CEU_sqls entry.
q="""SELECT
    EXTRACT(YEAR FROM t1.appln_filing_date) AS filing_year,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H02S%' THEN t1.appln_id
        END
    ) AS h02s_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H10F%' THEN t1.appln_id
        END
    ) AS h10f_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H01L  31%' THEN t1.appln_id
        END
    ) AS h01l31_count
FROM tls201_appln AS t1
JOIN tls209_appln_ipc AS t2
    ON t1.appln_id = t2.appln_id
WHERE
    t1.appln_filing_date >= DATE '2000-01-01'
    AND (
        t2.ipc_class_symbol LIKE 'H02S%'
        OR t2.ipc_class_symbol LIKE 'H10F%'
        OR t2.ipc_class_symbol LIKE 'H01L  31%'
    )
GROUP BY
    EXTRACT(YEAR FROM t1.appln_filing_date)
ORDER BY
    filing_year;
"""

In [10]:
# NOTE(review): in file order `q` is a bare SQL string at this point, whereas
# CEU_sqls is built from dicts with "query"/"name"/"description" keys, so this
# append mixes element types. It also sits after the cell that writes
# queries.json, so the appended item is not part of that dump. Confirm whether
# a dict entry was intended, or whether this cell is a leftover.
CEU_sqls.append(q)

In [None]:
# Select the SQL to run from the string held in `q`.
# NOTE(review): the following cell assigns `query` again before executing it,
# so this assignment currently has no effect on what is run.
query = q  # alternative source left by the author: CEU_sqls[1]['query']

In [28]:
# Three-way IPC comparison per filing year (2000 onwards): distinct
# applications with a symbol starting 'H02S', 'H10F' and 'H01L  31'. The WHERE
# clause limits the joined rows to those three prefixes; each COUNT(DISTINCT
# CASE ...) then counts only the applications matching its own prefix.
# NOTE(review): this is another literal copy of the SQL already stored in
# CEU_sqls[0]["query"] and in `q`.
query = """SELECT
    EXTRACT(YEAR FROM t1.appln_filing_date) AS filing_year,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H02S%' THEN t1.appln_id
        END
    ) AS h02s_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H10F%' THEN t1.appln_id
        END
    ) AS h10f_count,
    COUNT(
        DISTINCT CASE
            WHEN t2.ipc_class_symbol LIKE 'H01L  31%' THEN t1.appln_id
        END
    ) AS h01l31_count
FROM tls201_appln AS t1
JOIN tls209_appln_ipc AS t2
    ON t1.appln_id = t2.appln_id
WHERE
    t1.appln_filing_date >= DATE '2000-01-01'
    AND (
        t2.ipc_class_symbol LIKE 'H02S%'
        OR t2.ipc_class_symbol LIKE 'H10F%'
        OR t2.ipc_class_symbol LIKE 'H01L  31%'
    )
GROUP BY
    EXTRACT(YEAR FROM t1.appln_filing_date)
ORDER BY
    filing_year;
"""
# Execute through the `db` handle created in an earlier cell and load all rows
# into a DataFrame labelled with the result's column keys. This rebinds
# `result` and `df` from the earlier single-class query.
result = db.execute(text(query))
df = pd.DataFrame(result.fetchall(), columns=result.keys())
 
# Preview the first rows.
# NOTE(review): h10f_count is 0 in every row of the displayed head — check
# whether the dataset in use carries any 'H10F' symbols.
df.head()

Unnamed: 0,filing_year,h02s_count,h10f_count,h01l31_count
0,2000,5,0,28
1,2001,7,0,51
2,2002,5,0,39
3,2003,5,0,52
4,2004,6,0,51
