In [2]:
import pandas as pd
import ast
import re
from tqdm import tqdm
tqdm.pandas()

# EPO

In [3]:
# Load Data
df_yake_claims_ep = pd.read_json('/mnt/hdd01/PATSTAT Working Directory/PATSTAT/cleantech_epo_text_data_pivot_cleaned_yake.json')

In [4]:
# Iterate over rows in dataframe
for index, row in tqdm(df_yake_claims_ep.iterrows()):
    # Cast row 'keywords_yake_claims' column to string
    row['keywords_yake_claims'] = str(row['keywords_yake_claims']).lower()
    # Check if keywords_yake_claims column starts with "[[[" and ends with "]]]""
    if row['keywords_yake_claims'].startswith('[[[') and row['keywords_yake_claims'].endswith(']]]'):
        # Remove first "[" and last "]" from keywords_yake_claims column
        row['keywords_yake_claims'] = row['keywords_yake_claims'][1:-1]
    # Cast row 'keywords_yake_claims' column to list
    row['keywords_yake_claims'] = ast.literal_eval(row['keywords_yake_claims'])
    # Assign modified 'keywords_yake_claims' list to temporary variable
    keywords_yake_temp = row['keywords_yake_claims']
    # Assign temporary variable to 'keywords_yake_claims' column
    df_yake_claims_ep.at[index, 'keywords_yake_claims'] = keywords_yake_temp

182369it [00:24, 7384.10it/s]


In [5]:
def parse_strings(s):
    # Check if the string starts with [' and ends with ']
    if s.startswith("['") and s.endswith("']"):
        # Use a regular expression to find all sequences of characters enclosed in single or double quotes
        return re.findall(r"['\"]([^'\"]*)['\"]", s)
    else:
        # Split the string by commas
        return s.split(', ')
    
# Apply parse_strings function to 'cpc_class_symbol' column
df_yake_claims_ep['cpc_class_symbol'] = df_yake_claims_ep['cpc_class_symbol'].progress_apply(parse_strings)

100%|██████████| 182369/182369 [00:00<00:00, 676120.22it/s]


In [7]:
keywords_list_ep = []
yake_conf_score_list = []
publn_nr_list = []
cpc_symbol_list = []
min_yake_conf = 0.05

# Iterate over rows in dataframe
for index, row in tqdm(df_yake_claims_ep.iterrows()):
    # Check if 'keywords_yake_claims' column is not a list
    if not isinstance(row['keywords_yake_claims'], list):
        continue
    # Check if 'keywords_yake_claims' column is an empty list or contains only empty lists
    if not any(keyword for keyword in row['keywords_yake_claims']):
        continue
    # Iterate over keywords in 'keywords_yake_claims' column and append to keywords_list_ep
    else:
        for keyword in row['keywords_yake_claims']:
            if keyword[1] <= min_yake_conf:
                keywords_list_ep.append(keyword[0])
                yake_conf_score_list.append(keyword[1])
                publn_nr_list.append(row['publn_nr'])
                cpc_symbol_list.append(row['cpc_class_symbol'])

# Create new dataframe
df_keywords_list_ep = pd.DataFrame({
    'keyword_yake': keywords_list_ep,
    'yake_conf_score': yake_conf_score_list,
    'publn_nr': publn_nr_list,
    'cpc_class_symbol': cpc_symbol_list,
    'abs_frequency': 1
})

182369it [00:09, 18555.05it/s]


In [10]:
# Filter out non-alphanumeric keywords
df_keywords_list_ep = df_keywords_list_ep[
    df_keywords_list_ep['keyword_yake'].progress_apply(lambda x: all(word.isalnum() for word in x.split()))
]

# Filter out all keywords shorter than 3 characters
df_keywords_list_ep = df_keywords_list_ep[
    df_keywords_list_ep['keyword_yake'].progress_apply(lambda x: len(x) > 2)
]

# Define a function to check if a string is an abbreviation
def is_abbreviation(keyword):
    # Regular expression to identify abbreviations (typically all uppercase and periods)
    # and check for all-uppercase abbreviations with 3 or fewer characters
    pattern = re.compile(r'\b(?:[A-Z]{1,}\.){2,}\b|\b[A-Z]{1,3}\b')
    return pattern.match(keyword) is not None

# Apply the function to filter out abbreviations
df_keywords_list_ep = df_keywords_list_ep[
    df_keywords_list_ep['keyword_yake'].progress_apply(lambda x: not is_abbreviation(x))
]

100%|██████████| 1427209/1427209 [00:01<00:00, 1162132.13it/s]


In [11]:
# Lemmatize keywords
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

def lemmatize_keywords(keyword):
    return ' '.join([lemmatizer.lemmatize(word) for word in keyword.split()])

df_keywords_list_ep['keyword_yake_lemma'] = df_keywords_list_ep['keyword_yake'].progress_apply(lemmatize_keywords)

100%|██████████| 1427209/1427209 [00:08<00:00, 162644.80it/s]


In [None]:
stopwords = set(nltk.corpus.stopwords.words('english'))

# Function to remove keywords that are only one stopword or start/end with a stopword
def remove_stopwords(keyword):
    words = keyword.split()
    
    # If the keyword is a single stopword, remove it
    if len(words) == 1 and words[0] in stopwords:
        return ''
    
    # If the keyword starts or ends with a stopword, remove line
    if words[0] in stopwords:
        return ''
    if words and words[-1] in stopwords:
        return ''
    
    return ' '.join(words)

# Apply the function to remove stopwords
df_keywords_list_ep['keyword_yake'] = df_keywords_list_ep['keyword_yake'].progress_apply(remove_stopwords)

# Remove empty keywords
df_keywords_list_ep = df_keywords_list_ep[
    df_keywords_list_ep['keyword_yake'].progress_apply(lambda x: len(x) > 0)
]

In [None]:
# Aggregate df_keywords_list_ep by 'keyword'
df_keywords_list_ep_agg = df_keywords_list_ep.groupby(['keyword_yake']).agg({
    'yake_conf_score': 'mean',
    'publn_nr': list,
    'cpc_class_symbol': list,
    'abs_frequency': 'count'
}).reset_index()

# Flatten nested lists in 'cpc_class_symbol' column
df_keywords_list_ep_agg['cpc_class_symbol'] = df_keywords_list_ep_agg['cpc_class_symbol'].progress_apply(lambda x: [item for sublist in x for item in sublist])

In [None]:
df_keywords_list_ep_agg.to_json('/mnt/hdd01/patentsview/Similarity Search - CPC Classification and Claims/epo_yake_keywords_list.json', orient='records')

# USPTO

In [None]:
# Load Data
df_yake_claims_uspto = pd.read_json('/mnt/hdd01/patentsview/Patentsview - Cleantech Patents/Cleantech Concepts/Yake/g_patent_claims_cleantech_yake.json')

In [None]:
# Iterate over rows in dataframe
for index, row in tqdm(df_yake_claims_uspto.iterrows()):
    # Cast row 'keywords_yake' column to string
    row['keywords_yake'] = str(row['keywords_yake']).lower()
    # Check if keywords_yake column starts with "[[[" and ends with "]]]""
    if row['keywords_yake'].startswith('[[[') and row['keywords_yake'].endswith(']]]'):
        # Remove first "[" and last "]" from keywords_yake column
        row['keywords_yake'] = row['keywords_yake'][1:-1]
    # Cast row 'keywords_yake' column to list
    row['keywords_yake'] = ast.literal_eval(row['keywords_yake'])
    # Assign modified 'keywords_yake' list to temporary variable
    keywords_yake_temp = row['keywords_yake']
    # Assign temporary variable to 'keywords_yake' column
    df_yake_claims_uspto.at[index, 'keywords_yake'] = keywords_yake_temp

In [None]:
keywords_list_uspto = []
yake_conf_score_list = []
patent_id_list = []
min_yake_conf = 0.05

# Iterate over rows in dataframe
for index, row in tqdm(df_yake_claims_uspto.iterrows()):
    # Check if 'keywords_yake' column is not a list
    if not isinstance(row['keywords_yake'], list):
        continue
    # Check if 'keywords_yake' column is an empty list or contains only empty lists
    if not any(keyword for keyword in row['keywords_yake']):
        continue
    # Iterate over keywords in 'keywords_yake' column and append to keywords_list_uspto
    else:
        for keyword in row['keywords_yake']:
            if keyword[1] <= min_yake_conf:
                keywords_list_uspto.append(keyword[0])
                yake_conf_score_list.append(keyword[1])
                patent_id_list.append(row['patent_id'])

# Create new dataframe
df_keywords_list_uspto = pd.DataFrame({
    'keyword_yake': keywords_list_uspto,
    'yake_conf_score': yake_conf_score_list,
    'patent_id': patent_id_list,
    'abs_frequency': 1
})

In [None]:
# Filter out non-alphanumeric keywords
df_keywords_list_uspto = df_keywords_list_uspto[
    df_keywords_list_uspto['keyword_yake'].progress_apply(lambda x: all(word.isalnum() for word in x.split()))
]

# Filter out all keywords shorter than 3 characters
df_keywords_list_uspto = df_keywords_list_uspto[
    df_keywords_list_uspto['keyword_yake'].progress_apply(lambda x: len(x) > 2)
]

# Define a function to check if a string is an abbreviation
def is_abbreviation(keyword):
    # Regular expression to identify abbreviations (typically all uppercase and periods)
    # and check for all-uppercase abbreviations with 3 or fewer characters
    pattern = re.compile(r'\b(?:[A-Z]{1,}\.){2,}\b|\b[A-Z]{1,3}\b')
    return pattern.match(keyword) is not None

# Apply the function to filter out abbreviations
df_keywords_list_uspto = df_keywords_list_uspto[
    df_keywords_list_uspto['keyword_yake'].progress_apply(lambda x: not is_abbreviation(x))
]

In [None]:
df_yake_claims_uspto['keywords_yake_exploded'] = ''
# Iterate over rows in dataframe
for index,row in tqdm(df_yake_claims_uspto.iterrows()):
    # Check if 'keywords_yake' column is not a list
    if not isinstance(row['keywords_yake'], list):
        continue
    # Check if 'keywords_yake' column is an empty list or contains only empty lists
    if not any(keyword for keyword in row['keywords_yake']):
        continue
    # Iterate over keywords in 'keywords_yake' column and append to 'keywords_yake_exploded' column
    else:
        for keyword in row['keywords_yake']:
            if keyword[1] <= min_yake_conf:
                df_yake_claims_uspto.at[index, 'keywords_yake_exploded'] += keyword[0] + ', '

# Cast 'keywords_yake_exploded' column to list
df_yake_claims_uspto['keywords_yake_exploded'] = df_yake_claims_uspto['keywords_yake_exploded'].str.split(', ')

# Delete last item in 'keywords_yake_exploded' cells if empty string
df_yake_claims_uspto['keywords_yake_exploded'] = df_yake_claims_uspto['keywords_yake_exploded'].progress_apply(lambda x: x[:-1] if x and x[-1] == '' else x)

## Match Patents to CPC Classification

In [None]:
# PatentsView - Merge with CPC Classification
df_cpc_uspto = pd.read_json('/mnt/hdd01/patentsview/Patentsview - Cleantech Patents/df_patentsview_patent_cpc_grouped_cleantech.json')
# Merge df_yake_claims with df_cpc only keep 'cpc' from df_cpc
df_yake_claims_cpc_uspto = df_yake_claims_uspto.merge(df_cpc_uspto[['patent_id', 'cpc']], on='patent_id', how='left')

In [None]:
rows_list = []

# Iterate over rows in dataframe
for index, row in tqdm(df_yake_claims_cpc_uspto.iterrows()):
    if row['keywords_yake_exploded'] == []:
        continue
    if not any(keyword for keyword in row['keywords_yake_exploded']):
        continue
    # Iterate over keywords in 'keywords_yake' column and append patent_id and cpc information
    else:
        for keyword in row['keywords_yake_exploded']:
            # PatentsView
            rows_list.append({'patent_id': row['patent_id'], 
                              'cpc': row['cpc'], 
                              'keyword_yake': keyword})

df_cpc_keywords_uspto = pd.DataFrame(rows_list)

In [None]:
# Extract 'cpc_group' into a new column
df_cpc_keywords_uspto['cpc_group'] = df_cpc_keywords_uspto['cpc'].progress_apply(
    lambda x: [entry['cpc_group'] for entry in x.values() if 'cpc_group' in entry]
)

# Aggregate df_cpc_keywords on 'keyword_yake' column
df_cpc_keywords_uspto_agg = df_cpc_keywords_uspto.groupby('keyword_yake').agg(
    patent_id_list = pd.NamedAgg(column='patent_id', aggfunc=list),
    cpc_group_list = pd.NamedAgg(column='cpc_group', aggfunc=list)
)

# Define a function to flatten nested lists
def flatten_nested_list(nested_list):
    return [item for sublist in nested_list for item in sublist]

# Convert nested lists in 'cpc_subclass_list' and 'cpc_group_list' columns to lists of strings
df_cpc_keywords_uspto_agg[['cpc_group_list']] = df_cpc_keywords_uspto_agg[['cpc_group_list']].applymap(flatten_nested_list)

In [None]:
# Remove duplicates from 'cpc_group_list'
df_cpc_keywords_uspto_agg['cpc_group_list'] = df_cpc_keywords_uspto_agg['cpc_group_list'].progress_apply(lambda x: list(set(x)))

## Aggregate USPTO Keyword List and Merge with CPC Classification

In [None]:
# Aggregate df_keywords_list_ep by 'keyword'
df_keywords_list_uspto_agg = df_keywords_list_uspto.groupby(['keyword_yake']).agg({
    'yake_conf_score': 'mean',
    'patent_id': list,
    'abs_frequency': 'count'
}).reset_index()

In [None]:
# Merge df_cpc_keywords_uspto_agg with df_keywords_list_uspto on 'keyword_yake' column and generate new dataframe
df_keywords_list_uspto_agg = pd.merge(df_keywords_list_uspto_agg, df_cpc_keywords_uspto_agg, on='keyword_yake', how='left')

In [None]:
df_keywords_list_uspto_agg.to_json('/mnt/hdd01/patentsview/Similarity Search - CPC Classification and Claims/uspto_yake_keywords_list.json', orient='records')

# Reliance on Science - USPTO and EPO

In [None]:
# Load Data
df_rel_on_science_uspto = pd.read_json('/mnt/hdd01/patentsview/Reliance on Science - Cleantech Patents/df_oaid_Cleantech_Y02_individual_works_yake.json')
df_rel_on_science_ep = pd.read_json('/mnt/hdd01/PATSTAT Working Directory/Reliance on Science/cleantech_epo_rel_on_science_abstract_yake.json')

In [None]:
# Concatenate dataframes, reset index and drop duplicates
df_rel_on_science = pd.concat([df_rel_on_science_uspto, df_rel_on_science_ep], ignore_index=True)
df_rel_on_science = df_rel_on_science.drop_duplicates(subset=['oaid'], keep='first').reset_index(drop=True)

In [None]:
# If keywords_yake column is empty, assign keywords_yake_abstract column
df_rel_on_science['keywords_yake'] = df_rel_on_science.progress_apply(
    lambda row: row['keywords_yake_abstract'] if type(row['keywords_yake']) == float else row['keywords_yake'], axis=1
)

# Drop keywords_yake_abstract column
del df_rel_on_science['keywords_yake_abstract']

In [None]:
# Iterate over rows in dataframe
for index, row in tqdm(df_rel_on_science.iterrows()):
    try:
        # Cast row 'keywords_yake' column to string
        row['keywords_yake'] = str(row['keywords_yake']).lower()
        # Check if keywords_yake column starts with "[[[" and ends with "]]]""
        if row['keywords_yake'].startswith('[[[') and row['keywords_yake'].endswith(']]]'):
            # Remove first "[" and last "]" from keywords_yake_claims column
            row['keywords_yake'] = row['keywords_yake'][1:-1]
        # Cast row 'keywords_yake_claims' column to list
        row['keywords_yake'] = ast.literal_eval(row['keywords_yake'])
        # Assign modified 'keywords_yake_claims' list to temporary variable
        keywords_yake_temp = row['keywords_yake']
        # Assign temporary variable to 'keywords_yake_claims' column
        df_rel_on_science.at[index, 'keywords_yake'] = keywords_yake_temp
    except:
        print(index)
        print(row['keywords_yake'])

In [None]:
keywords_list_rel = []
yake_conf_score_list = []
oaid_list = []
min_yake_conf = 0.05

# Iterate over rows in dataframe
for index, row in tqdm(df_rel_on_science.iterrows()):
    # Check if 'keywords_yake' column is not a list
    if not isinstance(row['keywords_yake'], list):
        continue
    # Check if 'keywords_yake' column is an empty list or contains only empty lists
    if not any(keyword for keyword in row['keywords_yake']):
        continue
    # Iterate over keywords in 'keywords_yake' column and append to keywords_list_rel
    else:
        for keyword in row['keywords_yake']:
            if keyword[1] <= min_yake_conf:
                keywords_list_rel.append(keyword[0])
                yake_conf_score_list.append(keyword[1])
                oaid_list.append(row['oaid'])

# Create new dataframe
df_keywords_list_rel = pd.DataFrame({
    'keyword_yake': keywords_list_rel,
    'yake_conf_score': yake_conf_score_list,
    'oaid': oaid_list,
    'abs_frequency': 1
})

In [None]:
# Filter out non-alphanumeric keywords
df_keywords_list_rel = df_keywords_list_rel[
    df_keywords_list_rel['keyword_yake'].progress_apply(lambda x: all(word.isalnum() for word in x.split()))
]

In [None]:
# Aggregate df_keywords_list_ep by 'keyword'
df_keywords_list_rel_agg = df_keywords_list_rel.groupby(['keyword_yake']).agg({
    'yake_conf_score': 'mean',
    'oaid': list,
    'abs_frequency': 'count'
}).reset_index()

In [None]:
df_keywords_list_rel_agg.to_json('/mnt/hdd01/patentsview/Similarity Search - CPC Classification and Claims/rel_on_science_yake_keywords_list.json', orient='records')

# Merge EP, USPTO and Reliance on Science

In [2]:
# Load Data
df_keywords_list_uspto_agg = pd.read_json('/mnt/hdd01/patentsview/Similarity Search - CPC Classification and Claims/uspto_yake_keywords_list.json')
df_keywords_list_ep_agg = pd.read_json('/mnt/hdd01/patentsview/Similarity Search - CPC Classification and Claims/epo_yake_keywords_list.json')
df_keywords_list_rel_agg = pd.read_json('/mnt/hdd01/patentsview/Similarity Search - CPC Classification and Claims/rel_on_science_yake_keywords_list.json')

In [9]:
frames = [df_keywords_list_uspto_agg, df_keywords_list_ep_agg, df_keywords_list_rel_agg]
df_keywords_list = pd.concat(frames)

In [10]:
df_keywords_list_agg = df_keywords_list.groupby('keyword_yake').agg({
    'yake_conf_score': 'mean',
    'abs_frequency': 'sum',
    'patent_id': 'first',
    'publn_nr': 'first',
    'oaid': 'first',
    'cpc_group_list': 'first',
    'cpc_class_symbol': 'first',
})

df_keywords_list_agg.reset_index(inplace=True)

In [33]:
# Cast entries in lists of column patent_id, publn_nr and oaid to strings
df_keywords_list_agg['patent_id'] = df_keywords_list_agg['patent_id'].progress_apply(
    lambda x: [str(entry) for entry in x] if isinstance(x, list) else x
)
df_keywords_list_agg['publn_nr'] = df_keywords_list_agg['publn_nr'].progress_apply(
    lambda x: [str(entry) for entry in x] if isinstance(x, list) else x
)
df_keywords_list_agg['oaid'] = df_keywords_list_agg['oaid'].progress_apply(
    lambda x: [str(entry) for entry in x] if isinstance(x, list) else x
)

100%|██████████| 2742286/2742286 [00:04<00:00, 552464.69it/s] 
100%|██████████| 2742286/2742286 [00:01<00:00, 2273362.76it/s]
100%|██████████| 2742286/2742286 [00:01<00:00, 1925345.15it/s]


## Postprocessing

In [34]:
# Prune dataframe by document frequency and absolute frequency
min_abs_frequency = 5
max_abs_frequency = 1000
# max_doc_frequency = 0.3

df_keywords_list_agg_pruned = df_keywords_list_agg[(df_keywords_list_agg['abs_frequency'] >= min_abs_frequency) & (df_keywords_list_agg['abs_frequency'] <= max_abs_frequency)]

# Reset index
df_keywords_list_agg_pruned.reset_index(drop=True, inplace=True)

In [37]:
### MAYBE I NEED TO REDO THIS PART WITH THE CLAIM TEXTS INSTEAD OF THE KEYWORDS - NOT CONSIDERED FOR NOW

import spacy

# Download spacy model
nlp = spacy.load('en_core_web_lg')

# Ensure you're working on a new DataFrame, not a slice of an old one
df_keywords_list_agg_pruned_pos = df_keywords_list_agg_pruned.copy()

# Perform part-of-speech tagging on the 'keyword_yake' column 
# and save the POS tags in a new column 'keyword_yake_pos'
df_keywords_list_agg_pruned_pos['keyword_yake_pos'] = df_keywords_list_agg_pruned_pos['keyword_yake'].progress_apply(
    lambda x: [token.pos_ for token in nlp(x)]
)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 133502/133502 [05:08<00:00, 433.35it/s]


In [38]:
# Filter out all keywords that do not contain NOUN, PRON or PROPN in their POS tags
df_keywords_list_agg_pruned_pos = df_keywords_list_agg_pruned_pos[df_keywords_list_agg_pruned_pos['keyword_yake_pos'].progress_apply(
    lambda x: any(pos in ['NOUN', 'PRON', 'PROPN'] for pos in x)
)]

# Reset index
df_keywords_list_agg_pruned_pos.reset_index(drop=True, inplace=True)

100%|██████████| 133502/133502 [00:00<00:00, 1006514.14it/s]


# Keyword Embedding

In [40]:
from sentence_transformers import SentenceTransformer
import torch

In [41]:
model_climatebert = SentenceTransformer('climatebert/distilroberta-base-climate-f')
model_bertforpatents = SentenceTransformer('anferico/bert-for-patents')
model_patentsberta = SentenceTransformer('AI-Growth-Lab/PatentSBERTa')

No sentence-transformers model found with name /home/thiesen/.cache/torch/sentence_transformers/climatebert_distilroberta-base-climate-f. Creating a new one with MEAN pooling.
Some weights of RobertaModel were not initialized from the model checkpoint at /home/thiesen/.cache/torch/sentence_transformers/climatebert_distilroberta-base-climate-f and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
No sentence-transformers model found with name /home/thiesen/.cache/torch/sentence_transformers/anferico_bert-for-patents. Creating a new one with MEAN pooling.


In [42]:
# Check if GPU is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU available: {}".format(torch.cuda.get_device_name(0)))
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU available: NVIDIA RTX A4500


In [43]:
# Generate copy of df_claims_keywords_list
df_keywords_list_agg_embeddings = df_keywords_list_agg_pruned_pos.copy()

# Perform sentence embedding on the 'keyword_yake' (PatentsView) or 'keywords_yake_claims' (EPO) column
df_keywords_list_agg_embeddings['keyword_yake_patentsberta_embedding'] = df_keywords_list_agg_embeddings['keyword_yake'].progress_apply(
    lambda x: model_patentsberta.encode(x)
)

df_keywords_list_agg_embeddings['keyword_yake_climatebert_embedding'] = df_keywords_list_agg_embeddings['keyword_yake'].progress_apply(
    lambda x: model_climatebert.encode(x)
)

df_keywords_list_agg_embeddings['keyword_yake_bertforpatents_embedding'] = df_keywords_list_agg_embeddings['keyword_yake'].progress_apply(
    lambda x: model_bertforpatents.encode(x)
)

100%|██████████| 119212/119212 [13:24<00:00, 148.24it/s]
100%|██████████| 119212/119212 [07:57<00:00, 249.58it/s]
100%|██████████| 119212/119212 [26:34<00:00, 74.75it/s]


In [58]:
# Save dataframe to json
df_keywords_list_agg_embeddings.to_json('/mnt/hdd01/patentsview/Similarity Search - CPC Classification and Claims/df_keywords_list_agg_uspto_epo_rel_embeddings.json', orient='records')