# Imports and Helper Functions

In [None]:
import pandas as pd
import numpy as np
import random
import umap
import matplotlib.pyplot as plt
from scipy.spatial import distance
from sklearn.preprocessing import StandardScaler

import sys
import os
sys.path.append(os.path.abspath('../aipowerdatasetconstruction/'))

from bert_util import make_embeddings_in_batch
from transformers import BertTokenizer, BertModel

In [None]:
# Number of abstracts per batch (second argument to make_embeddings_in_batch).
n_abstract_per_batch = 20

# Checkpoint name shared by the tokenizer and the model loaded below.
scibert_checkpoint = "allenai/scibert_scivocab_uncased"

# max token size is 512
tokenizer = BertTokenizer.from_pretrained(scibert_checkpoint, do_lower_case=True)
model = BertModel.from_pretrained(scibert_checkpoint)

def make_embeding_with_file(input_file, output_file):
    """Embed the ``Abstract`` column of a CSV and write the augmented table.

    Reads ``input_file`` (ISO-8859-1), passes its ``Abstract`` column through
    ``make_embeddings_in_batch`` together with the module-level
    ``n_abstract_per_batch``, ``model`` and ``tokenizer``, places the
    resulting embedding columns to the right of the original columns and
    writes everything to ``output_file``.

    Args:
        input_file: Path of the source CSV; it must have a column called
            ``Abstract``.
        output_file: Path of the CSV to write (ISO-8859-1, index included).
    """
    # Named ``abstracts_df`` rather than ``input`` so the builtin is not shadowed.
    abstracts_df = pd.read_csv(input_file, encoding="ISO-8859-1")
    embedding_tensor = make_embeddings_in_batch(
        abstracts_df['Abstract'], n_abstract_per_batch, model, tokenizer
    )
    # Column-wise concat pairs rows by index label: both frames carry the
    # default 0..n-1 index here, so row i gets the embedding of abstract i.
    output = pd.concat([abstracts_df, pd.DataFrame(embedding_tensor)], axis=1)
    output.to_csv(output_file, encoding="ISO-8859-1")

def read_df_with_embedding(file):
    """Load a CSV written with embedding columns and normalise those columns.

    The embedding columns come back from ``read_csv`` labelled with the
    strings ``"0"``..``"767"``; they are relabelled with the integers
    ``0``..``767`` and cast to float. All other columns are left as read.
    """
    embedding_columns = range(768)
    frame = pd.read_csv(file, encoding="ISO-8859-1")
    frame = frame.rename(columns={str(col): col for col in embedding_columns})
    return frame.astype({col: float for col in embedding_columns})

def compute_center(df):
    """Return the column-wise mean of embedding columns ``0``..``767`` as an array."""
    embedding_columns = list(range(768))
    column_means = df[embedding_columns].mean()
    return column_means.to_numpy()
    
def compute_euclidean_distances(center, df):
    """Return the Euclidean distance from ``center`` to every row of ``df``.

    Only the embedding columns labelled ``0``..``767`` are used. They are
    extracted once as a float matrix instead of re-slicing the frame with
    ``iloc`` for each row, which avoids building a full-width row Series on
    every iteration.

    Args:
        center: Vector of 768 values to measure from.
        df: DataFrame containing the integer-labelled embedding columns.

    Returns:
        A list with one distance per row, in row order (empty for an empty
        frame).
    """
    embeddings = df[list(range(768))].to_numpy(dtype=float)
    return [distance.euclidean(center, row) for row in embeddings]

def relevance_category(d, max_d):
    """Label distance ``d`` as 'relevant' when it is at most twice ``max_d``."""
    threshold = 2 * max_d
    return 'relevant' if d <= threshold else 'less relevant'

def compute_relevancies(center, max_d, df):
    """Prefix ``df`` with each row's distance to ``center`` and its label.

    Args:
        center: Reference vector passed to ``compute_euclidean_distances``.
        max_d: Reference distance passed to ``relevance_category``.
        df: DataFrame with the embedding columns.

    Returns:
        A new DataFrame whose first two columns are ``'distance'`` and
        ``'relevance category'``, followed by all columns of ``df``.
    """
    distances = compute_euclidean_distances(center, df)
    # Reuse df's index: column-wise concat pairs rows by index label, so a
    # fresh 0..n-1 index would misalign (and NaN-pad) any frame that has been
    # filtered or otherwise carries a non-default index.
    scores = pd.DataFrame(
        {
            'distance': distances,
            'relevance category': [relevance_category(d, max_d) for d in distances],
        },
        index=df.index,
    )
    return pd.concat([scores, df], axis=1)

def umap_reduce_dimensions(x):
    """Standardise ``x`` with ``StandardScaler`` and project it to 2 components with UMAP."""
    reducer = umap.UMAP(n_neighbors=30, min_dist=0.1, n_components=2, random_state=47)
    standardised = StandardScaler().fit_transform(x)
    return reducer.fit_transform(standardised)

def generate_plot_df_with_reduced_dimensions(df_map):
    """Stack several embedding frames and add 2-D coordinates for plotting.

    Args:
        df_map: Mapping from a source label to a DataFrame that contains the
            embedding columns ``0``..``767``.

    Returns:
        One DataFrame with the rows of every input frame (in mapping order,
        each keeping its own index labels), a ``'Source'`` column holding the
        mapping key, and ``'x'``/``'y'`` columns produced by
        ``umap_reduce_dimensions`` on the stacked embedding columns.
    """
    # ``assign`` keeps each frame's own index, so the label lands on the right
    # rows even when a frame does not carry the default 0..n-1 index.
    labelled = [
        df.assign(Source=[source] * df.shape[0]) for source, df in df_map.items()
    ]
    plot_df = pd.concat(labelled, axis=0)
    red_dim = umap_reduce_dimensions(plot_df[list(range(768))])
    # Same index as plot_df so the coordinates line up row for row.
    red_dim_df = pd.DataFrame(red_dim, columns=['x', 'y'], index=plot_df.index)
    return pd.concat([plot_df, red_dim_df], axis=1)

def plot_clusters(plot_df, ax):
    """Scatter the ``x``/``y`` columns of ``plot_df`` on ``ax``, one colour per ``Source``."""
    label_size = 20
    for color_index, source in enumerate(plot_df['Source'].unique()):
        points = plot_df[plot_df['Source'] == source][['x', 'y']]
        ax.scatter(
            x=points['x'],
            y=points['y'],
            marker='o',
            s=50,
            c=f'C{color_index}',
            label=source,
            alpha=0.7,
        )
    ax.legend(loc='best', fontsize=label_size)
    ax.set_xlabel('arbitrary dimension x', fontsize=label_size)
    ax.set_ylabel('arbitrary dimension y', fontsize=label_size)
    ax.tick_params(axis='both', labelsize=label_size)

# Make Embeddings

In [None]:
# Embed the abstracts in Saeki_with_abstract.csv and save the augmented table.
input_file = "../data/Saeki_with_abstract.csv"
output_file = "../data/Saeki_with_abstract_and_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Same embedding step for the OPV search results.
input_file = "../data/search_results_OPV.csv"
output_file = "../data/search_results_OPV_with_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Same embedding step for the lithium batteries search results.
input_file = "../data/search_results_lithium_batteries.csv"
output_file = "../data/search_results_lithium_batteries_with_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Same embedding step for the fullerene polymer synthesis search results.
input_file = "../data/search_results_fullerene_polymer_synthesis.csv"
output_file = "../data/search_results_fullerene_polymer_synthesis_with_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Same embedding step for the OLED search results.
input_file = "../data/search_results_OLED.csv"
output_file = "../data/search_results_OLED_with_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Same embedding step for the fullerene OPV table.
input_file = "../data/fullerene_OPV_with_abstract.csv"
output_file = "../data/fullerene_OPV_with_abstract_and_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Reduce the nonfullerene table to one (DOI, Abstract) pair per DOI.
nonfullerene_OPV = pd.read_csv("../data/Nonfullerene_OPV_Abstract.csv", encoding = "ISO-8859-1")
# Going through a dict keyed by DOI collapses repeated DOIs into one entry.
# NOTE(review): for a repeated DOI the abstract kept is presumably the last one read — confirm.
abstract_dict = nonfullerene_OPV[['DOI', 'Abstract']].set_index('DOI').to_dict()['Abstract']
nonfullerene_OPV_df = pd.DataFrame({'DOI': abstract_dict.keys(), 'Abstract': abstract_dict.values()})
# Drop entries without an abstract and renumber the rows from 0.
nonfullerene_OPV_df = nonfullerene_OPV_df[nonfullerene_OPV_df['Abstract'].notnull()].reset_index(drop=True)
nonfullerene_OPV_df.to_csv("../data/Nonfullerene_OPV_Abstract_cleaned.csv", encoding = "ISO-8859-1")
# Cell output: first rows and shape of the cleaned table.
nonfullerene_OPV_df.head(), nonfullerene_OPV_df.shape

In [None]:
# Embed the cleaned nonfullerene OPV abstracts written by the previous cell.
input_file = "../data/Nonfullerene_OPV_Abstract_cleaned.csv"
output_file = "../data/nonfullerene_OPV_with_abstract_and_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Same embedding step for the OPV stability and degradation search results.
input_file = "../data/search_results_OPV_Stability_and_Degradation.csv"
output_file = "../data/search_results_OPV_Stability_and_Degradation_with_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Same embedding step for the OPV morphology control search results.
input_file = "../data/search_results_OPV_Morphology_Control.csv"
output_file = "../data/search_results_OPV_Morphology_Control_with_embedding.csv"
make_embeding_with_file(input_file, output_file)

In [None]:
# Same embedding step for the OPV fullerene donor search results.
input_file = "../data/search_results_OPV_Fullerene_Donor.csv"
output_file = "../data/search_results_OPV_Fullerene_Donor_with_embedding.csv"
make_embeding_with_file(input_file, output_file)

# Random Seed Article Distance

In [None]:
# Repeatedly draw 5 random articles from the fullerene OPV table and record,
# per draw, the centroid of their embeddings and the largest distance from
# any drawn article to that centroid.
Saeki_fullerene_OPV = read_df_with_embedding("../data/fullerene_OPV_with_abstract_and_embedding.csv")    
Saeki_fullerene_OPV_dois = Saeki_fullerene_OPV['DOI'].to_numpy()

n_sampling = 1000
Saeki_fullerene_OPV_centers = []
Saeki_fullerene_OPV_max_distances = []

# NOTE(review): no random.seed call is visible here, so the draws presumably differ between runs.
for i in range(n_sampling):
    # Sample 5 row positions and collect the DOIs found there.
    selected_dois = set(Saeki_fullerene_OPV_dois[random.sample(range(len(Saeki_fullerene_OPV_dois)), 5)])
    # Rows are selected by DOI membership, not by position.
    # NOTE(review): a DOI present on several rows would yield more than 5 seeds — confirm DOIs are unique.
    seeds = Saeki_fullerene_OPV[pd.Series([doi in selected_dois for doi in Saeki_fullerene_OPV_dois])]
    seeds_center = compute_center(seeds)
    distances = compute_euclidean_distances(seeds_center, seeds)
    Saeki_fullerene_OPV_centers.append(seeds_center)
    Saeki_fullerene_OPV_max_distances.append(np.max(distances))

In [None]:
# Component-wise average of the sampled centroids, then mean and standard
# deviation of the per-draw maximum distances (shown as the cell output).
Saeki_fullerene_OPV_centers_avg = np.average(np.array(Saeki_fullerene_OPV_centers), axis=0)
Saeki_fullerene_OPV_max_distances_avg = np.average(np.array(Saeki_fullerene_OPV_max_distances))
Saeki_fullerene_OPV_max_distances_std = np.std(Saeki_fullerene_OPV_max_distances)
Saeki_fullerene_OPV_max_distances_avg, Saeki_fullerene_OPV_max_distances_std

In [None]:
# Same sampling experiment as for the fullerene table, on the nonfullerene OPV
# table: 5 random articles per draw, centroid and largest distance recorded.
Saeki_nonfullerene_OPV = read_df_with_embedding("../data/nonfullerene_OPV_with_abstract_and_embedding.csv")
Saeki_nonfullerene_OPV_dois = Saeki_nonfullerene_OPV['DOI'].to_numpy()

n_sampling = 1000
Saeki_nonfullerene_OPV_centers = []
Saeki_nonfullerene_OPV_max_distances = []

for i in range(n_sampling):
    # Sample 5 row positions and collect the DOIs found there.
    selected_dois = set(Saeki_nonfullerene_OPV_dois[random.sample(range(len(Saeki_nonfullerene_OPV_dois)), 5)])
    # Rows are selected by DOI membership, not by position.
    seeds = Saeki_nonfullerene_OPV[pd.Series([doi in selected_dois for doi in Saeki_nonfullerene_OPV_dois])]
    seeds_center = compute_center(seeds)
    distances = compute_euclidean_distances(seeds_center, seeds)
    Saeki_nonfullerene_OPV_centers.append(seeds_center)
    Saeki_nonfullerene_OPV_max_distances.append(np.max(distances))

In [None]:
# Component-wise average of the sampled centroids, then mean and standard
# deviation of the per-draw maximum distances (shown as the cell output).
Saeki_nonfullerene_OPV_centers_avg = np.average(np.array(Saeki_nonfullerene_OPV_centers), axis=0)
Saeki_nonfullerene_OPV_max_distances_avg = np.average(np.array(Saeki_nonfullerene_OPV_max_distances))
Saeki_nonfullerene_OPV_max_distances_std = np.std(Saeki_nonfullerene_OPV_max_distances)
Saeki_nonfullerene_OPV_max_distances_avg, Saeki_nonfullerene_OPV_max_distances_std

In [None]:
# Distance between the averaged fullerene and nonfullerene seed centroids.
distance.euclidean(Saeki_fullerene_OPV_centers_avg, Saeki_nonfullerene_OPV_centers_avg)

# Select Random Seed Articles

In [None]:
# Plain read_csv (not read_df_with_embedding): the table is only split and
# written back here, so the embedding columns are not relabelled or cast.
Saeki_fullerene_OPV = pd.read_csv("../data/fullerene_OPV_with_abstract_and_embedding.csv", encoding = "ISO-8859-1")
Saeki_fullerene_OPV_dois = Saeki_fullerene_OPV['DOI'].to_numpy()
# Sample 5 row positions and keep the DOIs found there as the seed set.
selected_dois = set(Saeki_fullerene_OPV_dois[random.sample(range(len(Saeki_fullerene_OPV_dois)), 5)])
selected_dois

In [None]:
# Split the table into the rows whose DOI was selected and all other rows,
# and save each part to its own CSV.
seeds_Saeki = Saeki_fullerene_OPV[pd.Series([doi in selected_dois for doi in Saeki_fullerene_OPV_dois])]
seeds_Saeki.to_csv("../data/seeds_Saeki_fullerene_OPV_with_abstract_and_embedding.csv", encoding = "ISO-8859-1")
other_Saeki = Saeki_fullerene_OPV[pd.Series([doi not in selected_dois for doi in Saeki_fullerene_OPV_dois])]
other_Saeki.to_csv("../data/other_Saeki_fullerene_OPV_with_abstract_and_embedding.csv", encoding = "ISO-8859-1")

# Compute relevance based on euclidean distances

In [None]:
# Centroid of the saved seed articles and each seed's distance to it;
# max_d is the largest of those distances.
seeds_Saeki = read_df_with_embedding("../data/seeds_Saeki_fullerene_OPV_with_abstract_and_embedding.csv")
seeds_center = compute_center(seeds_Saeki)
distances = compute_euclidean_distances(seeds_center, seeds_Saeki)
max_d = np.max(distances)
max_d, distances

In [None]:
# Each cell below loads one embedded table, scores every row against the seed
# centroid with compute_relevancies and shows the count per relevance category.

# Non-seed articles of the fullerene OPV table.
other_Saeki = read_df_with_embedding("../data/other_Saeki_fullerene_OPV_with_abstract_and_embedding.csv")
other_Saeki = compute_relevancies(seeds_center, max_d, other_Saeki)
other_Saeki['relevance category'].value_counts()

In [None]:
# OPV search results.
search_results_OPV = read_df_with_embedding("../data/search_results_OPV_with_embedding.csv")
search_results_OPV = compute_relevancies(seeds_center, max_d, search_results_OPV)
search_results_OPV['relevance category'].value_counts()

In [None]:
# OLED search results.
search_results_OLED = read_df_with_embedding("../data/search_results_OLED_with_embedding.csv")
search_results_OLED = compute_relevancies(seeds_center, max_d, search_results_OLED)
search_results_OLED['relevance category'].value_counts()

In [None]:
# Nonfullerene OPV table.
Saeki_nonfullerene_OPV = read_df_with_embedding("../data/nonfullerene_OPV_with_abstract_and_embedding.csv")
Saeki_nonfullerene_OPV = compute_relevancies(seeds_center, max_d, Saeki_nonfullerene_OPV)
Saeki_nonfullerene_OPV['relevance category'].value_counts()

In [None]:
# Fullerene polymer synthesis search results.
search_results_fullerene_polymer_synthesis = read_df_with_embedding("../data/search_results_fullerene_polymer_synthesis_with_embedding.csv")
search_results_fullerene_polymer_synthesis = compute_relevancies(seeds_center, max_d, search_results_fullerene_polymer_synthesis)
search_results_fullerene_polymer_synthesis['relevance category'].value_counts()

In [None]:
# Lithium batteries search results.
search_results_lithium_batteries = read_df_with_embedding("../data/search_results_lithium_batteries_with_embedding.csv")
search_results_lithium_batteries = compute_relevancies(seeds_center, max_d, search_results_lithium_batteries)
search_results_lithium_batteries['relevance category'].value_counts()

In [None]:
# NOTE(review): this repeats the OPV search results cell above with the same inputs.
search_results_OPV = read_df_with_embedding("../data/search_results_OPV_with_embedding.csv")
search_results_OPV = compute_relevancies(seeds_center, max_d, search_results_OPV)
search_results_OPV['relevance category'].value_counts()

In [None]:
# OPV stability and degradation search results.
search_results_OPV_Stability_and_Degradation = read_df_with_embedding("../data/search_results_OPV_Stability_and_Degradation_with_embedding.csv")
search_results_OPV_Stability_and_Degradation = compute_relevancies(seeds_center, max_d, search_results_OPV_Stability_and_Degradation)
search_results_OPV_Stability_and_Degradation['relevance category'].value_counts()

In [None]:
# OPV morphology control search results.
search_results_OPV_Morphology_Control = read_df_with_embedding("../data/search_results_OPV_Morphology_Control_with_embedding.csv")
search_results_OPV_Morphology_Control = compute_relevancies(seeds_center, max_d, search_results_OPV_Morphology_Control)
search_results_OPV_Morphology_Control['relevance category'].value_counts()

# Plots

In [None]:
# seeds_center is a 1-D array, so DataFrame(...) is a single column and .T
# turns it into one row with columns 0..767, matching the other frames.
seeds_center_df = pd.DataFrame(seeds_center)
# Project centroid, seeds, benchmark and API results together so they share
# one set of 2-D coordinates.
plot_df = generate_plot_df_with_reduced_dimensions({
    'Seed Centroid': seeds_center_df.T,
    'Seed': seeds_Saeki,
    'Benchmark': other_Saeki,
    'API': search_results_OPV,
})
plot_df.shape

In [None]:
# Split the projected frame by source (and relevance category where present),
# keeping only the columns needed for plotting.
seed_plot_df = plot_df[plot_df['Source'] == 'Seed'][['x', 'y']]
# seeds_center_df is rebound here: from now on it holds the centroid's 2-D coordinates.
seeds_center_df =  plot_df[plot_df['Source'] == 'Seed Centroid'][['x', 'y']]
other_plot_df = plot_df[plot_df['Source'] == 'Benchmark'][['x', 'y', 'relevance category']]
other_plot_df_rel = other_plot_df[other_plot_df['relevance category'] == 'relevant'][['x', 'y']]
api_plot_df = plot_df[plot_df['Source'] == 'API'][['x', 'y', 'relevance category']]
api_plot_df_rel = api_plot_df[api_plot_df['relevance category'] == 'relevant'][['x', 'y']]
api_plot_df_less = api_plot_df[api_plot_df['relevance category'] == 'less relevant'][['x', 'y']]

In [None]:
# One scatter layer per subset; layers are drawn in the order listed, so the
# seeds and the seed centroid end up on top.
fig, ax = plt.subplots(1, 1, figsize=(16, 9))

ax.scatter(x=api_plot_df_less['x'], y=api_plot_df_less['y'], marker='o', s=30, c='C0', label='API: less relevant', alpha=0.7)
ax.scatter(x=api_plot_df_rel['x'], y=api_plot_df_rel['y'], marker='o', s=30, c='C1', label='API: relevant', alpha=0.7)
ax.scatter(x=other_plot_df_rel['x'], y=other_plot_df_rel['y'], marker='*', s=150, c='C1', label='Benchmark: relevant', alpha=0.7)
ax.scatter(x=seed_plot_df['x'], y=seed_plot_df['y'], marker='X', s=200, c='C7', label='Seed', alpha=1.0)
ax.scatter(x=seeds_center_df['x'], y=seeds_center_df['y'], marker='X', s=200, c='C3', label='Seed Centroid', alpha=1.0)

ax.legend(loc='best', fontsize=20)
ax.set_xlabel('arbitrary dimension x', fontsize=20)
ax.set_ylabel('arbitrary dimension y', fontsize=20)
ax.tick_params(axis='both', labelsize=20)

# Write the figure to disk.
plt.savefig('../plots/scibert_plot.png', bbox_inches='tight')

In [None]:
# List the less relevant API results that fall in the region x < 0 and y < 0
# of the projection, together with their DOI, title and abstract.
api_plot_df = plot_df[plot_df['Source'] == 'API'][['x', 'y', 'relevance category', 'DOI', 'Title', 'Abstract']]
api_plot_df_less = api_plot_df[api_plot_df['relevance category'] == 'less relevant']
api_plot_df_less = api_plot_df_less[api_plot_df_less['x'] < 0]
api_plot_df_less = api_plot_df_less[api_plot_df_less['y'] < 0]
api_plot_df_less

# Experiment with different datasets

In [None]:
# Each pair of cells below projects a combination of datasets together and
# then scatter-plots the result with one colour per source.

# Fullerene vs. nonfullerene OPV tables.
plot_df = generate_plot_df_with_reduced_dimensions({
    'Saeki Fullerene OPV': read_df_with_embedding("../data/fullerene_OPV_with_abstract_and_embedding.csv"),
    'Saeki Nonfullerene OPV': read_df_with_embedding("../data/nonfullerene_OPV_with_abstract_and_embedding.csv"),
})
plot_df.shape

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)

In [None]:
# OPV vs. OLED search results.
plot_df = generate_plot_df_with_reduced_dimensions({
    'OPV': read_df_with_embedding("../data/search_results_OPV_with_embedding.csv"),
    'OLED': read_df_with_embedding("../data/search_results_OLED_with_embedding.csv"),
})
plot_df.shape

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)

In [None]:
# OPV vs. lithium batteries search results.
plot_df = generate_plot_df_with_reduced_dimensions({
    'OPV': read_df_with_embedding("../data/search_results_OPV_with_embedding.csv"),
    'Lithium Batteries': read_df_with_embedding("../data/search_results_lithium_batteries_with_embedding.csv"),
})
plot_df.shape

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)

In [None]:
# OPV vs. fullerene polymer synthesis search results.
plot_df = generate_plot_df_with_reduced_dimensions({
    'OPV': read_df_with_embedding("../data/search_results_OPV_with_embedding.csv"),
    'Fullerene Polymer Synthesis': read_df_with_embedding("../data/search_results_fullerene_polymer_synthesis_with_embedding.csv"),
})
plot_df.shape

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)

In [None]:
# OPV, fullerene polymer synthesis and lithium batteries search results together.
plot_df = generate_plot_df_with_reduced_dimensions({
    'OPV': read_df_with_embedding("../data/search_results_OPV_with_embedding.csv"),
    'Fullerene Polymer Synthesis': read_df_with_embedding("../data/search_results_fullerene_polymer_synthesis_with_embedding.csv"),
    'Lithium Batteries': read_df_with_embedding("../data/search_results_lithium_batteries_with_embedding.csv")
})
plot_df.shape

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)

In [None]:
# Count how many distinct titles the two search result sets have in common.
OPV_Stability_and_Degradation = read_df_with_embedding("../data/search_results_OPV_Stability_and_Degradation_with_embedding.csv")
OPV_Morphology_Control = read_df_with_embedding("../data/search_results_OPV_Morphology_Control_with_embedding.csv")

OPV_Stability_and_Degradation_title_set = set(OPV_Stability_and_Degradation['Title'].tolist())
OPV_Morphology_Control_title_set = set(OPV_Morphology_Control['Title'].tolist())
overlap = OPV_Stability_and_Degradation_title_set.intersection(OPV_Morphology_Control_title_set)
# Cell output: distinct titles in each set, then the number shared by both.
len(OPV_Stability_and_Degradation_title_set), len(OPV_Morphology_Control_title_set), len(overlap)

In [None]:
# Fullerene OPV table vs. the stability and degradation search results
# (the morphology control set is left out of this projection).
plot_df = generate_plot_df_with_reduced_dimensions({
    'Saeki Fullerene OPV': read_df_with_embedding("../data/fullerene_OPV_with_abstract_and_embedding.csv"),
    'OPV Stability and Degradation': OPV_Stability_and_Degradation,
    # 'OPV Morphology Control': OPV_Morphology_Control,
})
plot_df.shape

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)

In [None]:
# Fullerene OPV table vs. the general OPV search results.
plot_df = generate_plot_df_with_reduced_dimensions({
    'Saeki Fullerene OPV': read_df_with_embedding("../data/fullerene_OPV_with_abstract_and_embedding.csv"),
    'OPV': read_df_with_embedding("../data/search_results_OPV_with_embedding.csv"),
    # 'OPV Morphology Control': OPV_Morphology_Control,
})
plot_df.shape

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)

In [None]:
# Project the benchmark table together with three search result sets.
plot_df = generate_plot_df_with_reduced_dimensions({
    'Saeki Benchmark': read_df_with_embedding("../data/fullerene_OPV_with_abstract_and_embedding.csv"),
    'OPV': read_df_with_embedding("../data/search_results_OPV_with_embedding.csv"),
    'OPV Fullerene Donor': read_df_with_embedding("../data/search_results_OPV_Fullerene_Donor_with_embedding.csv"),
    'Lithium Batteries': read_df_with_embedding("../data/search_results_lithium_batteries_with_embedding.csv")
})
plot_df.shape

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)

In [None]:
# Split the projected frame back into one frame per source.
Saeki_Fullerene_OPV = plot_df[plot_df['Source'] == 'Saeki Benchmark']
OPV = plot_df[plot_df['Source'] == 'OPV']
OPV_Fullerene_Donor = plot_df[plot_df['Source'] == 'OPV Fullerene Donor']
Lithium_Batteries = plot_df[plot_df['Source'] == 'Lithium Batteries']

In [None]:
# NOTE(review): compute_center_low_dim, compute_euclidean_distances_low_dim and
# compute_relevancies_low_dim are defined in a cell further down this notebook;
# that cell has to be executed before these, otherwise they raise NameError.

# Centroid of the benchmark points in the 2-D projection; p90_d is the 90th
# percentile of the benchmark points' distances to it.
low_dim_center = compute_center_low_dim(Saeki_Fullerene_OPV)
distances = compute_euclidean_distances_low_dim(low_dim_center, Saeki_Fullerene_OPV)
p90_d = np.percentile(distances, 90)
p90_d, distances

In [None]:
# relevance_category labels a point 'relevant' when its distance is <= 2 * p90_d.
compute_relevancies_low_dim(low_dim_center, p90_d, OPV)['relevance category'].value_counts()

In [None]:
compute_relevancies_low_dim(low_dim_center, p90_d, OPV_Fullerene_Donor)['relevance category'].value_counts()

In [None]:
# Project the benchmark table together with the OPV, stability and degradation,
# and lithium batteries search results.
plot_df = generate_plot_df_with_reduced_dimensions({
    'Saeki Benchmark': read_df_with_embedding("../data/fullerene_OPV_with_abstract_and_embedding.csv"),
    'OPV': read_df_with_embedding("../data/search_results_OPV_with_embedding.csv"),
    'OPV Stability and Degradation': read_df_with_embedding("../data/search_results_OPV_Stability_and_Degradation_with_embedding.csv"),
    'Lithium Batteries': read_df_with_embedding("../data/search_results_lithium_batteries_with_embedding.csv")
})
plot_df.shape

In [None]:
# Save the combined frame, including the Source, x and y columns.
plot_df.to_csv('../data/search_all_with_saeki_benchmark.csv', encoding = "ISO-8859-1")

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
plot_clusters(plot_df, ax)
plt.savefig('../plots/search_all_with_saeki_benchmark.png', bbox_inches='tight')

In [None]:
# Number of points per source among those with x < 7 in the projection.
plot_df[plot_df['x'] < 7]['Source'].value_counts()

In [None]:
# Split the projected frame back into one frame per source.
Saeki_Fullerene_OPV = plot_df[plot_df['Source'] == 'Saeki Benchmark']
OPV = plot_df[plot_df['Source'] == 'OPV']
OPV_Stability_and_Degradation = plot_df[plot_df['Source'] == 'OPV Stability and Degradation']
Lithium_Batteries = plot_df[plot_df['Source'] == 'Lithium Batteries']

In [None]:
# Centroid of the benchmark rows over the embedding columns 0..767; p90_d is
# the 90th percentile of the benchmark rows' distances to it.
high_dim_center = compute_center(Saeki_Fullerene_OPV)
distances = compute_euclidean_distances(high_dim_center, Saeki_Fullerene_OPV)
p90_d = np.percentile(distances, 90)
p90_d, distances

In [None]:
# relevance_category labels a row 'relevant' when its distance is <= 2 * p90_d.
compute_relevancies(high_dim_center, p90_d, Saeki_Fullerene_OPV)['relevance category'].value_counts()

In [None]:
compute_relevancies(high_dim_center, p90_d, OPV)['relevance category'].value_counts()

In [None]:
compute_relevancies(high_dim_center, p90_d, OPV_Stability_and_Degradation)['relevance category'].value_counts()

In [None]:
compute_relevancies(high_dim_center, p90_d, Lithium_Batteries)['relevance category'].value_counts()

In [None]:
def compute_center_low_dim(df):
    """Return the mean of the ``x`` and ``y`` columns of ``df`` as a 2-element array."""
    xy_means = df[['x', 'y']].mean()
    return xy_means.to_numpy()
    
def compute_euclidean_distances_low_dim(center, df):
    """Return the Euclidean distance from ``center`` to each row's ``(x, y)`` point.

    The two coordinate columns are extracted once as a float matrix instead of
    re-slicing the frame with ``iloc`` for each row.

    Args:
        center: 2-element vector to measure from.
        df: DataFrame with ``x`` and ``y`` columns.

    Returns:
        A list with one distance per row, in row order (empty for an empty
        frame).
    """
    points = df[['x', 'y']].to_numpy(dtype=float)
    return [distance.euclidean(center, point) for point in points]

def compute_relevancies_low_dim(center, max_d, df):
    """Prefix ``df`` with each row's 2-D distance to ``center`` and its label.

    Args:
        center: Reference point passed to ``compute_euclidean_distances_low_dim``.
        max_d: Reference distance passed to ``relevance_category``.
        df: DataFrame with ``x`` and ``y`` columns.

    Returns:
        A new DataFrame whose first two columns are ``'distance'`` and
        ``'relevance category'``, followed by all columns of ``df``.
    """
    distances = compute_euclidean_distances_low_dim(center, df)
    # Reuse df's index: column-wise concat pairs rows by index label, so a
    # fresh 0..n-1 index would misalign (and NaN-pad) any frame that has been
    # filtered or otherwise carries a non-default index.
    scores = pd.DataFrame(
        {
            'distance': distances,
            'relevance category': [relevance_category(d, max_d) for d in distances],
        },
        index=df.index,
    )
    return pd.concat([scores, df], axis=1)

In [None]:
# Centroid of the benchmark points in the 2-D projection; p90_d is the 90th
# percentile of the benchmark points' distances to it.
low_dim_center = compute_center_low_dim(Saeki_Fullerene_OPV)
distances = compute_euclidean_distances_low_dim(low_dim_center, Saeki_Fullerene_OPV)
p90_d = np.percentile(distances, 90)
p90_d, distances

In [None]:
# relevance_category labels a point 'relevant' when its distance is <= 2 * p90_d.
compute_relevancies_low_dim(low_dim_center, p90_d, Saeki_Fullerene_OPV)['relevance category'].value_counts()

In [None]:
compute_relevancies_low_dim(low_dim_center, p90_d, OPV)['relevance category'].value_counts()

In [None]:
compute_relevancies_low_dim(low_dim_center, p90_d, OPV_Stability_and_Degradation)['relevance category'].value_counts()

In [None]:
compute_relevancies_low_dim(low_dim_center, p90_d, Lithium_Batteries)['relevance category'].value_counts()