In [None]:
import os

# Fall back to GPU 0 only when the caller has not already selected devices.
# Kept ahead of the TensorFlow import that follows.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
import tensorflow as tf

# Turn on memory growth for every visible GPU so TensorFlow allocates device
# memory as needed, then report how many physical/logical GPUs are available.
gpus = tf.config.experimental.list_physical_devices("GPU")
if gpus:
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
    logical_gpus = tf.config.experimental.list_logical_devices("GPU")
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")

from pathlib import Path
import numpy as np
import pandas as pd

import random
import matplotlib.pyplot as plt 
import tensorflow as tf 
import psmiles 
import rdkit
from rdkit import Chem 
from rdkit.Chem import Draw 
import itertools


Moieties

In [2]:
import itertools

# Candidate fragments, organised in four pools. `end_groups_redox` keeps the
# pools as a list of lists; `end_groups_redoxh` is the flattened list that the
# enumeration step iterates over. Pools 2 and 3 carry their own parentheses.
replacement_elements_0 = ['S', '*-C=O', 'CS', 'C', 'CN', 'O', 'N']
replacement_elements_1 = ['[H]C(=O)O-*', 'FC(F)F', 'C(=O)Cl', 'NC(=O)']
# Fixed: '(=C=C=0)' used the digit zero where the oxygen symbol 'O' was meant.
# NOTE(review): '(COOH)' and '(CF3)' read like formula shorthand rather than
# SMILES -- confirm whether they are intended to survive validation.
replacement_elements_2 = ['(COOH)', '(=C=C=O)', '(=S)', '(CF3)', '(C(=O)Cl)', '(NC(=O))']
# Fixed: '(-C=0)' used the digit zero where the oxygen symbol 'O' was meant.
replacement_elements_3 = [
    '(S)', '(=N)', '(=O)', '(-C=O)', '(#N)', '(C#N)',
    '(CS)', '(SC)', '(C)', '(CN)', '(O)', '(N)',
]

end_groups_redox = [
    replacement_elements_0,
    replacement_elements_1,
    replacement_elements_2,
    replacement_elements_3,
]

end_groups_redoxh = list(itertools.chain.from_iterable(end_groups_redox))

In [3]:
# Add two further end groups to the flattened list. The membership check keeps
# this cell idempotent: re-running it no longer appends duplicate entries
# (duplicates would multiply the permutations enumerated later).
for extra_group in ("C(=O)O", "C(=O)OC"):
    if extra_group not in end_groups_redoxh:
        end_groups_redoxh.append(extra_group)

Taking the reference candidates

In [187]:
# Reference candidates as two parallel lists: a display name and the matching
# SMILES string for each material. The entries below are placeholders.
k = {
    "name": ["name of the organic material"],
    "smiles": ["SMILES string"],
}

Iterative addition of the redox-active moieties to the reference candidates

In [7]:
# Single-character markers inserted into a reference SMILES to tag positions:
# '&' is later replaced by a parenthesised side group, '^' by a backbone group.
side, back = '&', '^'

from itertools import permutations

def generate_all_combinations(base_string, inserts):
    """Enumerate every string made by inserting entries of ``inserts`` into ``base_string``.

    For each ordered selection of 1 up to ``len(inserts)`` entries (taken by
    position, without reuse), the entries are inserted one after another at
    every possible offset of the string built so far.

    Args:
        base_string: The string to insert into.
        inserts: Sequence of strings to insert.

    Returns:
        A sorted list of the distinct resulting strings. Empty when
        ``inserts`` is empty.
    """
    def _insert_everywhere(text, token):
        # All ways of splicing `token` into `text`, including both ends.
        return [text[:pos] + token + text[pos:] for pos in range(len(text) + 1)]

    found = set()
    for size in range(1, len(inserts) + 1):
        for ordering in permutations(inserts, size):
            frontier = [base_string]
            for token in ordering:
                frontier = [
                    grown
                    for partial in frontier
                    for grown in _insert_everywhere(partial, token)
                ]
            found.update(frontier)

    return sorted(found)

In [8]:
import itertools
# Build candidate SMILES for every reference material by substituting the
# position markers with end groups:
#   * the `back` marker  -> a backbone group repeated 1 or 2 times
#   * the `side` marker  -> a side group repeated 1 or 2 times, in parentheses
# Two fixes relative to the earlier version of this cell:
#   1. Loop variables are named `side_group` / `back_group`, so the global
#      marker characters `side` / `back` are no longer overwritten and the
#      cell can be re-run safely.
#   2. Candidates are accumulated across *all* reference materials; previously
#      the list and `df` were reset per reference and only the last survived.
inserts = [back, side]

backbone_sidechain = []
for name, smile in zip(k['name'], k['smiles']):
    combinations = generate_all_combinations(smile, inserts)
    for init in combinations:
        has_back = back in init
        has_side = side in init
        if has_back and has_side:
            # Ordered pairs of distinct list entries: (side group, backbone group).
            for side_group, back_group in itertools.permutations(end_groups_redoxh, 2):
                for num_back in range(1, 3):
                    temp_back = init.replace(back, back_group * num_back)
                    for num_side in range(1, 3):
                        repl = "(" + num_side * side_group + ")"
                        backbone_sidechain.append(temp_back.replace(side, repl))
        elif has_side:
            for side_group in end_groups_redoxh:
                for num_side in range(1, 3):
                    repl = "(" + num_side * side_group + ")"
                    backbone_sidechain.append(init.replace(side, repl))
        elif has_back:
            for back_group in end_groups_redoxh:
                for num_back in range(1, 3):
                    backbone_sidechain.append(init.replace(back, back_group * num_back))

# Built once, after the loop, so `df` exists (possibly empty) even when `k`
# holds no reference materials.
df = pd.DataFrame(backbone_sidechain, columns=['smiles'])
df = df.dropna(axis=0, ignore_index=True)

Verifying and storing the correct polymer SMILES

In [None]:
import psmiles 
from psmiles import PolymerSmiles as ps
def verify_smiles(row):
    """Validate one candidate string by building a ``PolymerSmiles`` from it.

    Args:
        row: Candidate SMILES string (one value of the ``smiles`` column).

    Returns:
        The ``psmiles`` attribute of the constructed ``PolymerSmiles`` object,
        or ``None`` when construction (or reading the attribute) raises.
        Previously a failed construction fell through to ``None.psmiles`` and
        aborted the whole ``apply`` with ``AttributeError``.
    """
    try:
        return ps(row).psmiles
    except Exception:
        # Best-effort filter: an unparseable candidate is reported as None
        # rather than stopping the run.
        return None

# Validate every candidate, then drop rows whose validation result is missing
# (None/NaN) so later steps only receive usable strings. The index is reset to
# keep it contiguous after filtering.
df['smiles'] = df['smiles'].apply(verify_smiles)
df = df.dropna(subset=['smiles']).reset_index(drop=True)

Fingerprinting with sentence transformers 

In [None]:
import sentence_transformers 
from sentence_transformers import SentenceTransformer

# Encode every SMILES string with the polyBERT sentence-transformer and store
# one embedding per row in the `fps` column.
polyBERT = SentenceTransformer("kuelumbus/polyBERT")
sms = df['smiles'].tolist()
fps = polyBERT.encode(sms, show_progress_bar=False)
df['fps'] = [embedding for embedding in fps]

Concatenation with the selector vector (for voltage and specific capacity, with a lithium charge carrier at 1C and 50% active material content)

In [None]:
# Attach one constant selector vector per target property to every row.
# NOTE(review): the meaning of the individual entries is defined by the trained
# model's input convention and is not visible in this notebook -- confirm.
selector_by_column = {
    'selector_sc': [0.5, 1.0, 0.0, 1.0, 1.0],
    'selector_v': [0.5, 0.0, 0.0, 1.0, 0.0],
}
for column, selector in selector_by_column.items():
    df[column] = [selector] * len(df)

In [None]:
# Model inputs: each fingerprint followed by the matching selector vector,
# one array per row (`sc` for specific capacity, `v` for voltage).
sc = [
    np.concatenate([fingerprint, selector])
    for fingerprint, selector in zip(df['fps'], df['selector_sc'])
]
v = [
    np.concatenate([fingerprint, selector])
    for fingerprint, selector in zip(df['fps'], df['selector_v'])
]

Property prediction by the meta-learner model 

In [None]:
# Load the trained Keras model (the path below is a placeholder) and run it on
# both input sets built above.
model_path = 'load your keras model here'
model = tf.keras.models.load_model(model_path)

voltage_inputs = tf.convert_to_tensor(v)
capacity_inputs = tf.convert_to_tensor(sc)
voltage_predictions = model.predict(voltage_inputs)
capacity_predictions = model.predict(capacity_inputs)