In [1]:
%matplotlib notebook

from torch import load
import torch
import numpy as np
from schnetpack.data import ASEAtomsData as AtomsData
import schnetpack as spk
import ase.db
import ase.io
from ase.io import read,write
import joblib
import lightgbm as lgb
from openbabel import openbabel
from rdkit import Chem
import matplotlib.pyplot as plt
import os
from qcnico.plt_utils import histogram




INFO:numexpr.utils:NumExpr defaulting to 4 threads.
INFO:rdkit:Enabling RDKit 2024.03.6 jupyter extensions


In [2]:
def load_model(target, path):
    """
    Load a trained model from disk.

    Args:
        target (str): Name of the property the model predicts. 'PCE' selects
            the joblib-serialised model; any other value selects a torch model.
        path (str): Path of the serialised model file.

    Returns:
        The object returned by `joblib.load` when `target` is 'PCE'; otherwise
        the torch model, mapped to the CPU and switched to eval mode.
    """
    if target == 'PCE':
        return joblib.load(path)

    # Every other target is a torch checkpoint: load on CPU, inference mode.
    network = load(path, map_location=torch.device('cpu'))
    network.eval()
    return network
    

In [3]:
from rdkit.Chem import rdDetermineBonds

def ase_to_rdkit(atoms):
    """
    Convert an ASE Atoms object to an RDKit Mol object.

    The atoms are serialised to an XYZ block (coordinates printed with six
    decimals), parsed by RDKit, and bonds are then inferred with
    `rdDetermineBonds.DetermineBonds`.

    Args:
        atoms (ase.Atoms): The input ASE Atoms object.

    Returns:
        Chem.Mol: The corresponding RDKit Mol object.

    Raises:
        ValueError: If RDKit cannot build a molecule from the XYZ block.
    """
    print('[ase_to_rdkit] Start...',flush=True)

    # XYZ layout: atom count, blank comment line, then one "symbol x y z" line per atom.
    xyz_lines = [f"{len(atoms)}", ""]
    for symbol, position in zip(atoms.get_chemical_symbols(), atoms.positions):
        xyz_lines.append(f"{symbol} {position[0]:.6f} {position[1]:.6f} {position[2]:.6f}")
    xyz_str = "\n".join(xyz_lines) + "\n"
    print('[ase_to_rdkit] xyz string acquired',flush=True)

    raw_mol = Chem.MolFromXYZBlock(xyz_str)
    # Check for a failed parse *before* touching the result; previously the
    # check came after the object had already been copied and used.
    if raw_mol is None:
        raise ValueError("RDKit failed to create a Mol object from XYZ data.")

    mol = Chem.Mol(raw_mol)
    print('[ase_to_rdkit] mol object constructed',flush=True)
    mol.UpdatePropertyCache() # this calculates implicit valence of each atom and prevents this from crashing

    # Infer bonds based on atomic distances
    rdDetermineBonds.DetermineBonds(mol)
    print('[ase_to_rdkit] End!',flush=True)
    return mol


    

def cal_nd(mol):
    """
    Compute the `Nd` descriptor of a molecule stored as a database row.

    The row is converted to an ASE Atoms object (`mol.toatoms()`) and then to
    an RDKit molecule via `ase_to_rdkit`. An atom contributes 1 to `Nd` when
    any of the following holds:
      * it is aromatic;
      * its total valence differs from its total degree;
      * it is a ring member.

    Args:
        mol: Object exposing `toatoms()`; presumably an ASE database row.

    Returns:
        int: Number of atoms satisfying the rule above.
    """
    print('Starting cal_nd...',flush=True)
    rd_mol = ase_to_rdkit(mol.toatoms())

    def _contributes(atom):
        # Is the atom aromatic?
        if atom.GetIsAromatic():
            return True
        # Does the atom's total valence differ from its total degree?
        if atom.GetTotalValence() != atom.GetTotalDegree():
            return True
        # Otherwise the atom only counts when it sits in a ring.
        return bool(atom.IsInRing())

    Nd = sum(1 for atom in rd_mol.GetAtoms() if _contributes(atom))
    print('Nd = ', Nd, flush=True)
    return Nd

In [4]:
def cal_prop(moln, molo, tag):
    """
    Derive one acceptor-dependent property for a molecule.

    Args:
        moln: Row of the output database. Not read by the current
            implementation (kept for interface compatibility).
        molo: Row of the input database; `molo.data` must expose `Acceptor`
            and, depending on `tag`, `HOMO`, `LUMO` or `ND`.
        tag (str): Property to compute:
            'edahl'  -> acceptor level minus the molecule's HOMO,
            'edall'  -> the molecule's LUMO minus the acceptor level,
            'adlumo' -> tabulated constant for the acceptor,
            'nd'     -> the stored `ND` value as a float.

    Returns:
        float: The requested property.

    Raises:
        ValueError: If `tag` is not one of the four supported names, or if
            'adlumo' is requested for an acceptor with no tabulated value.
            (Both cases previously surfaced as an UnboundLocalError.)
    """
    # acceptor name -> (level used for edahl/edall, adlumo constant)
    acceptor_table = {
        'PC61BM': (-3.70, 0.077824564),
        'PC71BM': (-3.91, 0.033470005),
    }
    acceptor = molo.data.Acceptor
    # For an unrecognised acceptor the level falls back to 0.0, as before.
    al, adl = acceptor_table.get(acceptor, (.0, None))

    if tag == 'edahl':
        return al - float(molo.data.HOMO)
    if tag == 'edall':
        return float(molo.data.LUMO) - al
    if tag == 'adlumo':
        if adl is None:
            raise ValueError(f"No 'adlumo' value is tabulated for acceptor {acceptor!r}")
        return adl
    if tag == 'nd':
        # Read the stored value rather than recomputing it with cal_nd(moln).
        return float(molo.data.ND)

    raise ValueError(f"Unknown property tag {tag!r}; expected 'edahl', 'edall', 'adlumo' or 'nd'")

In [5]:
def pred_data(model, tag, data):
    """
    Dispatch a prediction request to the matching predictor.

    Args:
        model: Trained model suited to `tag`.
        tag (str): 'PCE' routes to `pred_pce`; anything else to `pred_prop`.
        data: Database path forwarded unchanged to the predictor.

    Returns:
        Whatever the selected predictor returns: the `(ids, pce)` pair of
        `pred_pce`, or the generator produced by `pred_prop`.
    """
    if tag != 'PCE':
        return pred_prop(model, tag, data)
    return pred_pce(model, data)
             

In [6]:
def pred_pce(model, data):
    """
    Predict the PCE of every row in an ASE database.

    Args:
        model: Object with a `predict` method accepting a (1, 9) array.
        data: Path passed to `ase.db.connect`.

    Returns:
        tuple[list, list]: `(ids, pce)` - the row ids and the matching
        predictions, in the order the rows are returned by `select()`.
    """
    # Order in which the descriptors are fed to the model.
    feature_keys = ('homo', 'lumo', 'edahl', 'edall', 'et1', 'nd', 'adlumo', 'dhomo', 'dlumo')

    db = ase.db.connect(data)
    ids = []
    pce = []
    for row in db.select():
        features = np.array([getattr(row.data, key) for key in feature_keys]).reshape(1, -1)
        y = model.predict(features).tolist()
        # Log the row's own id (the builtin `id` function was printed before).
        print(f'Device {row.id} ---> PCE = ', y, flush=True)
        pce.extend(y)
        ids.append(row.id)

    return ids, pce

In [7]:
def pred_prop(model, tag, data):
    """
    Lazily predict one property for every structure in a database.

    Args:
        model: Callable applied to each batch; its result is indexed by `tag`.
        tag (str): Key of the predicted property in the model output.
        data: Database path handed to `AtomsData`.

    Yields:
        tuple[list, list]: `(predictions, ids)` for each batch of up to 10
        structures, where `ids` comes from the batch's '_idx' entry.
    """
    dataset = AtomsData(data)
    loader = spk.AtomsLoader(dataset, batch_size=10) #40!!

    for batch in loader:
        outputs = model(batch)
        batch_ids = batch['_idx'].numpy().tolist()
        batch_values = outputs[tag].detach().numpy().tolist()
        yield batch_values, batch_ids

In [8]:
def copy_prop(db, tag):
    """
    Iterate over a database and read one stored property per row.

    Args:
        db: Object whose `select()` yields rows exposing `id` and `data`.
        tag: Key looked up in each row's `data`.

    Yields:
        tuple[float, Any]: `(value, row id)` with the value cast to float.
    """
    for entry in db.select():
        row_id = entry.id
        value = float(entry.data[tag])
        yield value, row_id

In [9]:
def write_results(predata, db):
    """
    Store the collected properties in the database, one row at a time.

    Args:
        predata (dict): Maps a row id to the dict of properties for that row.
        db: Object exposing `update(id=..., data=...)`.

    Returns:
        int: Always 0.
    """
    for row_id, properties in predata.items():
        # Echo "<id> --> <properties>" before each write.
        print(f'{row_id}',end=' --> ',flush=True)
        print(properties,flush=True)
        db.update(id=row_id,data=properties)
    return 0

In [10]:
def split_train_test(db, normalize=False, minmax_scale=False, train_frac=0.8, seed=None, verbose=True):
    """
    Build the feature matrix from a database and split it randomly into
    training and test sets.

    Args:
        db: Object whose `select()` yields rows exposing `id` and a `data`
            mapping that contains every key listed in `feature_keys` below.
        normalize (bool): Standardize each feature column (zero mean, unit
            standard deviation).
        minmax_scale (bool): Rescale each feature column to [0, 1]; applied
            before `normalize` when both are set.
        train_frac (float): Fraction of rows assigned to the training set.
        seed (int | None): Seed for the shuffle. `None` keeps the previous
            behavior of drawing from NumPy's global random state.
        verbose (bool): Print `row.id` and `row.data` for every row.

    Returns:
        tuple: `(Xtrain, ytrain, Xtest, ytest, feature_keys)`. The X arrays
        hold the nine descriptor columns; the y arrays hold 'pce'.
        `feature_keys` lists the nine descriptors followed by 'pce'.

    Raises:
        ValueError: If the database has no rows or `train_frac` lies outside
            [0, 1].
    """
    if not 0.0 <= train_frac <= 1.0:
        raise ValueError(f"train_frac must lie in [0, 1], got {train_frac!r}")

    # Define inputs to model (last key is the regression target)
    feature_keys = ['homo', 'lumo', 'et1', 'dhomo', 'dlumo', 'nd', 'edahl', 'edall', 'adlumo', 'pce']

    # Read the database once and reuse the rows for printing and array building.
    rows = list(db.select())
    if verbose:
        for row in rows:
            print(row.id, row.data)
    if not rows:
        raise ValueError("Cannot split an empty database")

    # Column 0 is the row id, columns 1..-2 the descriptors, column -1 the target.
    # Forcing float keeps the scaling below valid even if every stored value is an int.
    data = np.array([[row.id] + [row.data[fk] for fk in feature_keys] for row in rows], dtype=float)

    X = data[:, 1:-1].copy()
    y = data[:, -1]

    # NOTE(review): both scalings use statistics of the full dataset, i.e. the
    # test rows influence the transform applied to the training rows.
    # Rescales data to be between 0 and 1
    if minmax_scale:
        col_min = np.min(X, axis=0)
        X = (X - col_min) / (np.max(X, axis=0) - col_min)

    # Standardize (zero-mean, unit variance)
    if normalize:
        X = (X - X.mean(axis=0)) / X.std(axis=0)

    N = X.shape[0]
    if seed is None:
        shuffled = np.random.permutation(np.arange(N))
    else:
        # A dedicated generator makes the split reproducible without touching global state.
        shuffled = np.random.default_rng(seed).permutation(N)

    Ntrain = int(N * train_frac)
    train_idx = shuffled[:Ntrain]
    test_idx = shuffled[Ntrain:]

    return X[train_idx], y[train_idx], X[test_idx], y[test_idx], feature_keys


def train_pce_model(model, Xtrain, ytrain):
    """
    Fit an estimator on the training data after printing its parameters.

    Args:
        model: Estimator exposing `get_params()` and `fit(X, y)`.
        Xtrain: Training features, forwarded to `fit`.
        ytrain: Training targets, forwarded to `fit`.

    Returns:
        The same `model` object, after `fit` has been called on it.
    """
    hyperparameters = model.get_params()
    print(hyperparameters)

    model.fit(Xtrain, ytrain)
    return model

In [None]:
def write_train_db():
    """
    Build 'data/train2.db' from 'data/train.db'.

    Any existing output file is removed first. For every row of the input
    database the structure is copied to the output database, the stored
    properties are copied under lower-case names, and the acceptor-dependent
    properties are derived with `cal_prop`. Everything is then written with
    `write_results`.

    Properties are keyed by the id that the output database assigns to each
    copied structure, so the function no longer relies on the input ids being
    exactly 1..N nor on both databases returning rows in the same order.

    Returns:
        The connection to the freshly written output database.
    """
    target = ['et1', 'dhomo', 'dlumo', 'homo', 'lumo', 'pce']   # names used in the output database
    otarget = ['ET1', 'DH', 'DL', 'HOMO', 'LUMO', 'PCE']        # matching names in the input database
    target2 = ['nd', 'edahl', 'edall', 'adlumo']                # derived from the stored values and the acceptor

    output_db_path = 'data/train2.db'
    if os.path.exists(output_db_path):
        os.remove(output_db_path)

    db = ase.db.connect(output_db_path)    # output database where results get written to
    odb = ase.db.connect('data/train.db')  # input database containing the structures and their properties

    # Dict of dicts: output row id -> {property name: value}
    predata = {}
    for molo in odb.select():
        # `write` returns the id of the newly created row in the output database.
        new_id = db.write(molo.toatoms())
        moln = db.get(id=new_id)

        properties = {tag: float(molo.data[otag]) for tag, otag in zip(target, otarget)}
        for tag in target2:
            # `molo` (input row) is needed because it carries the acceptor information.
            properties[tag] = cal_prop(moln, molo, tag)
        predata[new_id] = properties

    write_results(predata, db)  # save the properties next to the structures in the '.db' file
    return db


def inspect_feature(x, feature_name):
    """
    Show a 50-bin histogram of one feature and print its mean and standard
    deviation.

    Args:
        x: Values of the feature (anything accepted by `np.mean`/`np.std`).
        feature_name (str): Used as the x-axis label and as the printed prefix.
    """
    canvas = plt.subplots()  # (figure, axes) pair handed to the histogram helper
    print(feature_name, end = ' --> ')
    histogram(x, bins=50, plt_objs=canvas, xlabel=feature_name, show=False, usetex=False)
    plt.show()

    average = np.mean(x)
    spread = np.std(x)
    print(f'mean = {average} ; std = {spread}')




def test_model_from_db(Xtrain, ytrain, Xtest, ytest, model_type='best so far'):
    """
    Train a LightGBM regressor, save it, and evaluate it on the test split.

    The fitted model is dumped with joblib to 'weights/lgb_nico', the Pearson
    correlation between true and predicted test targets is printed, and a
    parity plot is saved to 'lgb_perf_test.png' and shown.

    Args:
        Xtrain, ytrain: Training features and targets.
        Xtest, ytest: Held-out features and targets.
        model_type (str): 'overfit' or 'paper' select the corresponding
            hyperparameter preset; any other value selects the default preset.

    Returns:
        The fitted `lgb.LGBMRegressor`.
    """
    overfit_params = dict(
        learning_rate=0.5,       # Even more aggressive learning
        max_depth=-1,            # No limit on depth
        num_leaves=2**16,        # A massive number of leaves to force splits
        min_child_samples=1,     # Allow splitting until only 1 sample remains
        n_estimators=5000,       # Huge number of trees to ensure memorization
        reg_alpha=0,             # No L1 regularization
        reg_lambda=0,            # No L2 regularization
        subsample=1.0,           # Use 100% of the data
        colsample_bytree=1.0,    # Use 100% of features
        min_split_gain=0,        # Allow splits even with no gain
        importance_type='gain',
        force_col_wise=True,
        extra_trees=True,
        deterministic=True,
        boosting_type='gbdt',    # Standard boosting
        objective='regression',
        metric='rmse',
        random_state=42,
    )
    paper_params = dict(
        learning_rate=0.15,
        max_depth=9,
        n_estimators=39,
        num_leaves=35,
        random_state=399,
        objective='regression',
        metric='rmse',
    )
    default_params = dict(
        learning_rate=0.05,
        max_depth=-1,
        min_child_samples=5,
        n_estimators=500,
        num_leaves=2**12,
        random_state=399,
        reg_alpha=0.2,
        reg_lambda=0.01,
        objective='regression',
        metric='rmse',
    )

    if model_type == 'overfit':
        chosen_params = overfit_params
    elif model_type == 'paper':
        chosen_params = paper_params
    else:
        chosen_params = default_params

    model = train_pce_model(lgb.LGBMRegressor(**chosen_params), Xtrain, ytrain)

    # Persist the fitted estimator.
    model_file = 'weights/lgb_nico'
    print('Saving model to: ', model_file)
    joblib.dump(model, model_file)

    # Evaluate on the held-out data.
    ytest_pred = model.predict(Xtest)
    pearson_r = np.corrcoef(ytest, ytest_pred)[0,1]
    print(f'********** r =  {pearson_r} **********')

    # Parity plot; the dashed diagonal spans the range of the training targets.
    fig, ax = plt.subplots()
    ax.scatter(ytest,ytest_pred)
    yrange = np.linspace(np.min(ytrain),np.max(ytrain),100)
    ax.plot(yrange,yrange,'k--',lw=0.7)
    ax.set_xlabel('PCE (%)')
    ax.set_ylabel('Predicted PCE (%)')
    plt.savefig('lgb_perf_test.png',dpi=300)
    plt.show()

    return model

In [12]:
# Reuse the training database written earlier; uncomment the next line to rebuild it.
# db = write_train_db()
db = ase.db.connect("data/train2.db")
Xtrain, ytrain, Xtest, ytest, feature_names = split_train_test(
    db,
    normalize=True,
    minmax_scale=False,
)

# One row per feature, with the PCE targets stacked underneath as the last row,
# so the rows line up with `feature_names` (whose last entry is 'pce').
train_data = np.vstack((Xtrain.T, ytrain))
for ft, x in zip(feature_names, train_data):
    inspect_feature(x, ft)

# Drop the last feature column (adlumo, only 2 unique values) before training.
model = test_model_from_db(
    Xtrain[:, :-1],
    ytrain,
    Xtest[:, :-1],
    ytest,
    model_type='paper',
)

1 {'et1': 1.76, 'dhomo': 0.317, 'dlumo': 0.024, 'homo': -4.484, 'lumo': -1.941, 'pce': 0.04, 'nd': 75.0, 'edahl': 0.7839999999999998, 'edall': 1.7590000000000001, 'adlumo': 0.077824564}
2 {'et1': 2.3273, 'dhomo': 0.429, 'dlumo': 0.383, 'homo': -6.639, 'lumo': -3.305, 'pce': 0.05, 'nd': 25.0, 'edahl': 2.939, 'edall': 0.395, 'adlumo': 0.077824564}
3 {'et1': 2.4066, 'dhomo': 0.641, 'dlumo': 0.004, 'homo': -4.55, 'lumo': -1.342, 'pce': 0.11, 'nd': 48.0, 'edahl': 0.8499999999999996, 'edall': 2.358, 'adlumo': 0.077824564}
4 {'et1': 1.8773, 'dhomo': 0.203, 'dlumo': 0.198, 'homo': -5.156, 'lumo': -2.377, 'pce': 0.2, 'nd': 60.0, 'edahl': 1.4559999999999995, 'edall': 1.3230000000000004, 'adlumo': 0.077824564}
5 {'et1': 1.414, 'dhomo': 0.566, 'dlumo': 1.038, 'homo': -6.479, 'lumo': -3.981, 'pce': 0.24, 'nd': 23.0, 'edahl': 2.779, 'edall': -0.2809999999999997, 'adlumo': 0.077824564}
6 {'et1': 0.7634, 'dhomo': 1.429, 'dlumo': 0.984, 'homo': -5.291, 'lumo': -3.237, 'pce': 0.3, 'nd': 32.0, 'edahl': 1

<IPython.core.display.Javascript object>

homo --> [plt_utils.histogram] dx = 0.17694747780727027
mean = -0.017151331224302 ; std = 1.0218052119355827


<IPython.core.display.Javascript object>

lumo --> [plt_utils.histogram] dx = 0.21709476855088475
mean = 0.000647964763772424 ; std = 1.0312502994342276


<IPython.core.display.Javascript object>

et1 --> [plt_utils.histogram] dx = 0.21346792598377065
mean = 0.03166722724952306 ; std = 1.0173577001510645


<IPython.core.display.Javascript object>

dhomo --> [plt_utils.histogram] dx = 0.1100295594671504
mean = -0.022014534899658635 ; std = 0.9522228774997954


<IPython.core.display.Javascript object>

dlumo --> [plt_utils.histogram] dx = 0.09010200513592466
mean = -0.006531409034037647 ; std = 0.9949804823943926


<IPython.core.display.Javascript object>

nd --> [plt_utils.histogram] dx = 0.1109932185023845
mean = -0.01680493254941619 ; std = 0.8817169859871254


<IPython.core.display.Javascript object>

edahl --> [plt_utils.histogram] dx = 0.1773115798166873
mean = 0.0004836319940431016 ; std = 1.033007502216842


<IPython.core.display.Javascript object>

edall --> [plt_utils.histogram] dx = 0.2155861489357749
mean = 0.0169512659808158 ; std = 1.0266488238394342


<IPython.core.display.Javascript object>

adlumo --> [plt_utils.histogram] dx = 0.04305557891470846
mean = -0.052473986802313875 ; std = 0.9774709560564752


<IPython.core.display.Javascript object>

pce --> [plt_utils.histogram] dx = 0.19599999999999998
mean = 4.7901875 ; std = 2.002511383199544
{'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.15, 'max_depth': 9, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 39, 'n_jobs': None, 'num_leaves': 35, 'objective': 'regression', 'random_state': 399, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0, 'metric': 'rmse'}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 771
[LightGBM] [Info] Number of data points in the train set: 320, number of used features: 8
[LightGBM] [Info] Start training from score 4.790188
Saving model to:  weights/lgb_nico
0.7867616355439172


<IPython.core.display.Javascript object>

In [14]:
# Bar chart of the trained model's feature importances.
feature_importance = model.feature_importances_

# Draw on a new figure: `plt.bar` alone targets whichever figure is current,
# which with the interactive notebook backend is presumably the previous
# cell's plot rather than a fresh one.
fig, ax = plt.subplots()
ax.bar(range(len(feature_importance)), feature_importance)
ax.set_xlabel("Feature Index")
ax.set_ylabel("Importance Score")
plt.show()

<IPython.core.display.Javascript object>

In [20]:
from qcnico.plt_utils import multiple_histograms
from data_utils import make_indoor_data
from matplotlib import rcParams


# Directory holding the indoor-device spreadsheets. It defaults to the original
# location; set the OPVGCN_DATA_DIR environment variable to run elsewhere.
indoor_data_dir = os.environ.get('OPVGCN_DATA_DIR', '/Users/nico/Desktop/scripts/OPVGCN/data')
device_xlsx = os.path.join(indoor_data_dir, 'data-indoor.xlsx')
donor_csv = os.path.join(indoor_data_dir, 'donors_emna_dft.csv')
acceptor_csv = os.path.join(indoor_data_dir, 'acceptors_emna_dft.csv')

Xindoor, yindoor = make_indoor_data(device_xlsx, donor_csv, acceptor_csv, normalize=True)

# Apply the model trained above to the indoor devices.
ypred_indoor = model.predict(Xindoor)

yrange = np.linspace(np.min(yindoor), np.max(yindoor),100)

# Plain-text rendering for all figures below; set once instead of on every loop pass.
rcParams['text.usetex'] = False
print(rcParams['text.usetex'])

# Parity plot: measured vs. predicted PCE.
fig,ax = plt.subplots()
ax.scatter(yindoor,ypred_indoor)
ax.plot(yrange,yrange,'k--',lw=0.7)
ax.set_xlabel('PCE (%)')
ax.set_ylabel('Predicted PCE (%)')
plt.savefig('lgb_perf_indoor.png',dpi=300,bbox_inches="tight")
plt.show()

print(f'********** r =  {np.corrcoef(yindoor, ypred_indoor)[0,1]} **********')

# Number of leading feature columns compared below (the model above is
# trained on Xtrain[:, :-1]).
num_features = 8

# Compare the distribution of each feature in the training and indoor sets.
for k in range(num_features):
    fig, ax = plt.subplots()
    plt_dat = [Xtrain[:,k],Xindoor[:,k]]
    fig, ax = multiple_histograms(plt_dat,labels=['train', 'indoor'],bins=50,xlabel=feature_names[k],plt_objs=(fig,ax),normalised=True,show=False,usetex=False)
    ax.legend()
    plt.savefig(f'compare_{feature_names[k]}_train_v_indoor.png',dpi=300,bbox_inches="tight")
    plt.show()

# Same comparison for the target itself.
fig, ax = plt.subplots()
fig,ax = multiple_histograms([ytrain, yindoor],labels=['train', 'indoor'],bins=50,xlabel='PCE',plt_objs=(fig,ax),normalised=True,show=False,usetex=False)
plt.savefig('compare_PCE_train_v_indoor.png',dpi=300)
plt.show()



Index(['Donor', 'Acceptor', 'Light source', 'Condition', 'Voc (V)',
       'Jsc (mA cm^2)', 'FF(%)', 'PCE(%)', 'SMILES(donor)', 'SMILES(acceptor)',
       'Donor_HOMO(ev)', 'Donor_LUMO(ev)', 'Donor_bandgap(ev)',
       'Acceptor_HOMO(ev)', 'Acceptor_LUMO(ev)', 'Acceptor_bandgap(ev)',
       'Reference', 'Unnamed: 17'],
      dtype='object')
Index(['Donor', 'Acceptor', 'Light source', 'Condition', 'Voc (V)',
       'Jsc (mA cm^2)', 'FF(%)', 'PCE(%)', 'SMILES(donor)', 'SMILES(acceptor)',
       'Donor_HOMO(ev)', 'Donor_LUMO(ev)', 'Donor_bandgap(ev)',
       'Acceptor_HOMO(ev)', 'Acceptor_LUMO(ev)', 'Acceptor_bandgap(ev)',
       'Reference', 'Unnamed: 17'],
      dtype='object')
Index(['Donor', 'Acceptor', 'Light source', 'Condition', 'Voc (V)',
       'Jsc (mA cm^2)', 'FF(%)', 'PCE(%)', 'SMILES(donor)', 'SMILES(acceptor)',
       'Donor_HOMO(ev)', 'Donor_LUMO(ev)', 'Donor_bandgap(ev)',
       'Acceptor_HOMO(ev)', 'Acceptor_LUMO(ev)', 'Acceptor_bandgap(ev)',
       'Reference', 'Unnamed:

<IPython.core.display.Javascript object>

********** r =  0.332191574498221 **********


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.17694747780727027
[plt_utils.histogram] dx = 0.08485088959987876


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.21709476855088475
[plt_utils.histogram] dx = 0.14563113053080823


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.21346792598377065
[plt_utils.histogram] dx = 0.085946981495607


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.1100295594671504
[plt_utils.histogram] dx = 0.10983962811157993


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.09010200513592466
[plt_utils.histogram] dx = 0.07256324152147847


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.1109932185023845
[plt_utils.histogram] dx = 0.0784556448447026


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.1773115798166873
[plt_utils.histogram] dx = 0.08413747259862982


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.2155861489357749
[plt_utils.histogram] dx = 0.12338503816992094


<IPython.core.display.Javascript object>

[plt_utils.histogram] dx = 0.19599999999999998
[plt_utils.histogram] dx = 0.534


In [21]:
# Indoor parity plot again, with both axes restricted to the range of the
# training targets.
yrange = np.linspace(0, 10, 100)

fig, ax = plt.subplots()
ax.scatter(yindoor, ypred_indoor)
ax.plot(yrange, yrange, 'k--', lw=0.7)
ax.set(xlabel='PCE (%)', ylabel='Predicted PCE (%)')
# Limits are applied after plotting so they override autoscaling.
ax.set(
    xlim=[np.min(ytrain), np.max(ytrain)],
    ylim=[np.min(ytrain), np.max(ytrain)],
)
plt.savefig('lgb_perf_indoor_smallPCE.png', dpi=300, bbox_inches="tight")
plt.show()


<IPython.core.display.Javascript object>

In [16]:

from qcnico.plt_utils import multiple_histograms
from data_utils import make_cc_data

# NOTE(review): the recorded output of this cell ends with
# "AttributeError: 'AtomsRow' object has no attribute 'homo'"; presumably it is
# raised inside make_cc_data, whose source is not part of this notebook - confirm.
db_path = 'data/CADAC.db'

Xcadac, ycadac = make_cc_data(db_path, normalize=True,minmax_scale=True)

ypred_cadac = model.predict(Xcadac)

# Explicit figure so the scatter does not land on a previously opened one.
# Raw strings keep the exact label text while avoiding the invalid '\%' escape.
fig, ax = plt.subplots()
ax.scatter(ycadac,ypred_cadac)
ax.set_xlabel(r'PCE (\%)')
ax.set_ylabel(r'Predicted PCE (\%)')
plt.show()

# Number of leading feature columns compared below.
num_features = 8

# Compare the distribution of each feature in the training and CADAC sets.
for k in range(num_features):
    fig, ax = plt.subplots()
    plt_dat = [Xtrain[:,k],Xcadac[:,k]]
    multiple_histograms(plt_dat,labels=['train', 'cadac'],bins=50,xlabel=feature_names[k],plt_objs=(fig,ax),normalised=True)

# Same comparison for the target itself.
fig, ax = plt.subplots()
multiple_histograms([ytrain, ycadac],labels=['train', 'cadac'],bins=50,xlabel='PCE',plt_objs=(fig,ax),normalised=True)


{'PCE': 0, 'HOMO': 0, 'LUMO': 0, 'ET1': 0, 'ND': 0, 'Acceptor': 'PC71BM', 'DH': 0, 'DL': 0, 'name': 'C1-A1-D1-A1-C1'}


AttributeError: 'AtomsRow' object has no attribute 'homo'

In [None]:
# Print the type and hyperparameters of each saved model.
# joblib.load unpickles the file, so only point this at trusted model files.
model_paths = ['weights/lgb_model','weights/PCE_model']
for mp in model_paths:
    print(f'----- {mp} -----')
    # A separate name is used so the trained `model` from the cells above is
    # not overwritten by the last file loaded here.
    saved_model = joblib.load(mp)
    print(type(saved_model))
    for key, value in saved_model.get_params().items():
        print(f'{key} ---> {value}')
    print('\n\n')