In [25]:
# python env: modnenv_v2

import os
import string
import random
import numpy as np
from sklearn.model_selection import KFold
from pathlib import Path, PosixPath
import matplotlib.pyplot as plt
import json 
import pandas as pd
from copy import deepcopy
from modnet.models import EnsembleMODNetModel
from modnet.preprocessing import MODData
from modnet.hyper_opt import FitGenetic
from monty.serialization import dumpfn, loadfn
from pymatgen.ext.matproj import MPRester
from pymatgen.core.structure import Structure
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, mean_squared_error 
from scipy.stats import spearmanr
from IPython.display import Image
from tqdm import tqdm
import pickle
# Set CUDA_VISIBLE_DEVICES to "-1" for this process and anything it spawns.
# NOTE(review): conventionally this hides every GPU so later libraries fall
# back to CPU -- confirm that CPU-only execution is the intent here.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [26]:
# Development helper: import the two local helper modules and immediately
# reload them, so that re-running this cell re-executes their source files
# without restarting the kernel.
from importlib import reload

# Local module; provides `actilearn` (called later as mdw.actilearn)
import modwrap as mdw

reload(mdw)

# Local module; provides the acquisition function passed later as acq.exploration
import acquilib as acq

# Last expression of the cell, so the reloaded module's repr is shown as output
reload(acq)

<module 'acquilib' from '/home/vtrinquet/Documents/Doctorat/JNB_Scripts_Clusters/NLO/HT/ref_idx/re2fractive/artificial/acquilib.py'>

In [27]:
# Root directory of the re2fractive work tree (absolute, machine-specific path)
path_re2f = Path('/home/vtrinquet/Documents/Doctorat/JNB_Scripts_Clusters/NLO/HT/ref_idx/re2fractive')

# Location of the saved featurized MODData. This only builds the path object;
# nothing is read from disk at this point.
md_featselec = path_re2f.joinpath('humanguided', 'v0', 'mod.data_refeatselec_v0_v2')

In [28]:
# Deserialize the MODData object stored at the `md_featselec` path
md = MODData.load(md_featselec)

2024-01-23 16:05:12,918 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7f00f7a0a280> object, created with modnet version 0.4.1


In [29]:
# Keep only the leading rows of the feature and target frames, applying the
# same cut to both. The frames on `md` are overwritten in place.
# NOTE(review): 54 looks like a deliberately tiny subset for a quick run -- confirm.
n_rows_kept = 54
md.df_featurized = md.df_featurized.iloc[:n_rows_kept]
md.df_targets = md.df_targets.iloc[:n_rows_kept]

In [34]:
# Settings handed to the model through the `model_params` argument of
# mdw.actilearn. Inline notes give the default each value replaces.
model_params = dict(
    size_pop=2,         # dflt 20
    num_generations=2,  # dflt 10
    nested=0,           # dflt 5
    n_jobs=2,
    early_stopping=2,   # dflt 4
    refit=5,            # dflt 5
    fast=False,
)

# Call modwrap's `actilearn` on the already-loaded MODData and unpack its
# seven return values in order. The only data input supplied is
# `md_featselec=md`; structures, ids, X, Y and md_feat are all None.
# NOTE(review): the meanings below are inferred from the parameter names only
# -- confirm against modwrap.actilearn before relying on them:
#   start_n=50, start_state=42 -> presumably size and seed of the initial set
#   ncycles=2                  -> presumably the number of acquisition cycles
#   cv_k=2, cv_state=42        -> presumably fold count and seed for cross-validation
#   acquisition_n=10           -> presumably the number of points acquired per cycle
Xt, Yt, Xp, Yp, results, model, scores_bk = mdw.actilearn(
    structures=None,
    ids=None,
    X=None,
    Y=None,
    md_feat=None,
    md_featselec=md,
    start_frac=None,
    start_n=50,
    start_set=None,
    start_state=42,
    ncycles=2,
    accuracy=None,
    accuracy_type=None,
    end_set=None,
    model_type=FitGenetic,
    model_params=model_params,
    cv_k=2,
    cv_state=42,
    acquisition=acq.exploration,
    acquisition_kwargs=None,
    acquisition_n=10,
    acquisition_frac=None,
    featurize_cycle=None,
    featurize_cv=None,
    featselec_cycle=None,
    featselec_cv=None,
)

Model already exists!
2024-01-23 16:09:00,598 - modnet - INFO - Loaded <modnet.models.ensemble.EnsembleMODNetModel object at 0x7f00e5bf2040> object, created with modnet version 0.4.1
Model already exists!
2024-01-23 16:09:00,703 - modnet - INFO - Loaded <modnet.models.ensemble.EnsembleMODNetModel object at 0x7f00ffcf2340> object, created with modnet version 0.4.1


In [13]:
def rankt(arg1, arg2='meh', **kwargs):
    """Placeholder rank function: print ``arg1`` and return ``None``.

    ``arg2`` and any extra keyword arguments are accepted but never used, so
    the function tolerates being called with keywords meant for other rank
    functions.
    """
    print(arg1)

def rankp(arg1, arg2='meh', arg3='3', **kwargs):
    """Placeholder rank function: print ``arg3`` and return ``None``.

    ``arg1`` is required but unused; ``arg2`` and any extra keyword arguments
    are likewise accepted and ignored. ``arg3`` defaults to the string ``'3'``.
    """
    print(arg3)

def select(rank_function, tmp, **kwargs):
    """Print ``tmp``, then delegate to ``rank_function``.

    Parameters
    ----------
    rank_function : callable
        Invoked as ``rank_function(**kwargs)``; it must accept every keyword
        that is forwarded.
    tmp : object
        Value printed before the delegation; it is not forwarded.
    **kwargs
        Keyword arguments passed through unchanged.

    Returns
    -------
    Whatever ``rank_function(**kwargs)`` returns.
    """
    print(tmp)
    outcome = rank_function(**kwargs)
    return outcome

# Demo: one shared keyword set is forwarded to both rank functions through
# select(); each function picks the keywords it names and absorbs the rest
# in its **kwargs.
kwargs_for_rank = dict(arg1='value1', arg3='value2')

# Calls run in order (rankt first, then rankp), so the printed lines keep
# the same sequence as two separate calls would give.
result_t, result_p = [
    select(rank_fn, tmp=tag, **kwargs_for_rank)
    for rank_fn, tag in ((rankt, 't'), (rankp, 'p'))
]


t
value1
p
value2


In [56]:
from os import listdir
from os.path import isfile, join

def _natural_sort_key(filename):
    """Sort key ordering names by the integers embedded in them (2 before 10)."""
    import re  # local import: only this helper needs it

    # The filename itself is the tie-breaker so the order is fully deterministic
    return [int(run) for run in re.findall(r'\d+', filename)], filename


def _save_score_plot(x, curves, ylabel, out_dir, stem):
    """Plot labelled curves against the cycle index and save them as PNG and PDF."""
    fig, ax = plt.subplots()

    for label, values in curves:
        ax.plot(x, values, label=label)

    ax.set_xlabel('# AL cycles', fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.legend(fontsize=12)

    for extension in ('png', 'pdf'):
        fig.savefig(out_dir / f"{stem}.{extension}")


def analysis(
      scores_dir_path=Path('./benchmark/scores')
      ):
    """Aggregate the per-cycle score files of a run and save two summary plots.

    Every regular file in ``scores_dir_path`` whose name contains at least one
    digit is read as JSON. Each file must provide the keys ``pred_mae``,
    ``pred_rmse``, ``pred_spr``, ``unc_mae`` and ``unc_rmse``; the raw value
    and its ``np.mean`` are collected per file.

    Files are processed in natural order of the integers embedded in their
    names, so the x-axis of the plots and the order of the returned lists are
    deterministic instead of following the arbitrary directory listing order.
    NOTE(review): this assumes the embedded number is the cycle index --
    confirm against the code that writes the score files.

    Two figures are written into ``scores_dir_path``, each as PNG and PDF:
    ``scores_overall`` (-MAE, -RMSE, +SPR) and ``scores_unc_overall``
    (-MAE, -RMSE of the uncertainty scores).

    Parameters
    ----------
    scores_dir_path : str or pathlib.Path
        Directory holding the score files; also the output directory.

    Returns
    -------
    dict or None
        Mapping with the ``*_folds`` and ``*_avg`` lists, one entry per score
        file. ``None`` is returned (after printing a notice) when all four
        output figures already exist, in which case nothing is read or plotted.

    Raises
    ------
    FileNotFoundError
        If ``scores_dir_path`` does not exist.
    KeyError
        If a score file lacks one of the expected keys.
    """
    # Accept plain strings as well as Path objects
    scores_dir_path = Path(scores_dir_path)

    # Skip all work when every output figure is already on disk
    figure_names = (
        "scores_overall.png",
        "scores_overall.pdf",
        "scores_unc_overall.png",
        "scores_unc_overall.pdf",
    )
    if all((scores_dir_path / name).exists() for name in figure_names):
        print('Already analyzed!')
        return

    scores_all = {
        'mae_folds': [],
        'rmse_folds': [],
        'spr_folds': [],
        'mae_unc_folds': [],
        'rmse_unc_folds': [],
        'mae_avg': [],
        'rmse_avg': [],
        'spr_avg': [],
        'mae_unc_avg': [],
        'rmse_unc_avg': [],
    }

    # Key in each score file -> stem of the matching entries in scores_all
    metric_stems = {
        'pred_mae': 'mae',
        'pred_rmse': 'rmse',
        'pred_spr': 'spr',
        'unc_mae': 'mae_unc',
        'unc_rmse': 'rmse_unc',
    }

    # Score files are recognised by a digit in their name; the generated
    # figures carry none and are therefore never picked up here.
    score_files = sorted(
        (
            entry.name
            for entry in scores_dir_path.iterdir()
            if entry.is_file() and any(ch.isdigit() for ch in entry.name)
        ),
        key=_natural_sort_key,
    )

    for filename in score_files:
        with open(scores_dir_path / filename) as handle:
            scores = json.load(handle)

        for source_key, stem in metric_stems.items():
            scores_all[f"{stem}_folds"].append(scores[source_key])
            scores_all[f"{stem}_avg"].append(np.mean(scores[source_key]))

    x = range(len(scores_all['mae_avg']))

    # Errors are negated so that "higher is better" holds for every curve
    _save_score_plot(
        x,
        [
            ('-MAE', -np.array(scores_all['mae_avg'])),
            ('-RMSE', -np.array(scores_all['rmse_avg'])),
            ('+SPR', scores_all['spr_avg']),
        ],
        ylabel='Score',
        out_dir=scores_dir_path,
        stem='scores_overall',
    )
    _save_score_plot(
        x,
        [
            ('-MAE', -np.array(scores_all['mae_unc_avg'])),
            ('-RMSE', -np.array(scores_all['rmse_unc_avg'])),
        ],
        ylabel='Score uncertainty',
        out_dir=scores_dir_path,
        stem='scores_unc_overall',
    )

    return scores_all
    
# Run the analysis on the default scores directory. The result (a dict, or
# None when the four figures already exist) is bound to `_` so the cell does
# not display it.
_ = analysis()


Already analyzed!


In [28]:
# Scratch check: each key gets its own list, and appending to one list per key
# leaves exactly one element under each.
test = {key: [] for key in ('test1', 'test2')}
print(test)
for bucket in test.values():
    bucket.append(5)
print(test)

{'test1': [], 'test2': []}
{'test1': [5], 'test2': [5]}
