In [None]:
import pandas as pd
from pymatgen.core.periodic_table import Element
import math
import numpy as np

def _to_float(raw):
    """Return ``raw`` as a float, or None when no number can be extracted."""
    if raw is None:
        return None
    if isinstance(raw, str):
        # String values are split on whitespace and only the leading token is
        # treated as the number (anything after it, e.g. a unit, is dropped).
        tokens = raw.split()
        raw = tokens[0] if tokens else ""
    try:
        return float(raw)
    except (TypeError, ValueError):
        return None


def get_data(verbose=True):
    """Collect numeric element properties from pymatgen for Z = 1..102.

    Two groups of properties are gathered: linear-scale ones, stored under
    their attribute name, and log-scale ones, stored as ``log10_<name>``.
    A property with no usable numeric value is omitted for that element.

    Args:
        verbose (bool): When True (the default) print every element as it is
            processed and every raw log-scale value that is found.

    Returns:
        dict: ``{str(element): {property name: float value}}``.
    """
    # 'X' is not listed here: it is read through the dedicated ``elm.X``
    # branch below so that NaN values can be dropped and so that it stays
    # the last key of each per-element dict.
    # NOTE(review): an earlier revision also listed 'X' here but fetched it
    # with ``elm.__getattr__``, which presumably only serves pymatgen's
    # data-table attributes and therefore never produced 'X' -- confirm
    # against the installed pymatgen version.
    linear_columns = ['liquid_range',
                      'mineral_hardness', 'molar_volume', 'poissons_ratio',
                      'density_of_solid', 'critical_temperature', 'rigidity_modulus',
                      'brinell_hardness',
                      'boiling_point', 'bulk_modulus',
                      'superconduction_temperature', 'melting_point',
                      'velocity_of_sound', 'reflectivity',
                      'youngs_modulus']
    log_columns = ['electrical_resistivity', 'thermal_conductivity',
                   'vickers_hardness',
                   'coefficient_of_linear_thermal_expansion']

    element_properties = {}
    for Z in range(1, 103):
        elm = Element.from_Z(Z)
        if verbose:
            print(Z, elm)

        properties = {}
        for prop in linear_columns:
            # getattr with a default treats a missing attribute like "no data".
            value = _to_float(getattr(elm, prop, None))
            if value is not None:
                properties[prop] = value

        for prop in log_columns:
            raw = getattr(elm, prop, None)
            if raw is None:
                continue
            if verbose:
                print(prop, raw)
            value = _to_float(raw)
            # log10 is only defined for positive numbers; ``not value > 0``
            # also rejects NaN, which would otherwise slip through.
            if value is None or not value > 0:
                continue
            properties["log10_" + prop] = math.log10(value)

        x_value = elm.X
        if x_value is not None and not np.isnan(x_value):
            properties["X"] = x_value

        element_properties[str(elm)] = properties

    return element_properties

# Collect the per-element property dict once; later cells reuse it.
g_properties =get_data()

In [None]:
# Transpose so that the outer dict keys (elements) become rows and the
# property names become columns, then write the table to CSV.
# open-for-write does not create directories, so "data/" must already exist.
pd.DataFrame(g_properties).T.to_csv("data/atomicprop.csv")

In [None]:

def make_items(v, ndiv):
    """Create threshold items from cut points of the sorted data.

    The cut points are the sorted values at positions ``len(v) * d // ndiv``
    for ``d = 1 .. ndiv - 1``. Only two items are produced, one below the
    first cut point and one above the last, so ndiv=3 is the practical
    choice.

    Args:
        v (list[float]): data values
        ndiv (int): number of divisions

    Returns:
        list[str]: ``["<{first cut}", ">{last cut}"]`` with two decimals
    """
    ordered = np.sort(v)
    count = ordered.shape[0]

    # One cut point per interior division boundary.
    cuts = [ordered[count * step // ndiv] for step in range(1, ndiv)]

    lower_cut, upper_cut = cuts[0], cuts[-1]
    return ["<{:.2f}".format(lower_cut), ">{:.2f}".format(upper_cut)]

# Property table: after the transpose, rows are the keys of g_properties and
# columns are the property names.
g_df = pd.DataFrame(g_properties).T

def make_min_max(df, ndiv=3):
    """Build the threshold items for every column of a table.

    Args:
        df (pd.DataFrame): numeric table; NaN entries of a column are
            dropped before its items are computed.
        ndiv (int): number of divisions, forwarded to ``make_items``.

    Returns:
        dict: {column name: item list returned by ``make_items``}

    """
    def _without_nan(column):
        raw = df[column].values
        return raw[~np.isnan(raw)]

    return {column: make_items(_without_nan(column), ndiv)
            for column in df.columns}
                
# Threshold items for every column of g_df, using the default ndiv=3.
g_prop_minmax = make_min_max(g_df)

def make_transaction(df2, itemlists):
    """Evaluate every "<column><item>" query on the data and list hits per row.

    Each item of each column is concatenated with the column name into a
    query string (e.g. ``"density<1.50"``), which is evaluated with
    ``DataFrame.query``. A row's transaction is the list of query strings it
    satisfies.

    Args:
        df2 (pd.DataFrame): data; its index labels become the result keys.
        itemlists (dict): {column name: list of item strings}.

    Returns:
        dict: {index label: list of satisfied query strings}, with the query
        strings in the iteration order of ``itemlists``.
    """
    # One boolean column per query, built directly instead of concatenating
    # many single-column frames.
    flags = {}
    for prop, items in itemlists.items():
        for item in items:
            qstr = "{}{}".format(prop, item)
            matched = df2.query(qstr).index
            flags[qstr] = df2.index.isin(matched)
    # Passing the index explicitly keeps every row even when there are no
    # queries at all (an empty ``itemlists`` yields empty transactions).
    df3 = pd.DataFrame(flags, index=df2.index)

    # ``display`` is only defined inside IPython/Jupyter sessions; showing
    # the table is a convenience, so skip it when running as plain Python.
    try:
        display(df3)
    except NameError:
        pass

    names = [str(col) for col in df3.columns]
    transaction = {}
    for id_, hits in zip(df3.index, df3.values):
        transaction[id_] = [name for name, hit in zip(names, hits) if hit]

    return transaction


# Build the transaction of every row of g_df from the threshold items.
g_transaction = make_transaction(g_df, g_prop_minmax)
# Bare expression: shown as the cell output when run in a notebook.
g_transaction


In [None]:
# Bare expression: shows the current transactions as the cell output.
g_transaction

In [None]:
import json
# Save the transactions as JSON. Opening in "w" mode overwrites an existing
# file and does not create the "data/" directory.
with open("data/atom_transaction.json", "w") as f:
    json.dump(g_transaction,f)


In [None]:
def get_booldata():
    """Collect the boolean ``is_*`` category flags of elements Z = 1..102.

    Returns:
        dict: ``{str(element): list of flag names}``. A flag from
        ``category_columns`` is listed when it is truthy for the element; a
        flag from ``negated_columns`` is listed as ``"not_<flag>"`` when it
        is falsy.
    """
    category_columns = ["is_actinoid", "is_alkali", "is_alkaline", "is_chalcogen",
                        "is_halogen", "is_lanthanoid", "is_metal", "is_metalloid",
                        "is_noble_gas", "is_post_transition_metal", "is_quadrupolar",
                        "is_rare_earth_metal", "is_transition_metal"]
    # Flags that are recorded by their absence; none are enabled at present.
    negated_columns = []

    element_flags = {}
    for Z in range(1, 103):
        elm = Element.from_Z(Z)
        flags = [prop for prop in category_columns if getattr(elm, prop)]
        # Each negated flag is looked up on its own, so the test never
        # depends on a value left over from the loop above.
        flags.extend("not_" + prop
                     for prop in negated_columns
                     if not getattr(elm, prop))
        element_flags[str(elm)] = flags
    return element_flags

# Per-element lists of the category flag names that apply.
g_bool_transaction = get_booldata()

In [None]:
# Bare expression: shows the category-flag lists as the cell output.
g_bool_transaction

In [None]:
def dict_update(transaction: dict, transaction_add: dict):
    """Return a new transaction dict with extra items appended per key.

    Neither argument is modified: every value of the result is a freshly
    built list, so the caller's lists are not extended in place.

    Args:
        transaction (dict): {key: list of items}; its keys define the keys
            of the result.
        transaction_add (dict): {key: list of items to append}; must contain
            every key of ``transaction`` (additional keys are ignored).

    Returns:
        dict: {key: items of ``transaction`` followed by the items of
        ``transaction_add`` for the same key}.

    Raises:
        KeyError: if a key of ``transaction`` is missing from
            ``transaction_add``.
    """
    return {elm: list(items) + list(transaction_add[elm])
            for elm, items in transaction.items()}
# Append the category flag names to each element's transaction and rebind
# g_transaction to the merged result.
g_transaction = dict_update(g_transaction, g_bool_transaction )

In [None]:
# Bare expression: shows the merged transactions as the cell output.
g_transaction

In [None]:
import json
# Save the merged transactions as JSON. Opening in "w" mode overwrites an
# existing file and does not create the "data/" directory.
with open("data/atom_transaction_additional.json", "w") as f:
    json.dump(g_transaction,f)
