In [1]:
import tensorflow as tf
from tensorflow.python.keras import layers, losses
import os
import json
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

### Model settings

In [2]:
# Label printed by predict_categories when the scaled model output is below 50.
first_category = "formula"
# Label printed by predict_categories when the scaled model output is 50 or above.
second_category = "name"
# Used to build the saved-model path: "models/<data_folder>-model".
data_folder = "formula-name"

### Data Standardization

In [3]:
@tf.keras.utils.register_keras_serializable()
def data_standardization(input_data): # CH3-CH=CH-CH(NO2)Br
    """Reduce raw text to lowercase letter runs separated by single spaces.

    The input is lowercased, every character outside the ranges a-z and à-ú
    is replaced by a space, and each run of whitespace is collapsed to one
    space. Leading/trailing whitespace is collapsed but not stripped.

    Args:
        input_data: string tensor (or value convertible to one) to normalize.

    Returns:
        A string tensor holding the normalized text.

    NOTE(review): presumably this must stay identical to the standardization
    used when the model was trained — confirm before changing either regex.
    """
    # NOTE(review): no `encoding` is passed, so non-ASCII capitals (e.g. "É")
    # may not be lowercased and would then be blanked by the next step — confirm.
    input_data = tf.strings.lower(input_data) # ch3-ch=ch-ch(no2)br
    # Digits, punctuation and any other character outside a-z / à-ú become spaces.
    input_data = tf.strings.regex_replace(input_data, "[^a-zà-ú]", ' ') # ch  ch ch ch no  br
    # Raw string: "\s" is not a valid Python escape sequence, so the non-raw
    # literal triggered an invalid-escape warning. The pattern text is unchanged.
    return tf.strings.regex_replace(input_data, r"\s+", ' ') # ch ch ch ch no br

In [4]:
# Load the saved Keras model from "models/<data_folder>-model".
# NOTE(review): presumably the saved model refers to data_standardization by its
# registered name, so that function must be defined before this cell runs — confirm.
loaded_model = tf.keras.models.load_model("models/" + data_folder + "-model")

In [10]:
def predict_categories(examples):
    """Print the predicted category and score for each example string.

    Each example is sent to ``loaded_model.predict`` as a one-element batch.
    The first value of the first output row is scaled by 100; scores below 50
    are reported as ``first_category``, all others as ``second_category``.

    Args:
        examples: iterable of strings to classify.

    Returns:
        None. One line per example is printed as
        ``<category> (<score>%): <example>``.
    """
    for text in examples:
        # predict() returns one output row per batch item; take its first value.
        raw_score = loaded_model.predict([text])[0][0]
        percent = raw_score * 100
        if percent < 50:
            label = first_category
        else:
            label = second_category
        print(label, "(" + "%.2f" % percent + "%):", text)

In [15]:
# Sample inorganic compound names (Spanish) to run through the classifier.
inorganic_names = [
    "arsenito diacido de sodio",
    "hipoclorito de sodio",
    "potasiuro de boro",
    "cloruro de sodio",
    "acido disulfuroso",
    "sulfurico"
]

# NOTE(review): the trailing "98.21%" does not match any score printed below;
# unclear what it refers to — confirm or remove.
predict_categories(inorganic_names) # 98.21%

name (100.00%): arsenito diacido de sodio
name (100.00%): hipoclorito de sodio
name (100.00%): potasiuro de boro
name (100.00%): cloruro de sodio
name (51.57%): acido disulfuroso
name (51.57%): sulfurico


In [11]:
# Sample organic compound names (Spanish), from single words to long
# locant-heavy names, to run through the classifier.
organic_names = [
    "arsano",
    "benceno",
    "naftaleno",
    "2-cloropentanato",
    "cloruro de propilo",
    "di 2-cloropentanil éter",
    "2-bromo-2-cloropropano",
    "metanoato de isopropilo",
    "orto-difenilciclohexano",
    "2-bromo-2-cloropropil yododecil éter",
    "3-cloro-2-fluoro-hexa-1,3-dien-5-in-1-ona",
    "4-amino-2,6,6-tricloro-7,7-difluoro-89-metil-3-nitro-1,1-diyodononaconta-1,3-dien-5-ona",
]

predict_categories(organic_names)

name (51.57%): arsano
name (99.98%): benceno
name (51.57%): naftaleno
name (51.57%): 2-cloropentanato
name (100.00%): cloruro de propilo
name (100.00%): di 2-cloropentanil éter
name (100.00%): 2-bromo-2-cloropropano
name (100.00%): metanoato de isopropilo
name (51.57%): orto-difenilciclohexano
name (100.00%): 2-bromo-2-cloropropil yododecil éter
name (100.00%): 3-cloro-2-fluoro-hexa-1,3-dien-5-in-1-ona
name (100.00%): 4-amino-2,6,6-tricloro-7,7-difluoro-89-metil-3-nitro-1,1-diyodononaconta-1,3-dien-5-ona


In [12]:
# Sample organic formulas in mixed casing, with and without bond separators,
# plus space-separated variants. data_standardization lowercases its input and
# replaces digits and punctuation with spaces.
organic_formulas = [
    "Ch3ChCh(Ch3ChCh3)ChCh",
    "H3C-CH2",
    "H3C-CH2-CH2",
    "h c c h",
    "h c c h h c c h",
    "CH3-CO-O-CH2-CH3",
    "CH3-CH2-O-CH2-CH3",
    "CH3-CH2-CH=CH-COOH",
    "ch3chch2ch(ch2ch2ch3)cooh",
    "ch3(Ch3)Chch2Ch(Ch3)Ch2Ch(Ch2Ch2Ch3)Ch3",
]

predict_categories(organic_formulas)

formula (0.00%): Ch3ChCh(Ch3ChCh3)ChCh
formula (0.00%): H3C-CH2
formula (0.00%): H3C-CH2-CH2
formula (0.00%): h c c h
formula (0.00%): h c c h h c c h
formula (0.00%): CH3-CO-O-CH2-CH3
formula (0.00%): CH3-CH2-O-CH2-CH3
formula (0.00%): CH3-CH2-CH=CH-COOH
formula (0.00%): ch3chch2ch(ch2ch2ch3)cooh
formula (0.00%): ch3(Ch3)Chch2Ch(Ch3)Ch2Ch(Ch2Ch2Ch3)Ch3


In [13]:
# NOTE(review): this cell repeats the earlier organic_names cell verbatim
# (same list, same call); consider removing one of the two.
organic_names = [
    "arsano",
    "benceno",
    "naftaleno",
    "2-cloropentanato",
    "cloruro de propilo",
    "di 2-cloropentanil éter",
    "2-bromo-2-cloropropano",
    "metanoato de isopropilo",
    "orto-difenilciclohexano",
    "2-bromo-2-cloropropil yododecil éter",
    "3-cloro-2-fluoro-hexa-1,3-dien-5-in-1-ona",
    "4-amino-2,6,6-tricloro-7,7-difluoro-89-metil-3-nitro-1,1-diyodononaconta-1,3-dien-5-ona",
]

predict_categories(organic_names)

name (51.57%): arsano
name (99.98%): benceno
name (51.57%): naftaleno
name (51.57%): 2-cloropentanato
name (100.00%): cloruro de propilo
name (100.00%): di 2-cloropentanil éter
name (100.00%): 2-bromo-2-cloropropano
name (100.00%): metanoato de isopropilo
name (51.57%): orto-difenilciclohexano
name (100.00%): 2-bromo-2-cloropropil yododecil éter
name (100.00%): 3-cloro-2-fluoro-hexa-1,3-dien-5-in-1-ona
name (100.00%): 4-amino-2,6,6-tricloro-7,7-difluoro-89-metil-3-nitro-1,1-diyodononaconta-1,3-dien-5-ona


In [22]:
# Sample inorganic formulas, including irregular casing and some strings that
# are not valid formulas.
# NOTE(review): "CL203" (digit zero, possibly meant as Cl2O3), "G2S2O5" and
# "FeNA2" look like typos or deliberate malformed inputs — confirm intent.
# NOTE(review): "NaCl", "znCO2" and "FeNA2" are printed as name (51.57%), the
# same score several other inputs receive; presumably the model's output when
# no token of the input is in its vocabulary — confirm.
inorganic_formulas = [
    "mn2(Hpo3)3",
    "NaCl",
    "H2SO4",
    "CL203",
    "znCO2",
    "H2O",
    "FeNA2",
    "G2S2O5",
    "Cl2O⁺H2O",
]

predict_categories(inorganic_formulas)

formula (0.00%): mn2(Hpo3)3
name (51.57%): NaCl
formula (0.00%): H2SO4
formula (0.01%): CL203
name (51.57%): znCO2
formula (0.00%): H2O
name (51.57%): FeNA2
formula (3.05%): G2S2O5
formula (0.00%): Cl2O⁺H2O
