In [11]:
import pandas as pd
import random

# Step 1: Load Data from Excel
def load_chemical_data(file_path):
    chemical_data = pd.read_excel(file_path)
    return chemical_data['Compound'].tolist()  # Assuming the column containing compound names is named 'Compound'

# Step 2: Random Reactant Selection
def select_random_reactants(chemical_data, max_reactants=3):
    reactants = random.sample(chemical_data, min(len(chemical_data), max_reactants))
    return reactants

# Step 3: Random Product Selection
def select_random_products(reactants, chemical_data):
    # Ensure that products include some of the selected reactants
    num_products = random.randint(1, len(reactants))
    products = random.sample(reactants, num_products) + random.sample(chemical_data, random.randint(1, len(chemical_data)))
    return products

# Step 4: Generate Chemical Equation
def generate_chemical_equation(reactants, products):
    equation = ' + '.join(reactants) + ' -> ' + ' + '.join(products)
    return equation

# Step 5: Output
def main():
    # Load chemical compounds from Excel file
    file_path = 'compData.xlsx'  # Replace with the path to your Excel file
    chemical_data = load_chemical_data(file_path)
    
    # Select random reactants and products
    reactants = select_random_reactants(chemical_data)
    products = select_random_products(reactants, chemical_data)
    
    # Generate chemical equation
    chemical_equation = generate_chemical_equation(reactants, products)
    
    # Output
    print("Reactants:", reactants)
    print("Products:", products)
    print("Chemical Equation:", chemical_equation)

if __name__ == "__main__":
    main()


Reactants: ['LiH2PO3', 'BaSeO3', 'Mg(VO3)2']
Products: ['LiH2PO3', 'BaSeO3', 'C27H33N3O5', 'Th(SO4)2', 'C26H38O6', 'F4Mo', 'FeC10H10', 'Zn(AsO2)2', 'C8H6BrN', 'CsClO4', 'TlI3', 'Na2HPO4', 'FOTh', 'BeS', 'C4H4O', 'C8H9NO2', 'WI4', 'ZnTiO3', 'LiHSO4', 'H2S2O5', 'Zn3(PO4)2', 'AgCl3Cu2', 'C30H42O11', '(NH2)2CO·HClO4', 'CuSe', 'Al2Te', 'C4H6N2', 'WC', 'C37H48N2O8', 'NaNO3', 'RbIO4', 'AsCl3O', 'C3H6O2', 'C19H22N2', 'K2S', 'C4H6N2', 'F3Rh', 'C10H8', 'XeOF2', 'AsH3', 'C6H3Cl3O', 'CH2ClCOOH', 'Co2B', 'C19H14N2O4', 'Ga2Te3', 'BrF5', 'Ce2C3', 'C4H10O2', 'C5H4N4O2', 'RbIO3', 'C11H8O2', 'F2Ni', 'FW', 'EuNbO2', 'FeVO4', 'C5H9NO', 'C7H9BO3', 'Au(OH)3', 'C3N3(OH)3', 'C20H34O5', 'Zn(IO3)2', 'YbBr2', 'WBr2', 'Mg(ClO2)2', 'DyCl3', 'Co2S3', 'EuI2', 'AgBr', 'F3Fe', 'H2SO3', 'ZnCl2', 'C16H30N4O4S', 'C41H50N2O11', 'F2O', 'B2H6', 'HC5H5N+', 'Cu(VO3)2', 'C12H18N4O2', 'F7NS', 'C17H13ClN4', 'NaVO3', 'KAsO2', 'CH2CO', 'CuTeO3', 'PoCl4', 'TeY', 'KNO2', 'AsClO', 'HgTe', 'C10H13N5O4', 'C21H26O4', 'BrI', 'ZrS2', 'C52

In [17]:
import pandas as pd
import random

# Step 1: Load Unique Compounds
def load_unique_compounds(file_path):
    unique_compounds = pd.read_excel(file_path)['Compound'].tolist()
    return unique_compounds

# Step 2: Generate Random Reaction
def generate_random_reaction(compounds):
    # Randomly select reactants and products
    num_reactants = random.randint(1, min(len(compounds), 3))
    num_products = random.randint(1, min(len(compounds), 3))
    reactants = random.sample(compounds, num_reactants)
    products = random.sample(compounds, num_products)
    
    # Create reaction string
    reaction = ' + '.join(reactants) + ' -> ' + ' + '.join(products)
    return reaction

# Step 3: Dataset Creation
def create_dataset(compounds, num_equations):
    dataset = []
    for _ in range(num_equations):
        reaction = generate_random_reaction(compounds)
        dataset.append({'Reaction': reaction})
    return pd.DataFrame(dataset)

# Main Function
def main():
    # Load unique compounds
    file_path = 'compData.xlsx'  # Replace with the path to your Excel file
    unique_compounds = load_unique_compounds(file_path)
    
    # Create dataset
    num_equations = 10000
    dataset = create_dataset(unique_compounds, num_equations)
    
    # Save dataset to CSV
    dataset.to_csv('chemical_equations_dataset.csv', index=False)
    
    # Display dataset summary
    print("Dataset Summary:")
    print(dataset.head())
    print(dataset.info())

if __name__ == "__main__":
    main()


Dataset Summary:
                                            Reaction
0                       Na2TeO4 -> DLi + WI4 + HgBr2
1                NH4OH + FMo -> C20H19N + Cr2(TeO4)3
2  Sn(VO3)2 + C10H12N2 + Tm2(SO4)3 -> C10H9NO2 + ...
3  C21H25NO4 + Rb2CO3 + NaC6H7O7 -> CoI2 + C15H13...
4                      LiBr·2H2O -> LiIO3 + Sr(IO2)2
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Reaction  10000 non-null  object
dtypes: object(1)
memory usage: 78.2+ KB
None
