In [None]:
# Cell to test functionality
# Import necessary modules from RDKit
from rdkit import Chem
from rdkit.Chem import Descriptors

def generate_molecular_properties(smiles):
    # Parse the SMILES string to create a molecule object
    mol = Chem.MolFromSmiles(smiles)
    
    if mol is None:
        raise ValueError("Invalid SMILES string")
    
    # Calculate various molecular properties
    properties = {
        'Molecular Weight': Descriptors.MolWt(mol),
        'LogP': Descriptors.MolLogP(mol),
        'Number of Hydrogen Bond Donors': Descriptors.NumHDonors(mol),
        'Number of Hydrogen Bond Acceptors': Descriptors.NumHAcceptors(mol),
        'Number of Rotatable Bonds': Descriptors.NumRotatableBonds(mol),
        'Topological Polar Surface Area': Descriptors.TPSA(mol),
        'Number of Rings': Descriptors.RingCount(mol)
    }
    
    return properties

# Example usage
smiles = "CCO"  # Ethanol
properties = generate_molecular_properties(smiles)
for prop, value in properties.items():
    print(f"{prop}: {value}")

In [None]:
# Cell to validate that we can generate Mordred descriptors in the descriptors.txt file
import sys
sys.path.append('.')

from mordred import Calculator, descriptors
import pandas as pd
import numpy as np

def main():
    # Load columns from the file
    wanted_columns = list(open('descriptors.txt', 'r').readline().strip().split('\t'))
    print(f"Total columns in the file: {len(wanted_columns)}")

    # Get all available Mordred descriptor names
    calc = Calculator(descriptors)
    all_descriptor_names = [str(desc) for desc in calc.descriptors]
    print(f"Total Mordred descriptors available: {len(all_descriptor_names)}")

    # Find how many of the file columns match Mordred descriptors
    matching_columns = [col for col in wanted_columns if col in all_descriptor_names]
    print(f"Columns matching Mordred descriptors: {len(matching_columns)}")

    # Print some diagnostic information
    print("\nSample non-matching columns:")
    non_matching = [col for col in wanted_columns if col not in all_descriptor_names]
    print(non_matching[:20])  # Print first 20 non-matching columns

if __name__ == '__main__':
    main()

In [None]:
import pandas as pd
from mordred import Calculator, descriptors
from rdkit import Chem
import numpy as np

def generate_mordred_descriptors(smiles_list, columns=None):
    """
    Generate Mordred descriptors for a list of SMILES strings.
    
    Parameters:
    -----------
    smiles_list : list
        List of SMILES strings to calculate descriptors for
    columns : list, optional
        List of specific columns to include
    
    Returns:
    --------
    pandas.DataFrame
        DataFrame containing molecular descriptors for each SMILES string
    """
    # Create a calculator with all available descriptors
    calc = Calculator(descriptors)
    
    # If columns are specified, filter the descriptors
    if columns:
        # Filter descriptors to match the specified columns
        filtered_descriptors = [desc for desc in descriptors if str(desc) in columns]
        calc = Calculator(filtered_descriptors)
    
    # Prepare results
    results = []
    
    # Calculate descriptors for each SMILES string
    for smiles in smiles_list:
        # Convert SMILES to RDKit molecule
        mol = Chem.MolFromSmiles(smiles)
        
        if mol is not None:
            # Calculate descriptors
            try:
                desc_values = calc(mol)
                # Convert to dictionary, adding SMILES as first column
                desc_dict = {'SMILES': smiles, **dict(desc_values)}
                results.append(desc_dict)
            except Exception as e:
                print(f"Error calculating descriptors for {smiles}: {e}")
        else:
            print(f"Invalid SMILES string: {smiles}")
    
    # Convert to DataFrame
    df = pd.DataFrame(results)
    
    # Ensure all specified columns are present, fill with NaN if missing
    if columns:
        for col in columns:
            if col not in df.columns:
                df[col] = np.nan
        
        # Reorder columns to match the original specification
        df = df[['SMILES'] + [col for col in columns if col != 'SMILES']]
    
    return df

def load_column_names(file_path):
    """
    Load column names from a tab-separated file.
    
    Parameters:
    -----------
    file_path : str
        Path to the file containing column names
    
    Returns:
    --------
    list
        List of column names
    """
    with open(file_path, 'r') as f:
        # Read the first line and split by tab
        columns = f.readline().strip().split('\t')
    return columns

def main():
    # Example usage
    smiles_list = [
        'CC(=O)OC1=CC=CC=C1C(=O)O',  # Aspirin
        'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',  # Caffeine
        'CC(C)(C)NCC(O)C1=CC(=C(C=C1)O)CO'  # Salbutamol
    ]
    
    # Generate descriptors
    descriptors_df = generate_mordred_descriptors(smiles_list)
    
    # Save to CSV (optional)
    descriptors_df.to_csv('mordred_descriptors.csv', index=False)
    
    # Print first few rows and basic info
    print(descriptors_df)
    print("\nTotal descriptors calculated:", len(descriptors_df.columns) - 1)  # -1 for SMILES column

if __name__ == '__main__':
    main()

# Note: This script requires the following libraries:
# - mordred
# - rdkit
# - pandas

# To install dependencies:
# pip install mordred rdkit pandas

In [None]:
import pandas as pd
from mordred import Calculator, descriptors
from rdkit import Chem
import numpy as np

def generate_mordred_descriptors(smiles_list, columns=None):
    """
    Generate Mordred descriptors for a list of SMILES strings.
    
    Parameters:
    -----------
    smiles_list : list
        List of SMILES strings to calculate descriptors for
    columns : list, optional
        List of specific columns to include
    
    Returns:
    --------
    pandas.DataFrame
        DataFrame containing molecular descriptors for each SMILES string
    """
    # Create a calculator with all available descriptors
    calc = Calculator(descriptors)
    
    # If columns are specified, filter the descriptors
    if columns:
        # Filter descriptors to match the specified columns
        filtered_descriptors = [desc for desc in descriptors if str(desc) in columns]
        calc = Calculator(filtered_descriptors)
    
    # Prepare results
    results = []
    
    # Calculate descriptors for each SMILES string
    for smiles in smiles_list:
        # Convert SMILES to RDKit molecule
        mol = Chem.MolFromSmiles(smiles)
        
        if mol is not None:
            # Calculate descriptors
            try:
                desc_values = calc(mol)
                # Convert to dictionary, adding SMILES as first column
                desc_dict = {'SMILES': smiles, **dict(desc_values)}
                results.append(desc_dict)
            except Exception as e:
                print(f"Error calculating descriptors for {smiles}: {e}")
        else:
            print(f"Invalid SMILES string: {smiles}")
    
    # Convert to DataFrame
    df = pd.DataFrame(results)
    
    # Ensure all specified columns are present, fill with NaN if missing
    if columns:
        for col in columns:
            if col not in df.columns:
                df[col] = np.nan
        
        # Reorder columns to match the original specification
        df = df[['SMILES'] + [col for col in columns if col != 'SMILES']]
    
    return df

def load_column_names(file_path):
    """
    Load column names from a tab-separated file.
    
    Parameters:
    -----------
    file_path : str
        Path to the file containing column names
    
    Returns:
    --------
    list
        List of column names
    """
    with open(file_path, 'r') as f:
        # Read the first line and split by tab
        columns = f.readline().strip().split('\t')
    return columns

def main():
    # Example usage
    smiles_list = [
        'CC(=O)OC1=CC=CC=C1C(=O)O',  # Aspirin
        'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',  # Caffeine
        'CC(C)(C)NCC(O)C1=CC(=C(C=C1)O)CO'  # Salbutamol
    ]
    
    
    # Generate descriptors
    descriptors_df = generate_mordred_descriptors(smiles_list)
    
    # Save to CSV (optional)
    descriptors_df.to_csv('mordred_descriptors.csv', index=False)
    
    # Print first few rows and basic info
    print(descriptors_df)
    print("\nTotal descriptors calculated:", len(descriptors_df.columns) - 1)  # -1 for SMILES column

main()

In [None]:
from rdkit import Chem
from mordred import Calculator, descriptors
import pandas as pd

def load_descriptors(file_path):
    """
    Load descriptor names from a tab-separated text file.
    
    Parameters:
    -----------
    file_path : str
        Path to the file containing descriptor names
    
    Returns:
    --------
    list
        List of descriptor names
    """
    with open(file_path, 'r') as f:
        # Read the first line and split by tab
        descriptor_list = f.readline().strip().split('\t')
    return descriptor_list

def generate_mordred_descriptors(smiles_list, descriptor_names):
    """
    Generate specified Mordred descriptors for a list of SMILES strings.
    
    Parameters:
    -----------
    smiles_list : list
        List of SMILES strings
    descriptor_names : list
        List of descriptor names to calculate
    
    Returns:
    --------
    DataFrame
        DataFrame containing the specified descriptors
    """
    valid_descriptors = []
    for name in descriptor_names:
        try:
            valid_descriptors.append(getattr(descriptors, name))
        except AttributeError:
            print(f"Warning: Descriptor '{name}' is not valid and will be skipped.")
    
    calc = Calculator(valid_descriptors, ignore_3D=True)
    mols = [Chem.MolFromSmiles(smiles) for smiles in smiles_list]
    descriptors_df = calc.pandas(mols)
    descriptors_df.insert(0, 'SMILES', smiles_list)
    return descriptors_df

def main():
    # Load descriptor names from a tab-separated text file
    descriptor_names = load_descriptors('descriptors.txt')
    
    # Example usage
    smiles_list = [
        'CC(=O)OC1=CC=CC=C1C(=O)O',  # Aspirin
        'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',  # Caffeine
        'CC(C)(C)NCC(O)C1=CC(=C(C=C1)O)CO'  # Salbutamol
    ]
    
    # Generate descriptors
    descriptors_df = generate_mordred_descriptors(smiles_list, descriptor_names)
    
    # Save to CSV (optional)
    descriptors_df.to_csv('mordred_descriptors.csv', index=False)
    
    # Print first few rows and basic info
    print(descriptors_df)
    print("\nTotal descriptors calculated:", len(descriptors_df.columns) - 1)  # -1 for SMILES column

main()

In [5]:
import modal

app = modal.App("example-get-started")


@app.function()
def square(x):
    print("This code is running on a remote worker!")
    return x**2


@app.local_entrypoint()
def main():
    print("the square is", square.remote(42))

In [6]:
print("the square is", square.remote(42))

ExecutionError: Function has not been hydrated with the metadata it needs to run on Modal, because the App it is defined on is not running.