# SMILES and PDB ID Input Notebook

This notebook provides a simple interface to:

1. Enter a SMILES (Simplified Molecular Input Line Entry System) string representing a chemical structure
2. Enter a PDB (Protein Data Bank) ID
3. Save these inputs to variables for further processing

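The input cell performs basic format validation only: the SMILES string is checked against the set of characters allowed in SMILES, and the PDB ID must be exactly 4 alphanumeric characters. It does not verify that the SMILES describes a chemically valid structure or that the PDB entry actually exists.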

**Instructions:**
- Run the cell below
- Enter the requested information when prompted
- The values will be stored in variables named `smiles` and `pdb_id` (the PDB ID is converted to uppercase before it is stored)

In [None]:
# Input cell for SMILES string and PDB ID
import re

def validate_smiles(smiles):
    """Return True if ``smiles`` consists only of characters used in SMILES.

    This is a character-set check, not a parser: it does not verify ring
    closures, bracket balance, valence, or any other chemistry.

    Args:
        smiles: Candidate SMILES string.

    Returns:
        bool: False for an empty/falsy value or for a string containing any
        character outside the allowed set (including whitespace and
        newlines); True otherwise.
    """
    if not smiles:
        return False

    # Allowed characters: element symbols / ring-closure digits, chirality
    # (@), charges (+ -), bracket atoms, branches, the dot separator, bond
    # symbols (= # $ : / \), two-digit ring closures (%), and the wildcard
    # atom (*).
    # re.fullmatch is used instead of re.match with '$' because '$' also
    # matches just before a trailing newline, which would let "CCO\n" pass.
    pattern = r'[A-Za-z0-9@+\-\[\]\(\)\.=#$:%\/\\*]+'
    return bool(re.fullmatch(pattern, smiles))

def validate_pdb_id(pdb_id):
    """Return True if ``pdb_id`` has the classic 4-character PDB ID format.

    Only the format is checked (exactly four ASCII letters or digits, in
    either case); no lookup is made to confirm that the entry exists.

    Args:
        pdb_id: Candidate PDB identifier.

    Returns:
        bool: True when the value is exactly four alphanumeric characters,
        False otherwise (including for empty/falsy input).
    """
    if not pdb_id:
        return False

    # re.fullmatch rather than re.match with '$': '$' also matches just
    # before a trailing newline, so "4HHB\n" would otherwise be accepted.
    pattern = r'[A-Za-z0-9]{4}'
    return bool(re.fullmatch(pattern, pdb_id))

def _prompt_until_valid(prompt, is_valid, error_message):
    """Prompt repeatedly until the user enters a value accepted by ``is_valid``.

    Args:
        prompt: Text shown to the user by ``input``.
        is_valid: Callable taking the entered string and returning a bool.
        error_message: Text printed after each rejected entry.

    Returns:
        str: The first accepted entry, with surrounding whitespace removed.
    """
    while True:
        # input() drops only the trailing newline; strip stray spaces/tabs
        # (common when pasting) so they do not cause a spurious rejection.
        candidate = input(prompt).strip()
        if is_valid(candidate):
            return candidate
        print(error_message)


# Get SMILES input with validation
smiles = _prompt_until_valid(
    "Enter SMILES string: ",
    validate_smiles,
    "Invalid SMILES format. Please try again.",
)

# Get PDB ID input with validation
pdb_id = _prompt_until_valid(
    "Enter PDB ID (e.g., 4HHB): ",
    validate_pdb_id,
    "Invalid PDB ID format. A valid PDB ID is 4 characters long and alphanumeric.",
).upper()  # PDB IDs are conventionally uppercase

# Print confirmation
print("\nInputs successfully saved:")
print(f"SMILES: {smiles}")
print(f"PDB ID: {pdb_id}")
print("\nThese values are now stored in the variables 'smiles' and 'pdb_id'")