# Step 4: SAXS Calculation from Cluster Distributions
Calculate the small-angle X-ray scattering (SAXS) of cluster network distributions.

---

## Custom Imports
Import the custom classes that support the cluster network tool, using a path relative to this notebook.

In [None]:
# Import and run the setup script
import sys, os
import numpy as np
from typing import TypedDict, Optional, Dict

# Put the parent of the current working directory on sys.path so that the
# setup_env helper module can be located from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

# Helper that configures the environment and returns the project classes
from setup_env import setup_environment

# Unpack the objects returned by setup_environment(), one name per line
(
    BulkVolumeParams,
    BulkVolume,
    RadiusOfGyrationCalculator,
    PDBEditor,
    TrajectoryProcessor,
    PDBFileHandler,
    Atom,
    ClusterNetwork,
    ClusterBatchAnalyzer,
) = setup_environment()

# SAXS Cluster Batch Analysis

#### (Optional) Validate Scattering Volume Estimates

In [None]:
# Sample input parameters: PbI2 dissolved in DMSO

# -- Solvent (DMSO)
solvent_name = 'DMS'
molar_mass_solvent = 78.13    # g/mol
density_neat_solvent = 1.1    # g/cm³

# -- Solute (PbI2)
molar_mass_solute = 461.0     # g/mol

# -- Solution
mass_percent_solute = 25.83   # % PbI2 by mass (approximately 0.8 M)
density_solution = 1.403      # g/cm³
total_mass = 1.403            # grams of solution

# -- Per-element solute data, keyed by element symbol
ionic_radii = dict(Pb=1.19, I=2.20)          # angstroms
stoichiometry = dict(Pb=1, I=2)
atomic_masses = dict(Pb=207.2, I=126.9)      # g/mol
solute_residues = dict(Pb='PBI', I='PBI')    # residue name for each solute element

# Build the BulkVolume estimator from the sample parameters set earlier in this cell
bulk_volume = BulkVolume(
    # -- solution composition
    mass_percent_solute=mass_percent_solute,
    density_solution=density_solution,
    total_mass=total_mass,  # Optional: defaults to 100 g if not specified
    # -- solvent
    solvent_name=solvent_name,  # 3-letter uppercase string ('DMS' for DMSO)
    density_neat_solvent=density_neat_solvent,
    molar_mass_solvent=molar_mass_solvent,
    # -- solute
    molar_mass_solute=molar_mass_solute,
    ionic_radii=ionic_radii,
    stoichiometry=stoichiometry,
    atomic_masses=atomic_masses,
    solute_residues=solute_residues,
)

# Run the volume estimation and keep its result
volumes = bulk_volume.estimate_volumes()

### Setup for Electron Density Contrast Calculations

In [None]:
# Electron counts per species, grouped by residue name
electrons_info = dict(
    DMS=dict(Solvent=42),     # a DMSO solvent molecule has 42 electrons
    PBI=dict(Pb=80, I=54),    # a Pb2+ ion has 80 electrons, an I- ion has 54
)

import pprint

# Add the electrons-per-unit information to the volumes dictionary and show it
updated_volumes = bulk_volume.add_electrons_per_unit(electrons_info)
print("\nUpdated Volumes with Electrons:")
pprint.pprint(updated_volumes)

# Compute the electron density of the solution (solvent) and show it.
# This call is deliberately kept after the printout above so that the order of
# operations on bulk_volume is unchanged.
electron_density_solution = bulk_volume.calculate_solution_electron_density(electrons_info)
print("\nElectron Density of the Solution (Solvent):")
pprint.pprint(electron_density_solution)

### Initialize Cluster Batch Analyzer

#### Batch Analyzer Settings

In [2]:
# Select the interactive "widget" matplotlib backend for the figures in this
# notebook (this backend is provided by the ipympl package).
%matplotlib widget

## -- DEFINE THE PDB FOLDER PATH
# The cluster PDB folder can be overridden without editing the notebook by
# setting the MDSCATTER_PDB_DIR environment variable. When the variable is
# unset or empty, the original location is used.
# (`os` is imported in the first code cell of this notebook.)
pdb_directory = os.environ.get('MDSCATTER_PDB_DIR') or (
    '/Users/keithwhite/repos/MDScatter/data/PbI2_DMSO_0p8M_cr_nb/clusters_pdb_sc3p7_PbO-3'
)

## -- SETUP FOR FIRST COORDINATION SHELL
target_elements = ['Pb']
neighbor_elements = ['O', 'I']
# Keys are (target element, neighbor element) pairs
distance_thresholds = {
    ('Pb', 'O'): 3.0,  # Threshold distance in angstroms
    ('Pb', 'I'): 3.7
}

## -- FOR CHARGE DISTRIBUTION CALCULATION
# Each value is a (charge, coordination number) tuple
partial_charges = {
    'Pb': (2, 6),    # Lead with a charge of +2 and coordination number of 6
    'I': (-1, 1),    # Iodine with a charge of -1 and coordination number of 1
    'S': (0, 4),     # Sulfur in DMSO with a neutral charge and coordination number of 4
    'O': (0, 2),     # Oxygen in DMSO with a neutral charge and coordination number of 2
    'C': (0, 4),     # Carbon in DMSO with a neutral charge and coordination number of 4
    'H': (0, 1)      # Hydrogen in DMSO with a neutral charge and coordination number of 1
}

## -- DEFINE THE PARAMETERS FOR BULK VOLUME ESTIMATION FROM EXPERIMENTAL MEASUREMENTS
# NOTE(review): this dict reuses the name `BulkVolumeParams`, which the first
# code cell also unpacks from setup_environment(); after this cell runs the
# name refers to this dict rather than that imported object. The name is kept
# because the analyzer initialization cell passes `BulkVolumeParams` as
# `bulk_volume_params` — consider renaming both together.
BulkVolumeParams = {
    'mass_percent_solute': 25.83,
    'density_solution': 1.403,          # g/cm³
    'density_neat_solvent': 1.1,        # g/cm³
    'molar_mass_solvent': 78.13,        # g/mol for DMSO
    'molar_mass_solute': 461.0,         # g/mol for PbI₂
    'ionic_radii': {
        'Pb': 1.19,    # Ionic radius in Å
        'I': 2.20
    },
    'stoichiometry': {
        'Pb': 1,
        'I': 2
    },
    'atomic_masses': {
        'Pb': 207.2,   # Atomic mass in g/mol
        'I': 126.9
    },
    'solute_residues': {
        'Pb': 'PBI',   # Residue name for solute atoms
        'I': 'PBI'
    },
    'solvent_name': 'DMS',   # 3-letter code for DMSO
    'total_mass': 100.0,     # Total mass of the solution in grams
    'electrons_info': {
        'DMS': {
            'Solvent': 42  # Electrons per DMSO molecule
        },
        'PBI': {
            'Pb': 80,     # Electrons per Pb²⁺ ion
            'I': 54       # Electrons per I⁻ ion
        }
    }
}

#### Initialization

In [None]:
# Create the ClusterBatchAnalyzer from the batch analyzer settings
analyzer = ClusterBatchAnalyzer(
    # -- input data
    pdb_directory=pdb_directory,
    # -- first coordination shell and charge settings
    target_elements=target_elements,
    neighbor_elements=neighbor_elements,
    distance_thresholds=distance_thresholds,
    charges=partial_charges,
    # -- residue names for the core and shell
    core_residue_names=['PBI'],
    shell_residue_names=['DMS'],
    # -- volume estimation: the bulk volume method is selected here.
    #    Alternatives previously noted for this argument:
    #    volume_method='ionic_radius' or volume_method='radius_of_gyration'
    volume_method='bulk_volume',
    bulk_volume_params=BulkVolumeParams,
)

#### Cluster Distribution Estimates

In [None]:
# Run the analyzer's cluster analysis with its default arguments and keep the
# returned result.
# NOTE(review): the "Calculate SAXS" cell calls
# analyze_clusters(shape_type='sphere') and reassigns this same variable, so
# this call may be redundant — confirm before removing.
coordination_stats_per_size = analyzer.analyze_clusters()

## Estimate Electron Density Contrast

In [None]:
## Count the total number of solvent molecules over a set of full MD frame PDB files - total solvent molecules.
## Count the total number of solvent molecules over a set of cluster files - coordinated solvent molecules.
## Calculate the total fraction of coordinated solvent, and the resulting free solvent molecule count.

## Estimate the volume fraction of the free solvent molecules.
## Estimate the electron density of the solvent based on the number of free solvent molecules in the estimated volume and electrons per solvent molecule.
## Estimate the electron density of each individual cluster from their estimated volume and the electron counts from the unique atoms, as well as the contrast for each cluster to the solvent (delta rho).

## Cluster contrast value and solvent electron density value will be used to calculate an f0 scaling function in the SAXS cluster analyzer.

## Classes:
## Solvent electron density estimation class: 
### Provide path to set of PDB full MD frames, the residue name of the solvent molecule, and iteratively use the PDBFileHandler and Atom class to count total molecules over all frames.
### Estimate the bulk solvent volume using the solvent density and molar mass of the solvent, this is the volume per solvent molecule.
### Use the solvent residue name, PDBFileHandler and Atom classes to determine the total number of solvent molecules per frame, and over all frames.
### Estimate the total volume of the solvent molecules per frame and over all frames. 
### Estimate the volume percentage of the solvent molecules per frame and over all frames. This will require knowing the box size for each frame; we can provide the sidewall dimension of the box.
### Estimate the electron density from the number of molecules over all frames, electrons per molecule, and total volume of the molecules.

## Classes:
## Solution electron density estimate: 
### Provide path to PDB representative of a single MD frame, the residue name of the solvent molecule, and iteratively use the PDBFileHandler and Atom class to count total molecules over all frames.
### Estimate the bulk solvent volume using the solvent density and molar mass of the solvent, this is the volume per solvent molecule.
### Use the solvent residue name, PDBFileHandler and Atom classes to determine the total number of solvent molecules per frame, and over all frames.
### Estimate the total volume of the solvent molecules per frame and over all frames. 
### Estimate the volume percentage of the solvent molecules per frame and over all frames. This will require knowing the box size for each frame; we can provide the sidewall dimension of the box.
### Estimate the electron density from the number of molecules over all frames, electrons per molecule, and total volume of the molecules.

## Calculate SAXS

### Method 1 

In [None]:
# 1) Cluster analysis (coordination numbers and volumes), requesting the
#    spherical shape approximation via shape_type='sphere'.
#    Alternative: analyzer.analyze_clusters(shape_type='ellipsoid')
coordination_stats_per_size = analyzer.analyze_clusters(shape_type='sphere')

# 2) q grid in inverse angstroms: 1000 evenly spaced points from 0.01 to 1.4
q_values = np.linspace(0.01, 1.4, num=1000)

# 3) Plot the total I(q) vs. q, then save it
analyzer.plot_total_iq(q_values)
analyzer.save_total_iq(q_values)

# 4) Optional extra: average volume vs. cluster size (radius-of-gyration based plot)
analyzer.plot_average_volume_vs_cluster_size_rg()
