# Step 2: Cluster Network Tool
Use this notebook to determine cluster networks, coordination numbers, SAXS patterns from cluster distributions, and cluster shape function distributions in PDF.

## Custom Imports
Import the custom classes that support the cluster network tool, using a path relative to this notebook.

In [1]:
# Import and run the setup script
import sys, os

# Put the parent of the current working directory (the project root) on
# sys.path so that the setup_env module can be imported below.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

# This import must come after the sys.path adjustment above.
from setup_env import setup_environment

# setup_environment() hands back the project classes; unpack them in the
# order it returns them so the rest of the notebook can use them by name.
(
    TrajectoryProcessor,
    PDBFileHandler,
    Atom,
    ClusterNetwork,
    ClusterBatchAnalyzer,
) = setup_environment()

Scripts directory '/Users/keithwhite/repos/MDScatter/scripts' has been added to sys.path.
Class imports have been set up.


## Cluster Network Usage
Identify and parse cluster networks from MD PDB files.

### Save Individually Identified Clusters

In [2]:
# Extract the clusters found in a single PDB frame and write each one to its
# own PDB file together with its coordinated shell residues.

# Define the path to the PDB file and initialize the handler
# NOTE: machine-specific absolute path; point it at your local copy of the example PDB.
pdb_file_path = '/Users/keithwhite/repos/run_fullrmc/clustertool/example/PbI2_DMSO_0p8M_test.pdb'
core_residue_names = ['PBI']
shell_residue_names = ['DMS']
pdb_handler = PDBFileHandler(pdb_file_path, core_residue_names, shell_residue_names)

# Initialize ClusterNetwork with relevant parameters
node_elements = ['Pb']
linker_elements = ['Pb', 'I']
terminator_elements = ['I']
segment_cutoff = 3.8
cluster_network = ClusterNetwork(
    pdb_handler.core_atoms,
    pdb_handler.shell_atoms,
    node_elements,
    linker_elements,
    terminator_elements,
    segment_cutoff,
    core_residue_names,
    shell_residue_names
)

# Specify the inputs for coordination
target_elements = ['Pb']  # Elements in the main cluster to consider for coordination
neighbor_elements = ['O']  # Neighboring elements in the shell residues
distance_thresholds = {('Pb', 'O'): 3}  # Coordination thresholds

# Output directory for the PDB files
# NOTE: machine-specific absolute path; adjust to your local layout.
output_directory = '/Users/keithwhite/repos/run_fullrmc/clustertool/example/clusters'

# Make sure the output directory exists before writing, as the batch-processing
# cell does. This runs only after the input PDB has been read successfully, so a
# bad input path does not leave an empty directory behind.
os.makedirs(output_directory, exist_ok=True)

# Write the cluster PDB files with the coordinated shell residues
cluster_network.write_cluster_pdb_files_with_coordinated_shell(
    pdb_handler,
    output_directory,
    target_elements,
    neighbor_elements,
    distance_thresholds,
    shell_residue_names
)


FileNotFoundError: [Errno 2] No such file or directory: '/Users/keithwhite/repos/run_fullrmc/clustertool/example/PbI2_DMSO_0p8M_test.pdb'

### Save Individually Identified Clusters w/ Shell (Solvent) Molecules

In [None]:
# Extract the clusters found in a single PDB frame and write each one to its
# own PDB file together with its coordinated shell (solvent) residues.
# NOTE(review): this cell makes the same call as the previous one and differs
# only in segment_cutoff (3.7 here); confirm both cells are still needed.

# Define the path to the PDB file and initialize the handler
# NOTE: machine-specific absolute path; point it at your local copy of the example PDB.
pdb_file_path = '/Users/keithwhite/repos/run_fullrmc/clustertool/example/PbI2_DMSO_0p8M_test.pdb'
core_residue_names = ['PBI']
shell_residue_names = ['DMS']
pdb_handler = PDBFileHandler(pdb_file_path, core_residue_names, shell_residue_names)

# Initialize ClusterNetwork with relevant parameters
node_elements = ['Pb']
linker_elements = ['Pb', 'I']
terminator_elements = ['I']
segment_cutoff = 3.7
cluster_network = ClusterNetwork(
    pdb_handler.core_atoms,
    pdb_handler.shell_atoms,
    node_elements,
    linker_elements,
    terminator_elements,
    segment_cutoff,
    core_residue_names,
    shell_residue_names
)

# Specify the inputs for coordination
target_elements = ['Pb']  # Elements in the main cluster to consider for coordination
neighbor_elements = ['O']  # Neighboring elements in the shell residues
distance_thresholds = {('Pb', 'O'): 3}  # Coordination thresholds

# Output directory for the PDB files
# NOTE: machine-specific absolute path; adjust to your local layout.
output_directory = '/Users/keithwhite/repos/run_fullrmc/clustertool/example/clusters'

# Make sure the output directory exists before writing, as the batch-processing
# cell does. This runs only after the input PDB has been read successfully, so a
# bad input path does not leave an empty directory behind.
os.makedirs(output_directory, exist_ok=True)

# Write the cluster PDB files with the coordinated shell residues
cluster_network.write_cluster_pdb_files_with_coordinated_shell(
    pdb_handler,
    output_directory,
    target_elements,
    neighbor_elements,
    distance_thresholds,
    shell_residue_names
)


### Batch Process MD Frames to Extract Clusters

In [None]:
import os

# Batch job: for every .pdb frame in the input directory, build the cluster
# network and write each cluster (plus coordinated shell residues) to the
# output directory.

# Define the paths for input and output directories
# NOTE: machine-specific absolute paths; adjust to your local data layout.
# Alternative dataset, kept for reference:
# input_pdb_directory = '/Users/keithwhite/repos/run_fullrmc/md_sim/travis_md_results/PbI2_DMSO_0.8M_berendsen/trajectory_pdb'  # Path to the folder with PDB files
# output_directory = '/Users/keithwhite/repos/run_fullrmc/md_sim/travis_md_results/PbI2_DMSO_0.8M_berendsen/split_clusters_pdb_PbI3p4A'

input_pdb_directory = '/Users/keithwhite/repos/run_fullrmc/md_sim/travis_md_crnb/PbI2_DMSO_0.8M_cr_nb/split_pdb'  # Path to the folder with PDB files
output_directory = '/Users/keithwhite/repos/run_fullrmc/md_sim/travis_md_crnb/PbI2_DMSO_0.8M_cr_nb/clusters_pdb'

# Core and shell residue names (customize as needed)
core_residue_names = ['PBI']
shell_residue_names = ['DMS']

# Ensure the output directory exists; exist_ok avoids the check-then-create race
os.makedirs(output_directory, exist_ok=True)

# Cluster network parameters
node_elements = ['Pb']
linker_elements = ['Pb', 'I']
terminator_elements = ['I']
segment_cutoff = 3.6

# Coordination inputs
target_elements = ['Pb']  # Elements in the main cluster to consider for coordination
neighbor_elements = ['O']  # Neighboring elements in the shell residues
distance_thresholds = {('Pb', 'O'): 3}  # Coordination thresholds

# Collect the PDB frames up front. os.listdir() returns entries in arbitrary
# order, so sort them to make the processing order reproducible between runs.
pdb_file_names = sorted(
    file_name for file_name in os.listdir(input_pdb_directory)
    if file_name.endswith('.pdb')
)

# Loop through all PDB files in the input directory
for pdb_file_name in pdb_file_names:
    pdb_file_path = os.path.join(input_pdb_directory, pdb_file_name)

    # Initialize the PDB handler
    pdb_handler = PDBFileHandler(pdb_file_path, core_residue_names, shell_residue_names)

    # Initialize ClusterNetwork with relevant parameters
    cluster_network = ClusterNetwork(
        pdb_handler.core_atoms,
        pdb_handler.shell_atoms,
        node_elements,
        linker_elements,
        terminator_elements,
        segment_cutoff,
        core_residue_names,
        shell_residue_names
    )

    # Write the cluster PDB files with the coordinated shell residues
    cluster_network.write_cluster_pdb_files_with_coordinated_shell(
        pdb_handler,
        output_directory,
        target_elements,
        neighbor_elements,
        distance_thresholds,
        shell_residue_names
    )

# Only report success when at least one frame was actually processed
if pdb_file_names:
    print("Cluster isolation and PDB file generation complete.")
else:
    print(f"No .pdb files found in '{input_pdb_directory}'; nothing was processed.")

### Bond Length, Angle, Coordination Number Analysis

In [None]:
# Single-frame analysis: bond length/angle distributions, a network/branch
# summary, a PDB export with network-specific residue names, coordination
# numbers, and a 3D visualization of the networks.

# Define the path to the PDB file and initialize the handler
# NOTE: machine-specific absolute path; point it at your local copy of the example PDB.
pdb_file_path = '/Users/keithwhite/repos/run_fullrmc/clustertool/example/PbI2_DMSO_0p8M_test.pdb'
core_residue_names = ['PBI']
shell_residue_names = ['DMS']
pdb_handler = PDBFileHandler(pdb_file_path, core_residue_names, shell_residue_names)

# Initialize ClusterNetwork with relevant parameters
# NOTE(review): segment_cutoff is 3.6 here while the Pb-I bond/coordination
# cutoffs below use 3.8; confirm the difference is intentional.
node_elements = ['Pb']
linker_elements = ['Pb', 'I']
terminator_elements = ['I']
segment_cutoff = 3.6
cluster_network = ClusterNetwork(
    pdb_handler.core_atoms,
    pdb_handler.shell_atoms,
    node_elements,
    linker_elements,
    terminator_elements,
    segment_cutoff,
    core_residue_names,
    shell_residue_names
)

# Analyze networks
networks = cluster_network.analyze_networks()

# Specify bond lengths and angles to calculate
# Presumably each entry is the element names followed by a distance cutoff;
# verify against calculate_and_plot_distributions.
# bond_length_pairs = [('Pb', 'I', 3.8), ('I', 'I', 7.6)]
bond_length_pairs = [('Pb', 'I', 3.8)] #, ('I', 'I', 7.6)]
bond_angle_triplets = [('I', 'Pb', 'I', 3.8)]

# Calculate and plot distributions
cluster_network.calculate_and_plot_distributions(bond_length_pairs, bond_angle_triplets)

# Analyze and print network details
# NOTE(review): analyze_networks() was already called above; this second call
# looks redundant. Confirm it is idempotent, or drop one of the two calls.
networks = cluster_network.analyze_networks()
print("\nNetwork Analysis:")
print("Number of Networks:", len(networks))
for network_id in networks:
    print("Network ID:", network_id)
    branches = cluster_network.find_branches(network_id)
    branch_analysis = cluster_network.analyze_branches(branches)
    print("Number of Branches in Network:", branch_analysis["num_branches"])
    for branch_detail in branch_analysis["branch_details"]:
        print("Number of Segments in Branch:", branch_detail["num_segments"])
        print("Number of Motifs in Branch:", branch_detail["num_motifs"])

# Update atoms with network-specific residue names
# NOTE(review): the concatenation builds a new list but holds the same atom
# objects that were passed to ClusterNetwork, so (unless ClusterNetwork copies
# them) the coordination and visualization steps below see the renamed
# residues. Confirm that is intended.
updated_atoms = pdb_handler.core_atoms + pdb_handler.shell_atoms
for atom in updated_atoms:
    # NOTE(review): a truthiness test also skips a network_id of 0 or '';
    # confirm network IDs are never falsy.
    if atom.network_id:
        atom.residue_name = atom.network_id

# Apply the updated residue names to the atoms
pdb_handler.update_residue_names(updated_atoms)

# Optionally, write the modified PDB file with network-specific residue values
# NOTE: machine-specific absolute path; adjust to your local layout.
output_pdb_file_path = '/Users/keithwhite/repos/run_fullrmc/clustertool/example/output_PbI2_DMSO_0p8M_test.pdb'
pdb_handler.write_pdb_file(output_pdb_file_path)

# Calculate coordination numbers
# Thresholds are keyed by (target element, neighbor element) pairs.
target_elements = ['Pb']
neighbor_elements = ['I', 'O']
distance_thresholds = {('Pb', 'I'): 3.8, ('Pb', 'O'): 3}
coordination_stats, total_stats = cluster_network.calculate_coordination_numbers(target_elements, neighbor_elements, distance_thresholds)

# Print coordination numbers
cluster_network.print_coordination_numbers(coordination_stats, total_stats)

# Visualize the atom networks in 3D
cluster_network.visualize_networks()


In [None]:
# Heatmap configuration: Pb-I coordination on the x axis, Pb-O on the y axis.
# Uses the `cluster_network` object built in the preceding analysis cell.
central_element = 'Pb'
x_pair = (central_element, 'I')
y_pair = (central_element, 'O')

# Coordination-number ranges covered by each axis
x_range = (1, 6)  # Pb-I
y_range = (1, 6)  # Pb-O

# Distance cutoff for each element pair
distance_cutoffs = {x_pair: 3.6, y_pair: 3}

# Compute the coordination statistics and draw the heatmap
cluster_network.calculate_and_plot_heatmap(
    central_element,
    x_pair,
    y_pair,
    x_range,
    y_range,
    distance_cutoffs,
)


## Cluster Batch Analysis
Obtain statistical distribution information on batches of isolated clusters.

#### Check available coordination numbers and oxidation states for input atoms.

In [None]:
from mendeleev import element

# Symbol of the element to inspect ('I' = iodine); change it to check any
# other element used in the analysis, e.g. 'Pb' or 'O'.
element_symbol = 'I'

# Retrieve the element information for the selected symbol
selected_element = element(element_symbol)

# Fetch the tabulated ionic radii for the element's oxidation states and coordinations
ionic_radii = selected_element.ionic_radii

# Print the oxidation state, coordination and ionic radius of each entry
for ir in ionic_radii:
    print(f"Oxidation State: {ir.charge}, Coordination: {ir.coordination}, Ionic Radius: {ir.ionic_radius} pm")


#### Calculate statistical distribution information on cluster population.

In [None]:
%matplotlib widget

# Define parameters for analysis
pdb_directory = '/Users/keithwhite/repos/run_fullrmc/md_sim/travis_md_crnb/PbI2_DMSO_0.8M_cr_nb/clusters_pdb'
target_elements = ['Pb']
neighbor_elements = ['O', 'I']
distance_thresholds = {
    ('Pb', 'O'): 3.0,  # Example threshold distances in angstroms
    ('Pb', 'I'): 3.6
}

# Define the charge dictionary, ensuring all charges are integers
partial_charges = {
    'Pb': (2, 6),    # Lead with a charge of 2+ and coordination number of 6
    'I': (-1, 6),    # Iodine with a charge of 1- and coordination number of 6
    'S': (-2, 6),    # Sulfur in DMSO with a neutral charge and coordination number of 2
    'O': (-2, 2),    # Oxygen in DMSO with a charge of 2- and coordination number of 2
    'C': (4, 4),     # Carbon in DMSO with a neutral charge and coordination number of 4
    'H': (1, 1)      # Hydrogen in DMSO with a neutral charge and coordination number of 1
}

# Instantiate the ClusterBatchAnalyzer class with the necessary parameters
analyzer = ClusterBatchAnalyzer(
    pdb_directory=pdb_directory,
    target_elements=target_elements,
    neighbor_elements=neighbor_elements,
    distance_thresholds=distance_thresholds,
    charges=partial_charges,
    core_residue_names=['PBI'], 
    shell_residue_names=['DMS'],
    volume_method='radius_of_gyration'  # Choose the radius of gyration method
)

# Step 1: Analyze clusters to compute coordination numbers and volumes
# Specify shape_type='sphere' to use spherical approximation with radius of gyration
coordination_stats_per_size = analyzer.analyze_clusters(shape_type='sphere')
# coordination_stats_per_size = analyzer.analyze_clusters(shape_type='ellipsoid')

# Step 2: Define a range of q-values in inverse angstroms
q_values = np.linspace(0.01, 1.4, 1000)

# Step 3: Plot the total I(q) vs. q on a log-log scale
analyzer.plot_total_iq(q_values)
analyzer.save_total_iq(q_values)

# Additional Steps (Optional):
# If you want to visualize the average volume vs. cluster size using the radius of gyration,
# you can call the corresponding plot method:
analyzer.plot_average_volume_vs_cluster_size_rg()
