# In [None]:
# Advanced GNoME Structure Visualization Tool
# Author: Michael R. Lafave
# Description: Enhanced visualization and analysis of GNoME crystal structures

import os
import pandas as pd
import numpy as np
import tensorflow as tf
import shutil
import tempfile
import zipfile
from typing import Tuple, List, Dict, Optional

# Advanced materials science libraries
import pymatgen as mg
from pymatgen.io.cif import CifParser
import pymatgen.io.cif
from pymatgen.core import Structure, Lattice
from pymatgen.analysis.structure_analyzer import VoronoiConnectivity
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

# Visualization libraries
import ase.io.cif
import ase.visualize
import ase
import nglview
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Enable custom widgets in colab
# NOTE(review): presumably required for the nglview widget to render in
# Colab output cells -- confirm against the Colab documentation.
from google.colab import output
output.enable_custom_widget_manager()

# Constants
# The download URL of each dataset file is built by joining
# PUBLIC_LINK / BUCKET_NAME / FOLDER_NAME / <file name>.
PUBLIC_LINK = "https://storage.googleapis.com/"
BUCKET_NAME = "gdm_materials_discovery"
FOLDER_NAME = "gnome_data"
# Files fetched by GNoMEVisualizer.download_dataset(): the CSV is read into a
# DataFrame by the driver code, the zip is opened to extract CIF files.
FILES = (
    "stable_materials_summary.csv",
    "by_reduced_formula.zip",
)

class GNoMEVisualizer:
    """Download, load, visualise, analyse and export GNoME crystal structures.

    Typical flow::

        viz = GNoMEVisualizer()
        viz.download_dataset()
        viz.obtain_structure("NaCl")
        viz.visualize_structure()
        viz.analyze_structure()

    Attributes (all ``None`` until the step that fills them has run):
        data: Not populated by any method of this class.
        z: Open ``zipfile.ZipFile`` of CIF files (set by ``download_dataset``).
        atoms: ASE representation of the loaded structure.
        structure: pymatgen representation of the loaded structure.
        symmetry_data: Dict with space-group symbol/number and point group.
        connectivity: ``VoronoiConnectivity`` built from ``structure``.
    """

    def __init__(self):
        self.data = None
        self.structure = None
        self.atoms = None
        self.connectivity = None
        # Initialised here so that using the object before download/load
        # fails with a clear message instead of an AttributeError.
        self.symmetry_data = None
        self.z = None

    def close(self):
        """Close the structure archive if it is open (safe to call twice)."""
        if self.z is not None:
            self.z.close()
            self.z = None

    def download_dataset(self):
        """Download every file listed in ``FILES`` and open the CIF archive.

        Files are saved into the current working directory.

        Raises:
            RuntimeError: If a download fails or the archive cannot be opened.
        """
        # URLs always use '/', so build them explicitly rather than with
        # os.path.join (which uses the platform's path separator).
        base_url = f"{PUBLIC_LINK.rstrip('/')}/{BUCKET_NAME}/{FOLDER_NAME}"

        for filename in FILES:
            self._download_with_verification(f"{base_url}/{filename}", '.')

        # Release a handle left over from a previous call before reopening.
        self.close()
        try:
            self.z = zipfile.ZipFile('by_reduced_formula.zip')
        except (OSError, zipfile.BadZipFile) as e:
            raise RuntimeError(f"Error loading dataset: {e}") from e
        print("Dataset successfully loaded and verified")

    def _download_with_verification(self, link: str, output_dir: str):
        """Download ``link`` into ``output_dir`` with ``wget``.

        Only the exit status of ``wget`` is checked; no checksum is verified.

        Raises:
            RuntimeError: If ``wget`` cannot be started or exits non-zero.
        """
        import subprocess

        try:
            # Argument list (no shell) so the URL is never shell-interpreted.
            subprocess.run(["wget", link, "-P", output_dir], check=True)
        except (OSError, subprocess.CalledProcessError) as e:
            raise RuntimeError(f"Download failed for {link}: {e}") from e
        # TODO: add MD5 checksum verification here

    def obtain_structure(self, reduced_formula: Optional[str] = None) -> "Tuple[ase.Atoms, mg.core.Structure]":
        """Load the structure for ``reduced_formula`` from the CIF archive.

        On success the loaded objects are also stored on the instance
        (``atoms``, ``structure``, ``symmetry_data``, ``connectivity``); on
        failure the previously loaded state is left untouched.

        Args:
            reduced_formula: Name of the archive entry without extension; the
                file read is ``by_reduced_formula/<reduced_formula>.CIF``.

        Returns:
            ``(atoms, structure)`` as ASE and pymatgen objects.

        Raises:
            ValueError: If ``reduced_formula`` is empty or ``None``.
            RuntimeError: If the dataset has not been downloaded, or if
                reading or analysing the structure fails.
        """
        if not reduced_formula:
            raise ValueError("reduced_formula must be a non-empty string")
        if self.z is None:
            raise RuntimeError("Dataset not loaded; call download_dataset() first")

        extension = f"{reduced_formula}.CIF"
        # Zip member names always use '/', independent of the host OS.
        member = f"by_reduced_formula/{extension}"

        try:
            # Both readers take a file path, so extract to a temp file first.
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = os.path.join(temp_dir, extension)
                with self.z.open(member) as zf, open(temp_path, 'wb') as fp:
                    shutil.copyfileobj(zf, fp)

                atoms = ase.io.read(temp_path)
                structure = mg.core.Structure.from_file(temp_path)

            analyzer = SpacegroupAnalyzer(structure)
            symmetry_data = {
                'space_group_symbol': analyzer.get_space_group_symbol(),
                'space_group_number': analyzer.get_space_group_number(),
                'point_group': analyzer.get_point_group_symbol()
            }
            connectivity = VoronoiConnectivity(structure)
        except Exception as e:
            # Broad on purpose: zipfile, ase and pymatgen raise unrelated
            # exception types; callers get one wrapped error with the cause.
            raise RuntimeError(f"Error obtaining structure: {e}") from e

        # Commit everything together so the attributes never mix two loads.
        self.atoms = atoms
        self.structure = structure
        self.symmetry_data = symmetry_data
        self.connectivity = connectivity
        return atoms, structure

    def visualize_structure(self, method: str = 'nglview'):
        """Render the loaded structure.

        Args:
            method: ``'nglview'`` for an interactive widget built from
                ``self.atoms``, or ``'matplotlib'`` for a static 3D figure
                built from ``self.structure``.

        Returns:
            The nglview widget or the matplotlib figure.

        Raises:
            ValueError: If ``method`` is not one of the supported backends.
            RuntimeError: If the data needed by the backend is not loaded.
        """
        if method == 'nglview':
            if self.atoms is None:
                raise RuntimeError("No structure loaded; call obtain_structure() first")
            view = nglview.show_ase(self.atoms)
            view.add_unitcell()
            view.center()

            view.add_representation('ball+stick')
            view.add_representation('spacefill', opacity=0.5)

            return view

        if method == 'matplotlib':
            return self._plot_structure_matplotlib()

        raise ValueError(
            f"Unknown visualization method {method!r}; expected 'nglview' or 'matplotlib'"
        )

    def _plot_structure_matplotlib(self):
        """Return a 3D scatter figure of the atoms, coloured by element.

        Raises:
            RuntimeError: If no structure has been loaded.
        """
        if self.structure is None:
            raise RuntimeError("No structure loaded; call obtain_structure() first")

        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111, projection='3d')

        positions = np.asarray(self.structure.cart_coords)
        elements = list(self.structure.species)

        # dict.fromkeys keeps first-seen order, so the element -> colour
        # mapping is the same on every run (a set would not guarantee that).
        unique_elements = list(dict.fromkeys(elements))
        colors = plt.cm.rainbow(np.linspace(0, 1, len(unique_elements)))

        # One scatter call per element gives each element a legend entry.
        for element, color in zip(unique_elements, colors):
            mask = np.array([el == element for el in elements], dtype=bool)
            pts = positions[mask]
            ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2],
                       color=color, s=100, label=str(element))
        if unique_elements:
            ax.legend()

        self._plot_unit_cell(ax)

        ax.set_xlabel('X (Å)')
        ax.set_ylabel('Y (Å)')
        ax.set_zlabel('Z (Å)')

        return fig

    def _plot_unit_cell(self, ax):
        """Draw the three lattice vectors from the origin on ``ax``.

        Only the vectors a, b and c are drawn, not all twelve cell edges.
        """
        lattice = self.structure.lattice
        origin = np.zeros(3)

        for vector in lattice.matrix:
            ax.quiver(*origin, *vector, color='k', alpha=0.5)

    def analyze_structure(self) -> Dict:
        """Summarise composition, geometry, symmetry and Voronoi connectivity.

        Returns:
            Dict with keys ``formula``, ``density``, ``volume``, ``symmetry``,
            ``coordination_numbers`` and ``bond_lengths``.

            ``coordination_numbers`` is a list indexed by site: the number of
            entries ``VoronoiConnectivity.get_connections()`` reports for that
            site. NOTE(review): this counts neighbouring site indices, which
            may differ from a chemical coordination number -- confirm that
            this is the intended quantity.

            ``bond_lengths`` holds ``count``, ``min``, ``max``, ``mean`` and
            ``std`` over the distinct connected site pairs (values are
            ``None`` when there are no pairs).

        Raises:
            RuntimeError: If no structure has been loaded.
        """
        if self.structure is None or self.connectivity is None:
            raise RuntimeError("No structure loaded; call obtain_structure() first")

        coordination = [0] * len(self.structure)
        pair_lengths = {}
        # Each connection is [site_i, site_j, distance].
        for site_i, site_j, distance in self.connectivity.get_connections():
            i, j = int(site_i), int(site_j)
            coordination[i] += 1
            # Pairs appear in both directions; keep one distance per pair.
            # A site paired with itself is skipped for the length statistics.
            # NOTE(review): presumably the reported distance for such a pair
            # is the minimum-image distance (0) -- confirm.
            if i != j:
                pair_lengths.setdefault((min(i, j), max(i, j)), float(distance))

        lengths = list(pair_lengths.values())
        if lengths:
            bond_lengths = {
                'count': len(lengths),
                'min': float(np.min(lengths)),
                'max': float(np.max(lengths)),
                'mean': float(np.mean(lengths)),
                'std': float(np.std(lengths)),
            }
        else:
            bond_lengths = {'count': 0, 'min': None, 'max': None,
                            'mean': None, 'std': None}

        return {
            'formula': self.structure.composition.reduced_formula,
            'density': self.structure.density,
            'volume': self.structure.volume,
            'symmetry': self.symmetry_data,
            'coordination_numbers': coordination,
            'bond_lengths': bond_lengths,
        }

    def export_structure(self, fmt: str = 'cif'):
        """Write the loaded structure to a temp file and offer it for download.

        Args:
            fmt: File format passed to ``Structure.to`` and used as the file
                extension.

        Raises:
            RuntimeError: If no structure has been loaded.
        """
        if self.structure is None:
            raise RuntimeError("No structure loaded; call obtain_structure() first")

        from google.colab import files

        formula = self.structure.composition.reduced_formula
        with tempfile.TemporaryDirectory() as temp_dir:
            filename = os.path.join(temp_dir, f"{formula}.{fmt}")
            # Keyword arguments only: the positional order of ``filename`` and
            # ``fmt`` has differed between pymatgen releases.
            self.structure.to(filename=filename, fmt=fmt)
            files.download(filename)

# Initialize visualizer
visualizer = GNoMEVisualizer()

# Download and prepare dataset
visualizer.download_dataset()

# Load the summary table; one row per material, looked up by 'Reduced Formula'
gnome_crystals = pd.read_csv('stable_materials_summary.csv', index_col=0)

# Example usage:
reduced_formula = 'random'  # or specify a formula
if reduced_formula == 'random':
    # Pick one row at random and use its formula
    sample = gnome_crystals.sample()
    reduced_formula = sample['Reduced Formula'].item()
else:
    # Normalise the user-supplied formula before matching it against the table
    reduced_formula = mg.core.Composition(reduced_formula).reduced_formula
    sample = gnome_crystals[gnome_crystals['Reduced Formula'] == reduced_formula].iloc[0:1]
    if sample.empty:
        # Fail here with a clear message rather than later with an opaque
        # archive lookup error.
        raise ValueError(
            f"No GNoME entry found for reduced formula {reduced_formula!r}"
        )

# Get structure (stored on the visualizer so the methods below can use it)
visualizer.atoms, visualizer.structure = visualizer.obtain_structure(reduced_formula)

# Visualize
view = visualizer.visualize_structure(method='nglview')
display(view)

# Analyze
analysis = visualizer.analyze_structure()
print("Structure Analysis:")
print(pd.DataFrame([analysis]).to_string())

# Export
visualizer.export_structure(fmt='cif')