# Development of miscellaneous new features for polymerist

## Trying out `anytree`

In [1]:
import polymerist as ps
from polymerist import genutils, polymerist, duration
from polymerist.genutils import bits, typetools, importutils
from polymerist.genutils.decorators.functional import allow_string_paths

import json, math
import pkgutil, importlib

print(importutils.module_hierarchy(json))

# from anytree import RenderTree, Node, AsciiStyle, ContStyle, ContRoundStyle, DoubleStyle



No functional group SMARTS data from LUT found on system; regenerating from https://www.daylight.com/dayhtml_tutorials/languages/smarts/smarts_examples.html


├───decoder
├───encoder
├───scanner
└───tool


### Test for paths

In [2]:
from polymerist.genutils.treetools import AbstractNodeCorrespondence, compile_tree_factory, copy_tree

from typing import Iterable, Optional
from anytree import Node
from pathlib import Path
from polymerist.genutils.decorators.functional import allow_string_paths

class PathToNodeCorrespondence(AbstractNodeCorrespondence, FROMTYPE=Path):
    '''Concrete implementation of how to produce filetrees from pathlib Paths

    Each method answers one question about a single Path. How and when these hooks are
    invoked is decided by AbstractNodeCorrespondence, which is defined outside this notebook
    (presumably they are consumed by compile_tree_factory -- confirm against treetools)'''
    def name(self, path : Path) -> str:
        '''Label for a path: its final component'''
        return path.name
    
    def has_children(self, path : Path) -> bool:
        '''Only directories are reported as having children'''
        return path.is_dir()
    
    def children(self, path : Path) -> Iterable[Path]:
        '''Immediate entries of a directory, in whatever order Path.iterdir() yields them'''
        return path.iterdir()
    
# Compile a tree factory from the Path correspondence, then wrap it in allow_string_paths
# (imported from polymerist's functional decorators; presumably lets callers pass a str where a Path is expected -- confirm)
make_file_tree = allow_string_paths(compile_tree_factory(PathToNodeCorrespondence(), class_alias='directory', obj_attr_name='file'))
# make_file_tree = compile_tree_factory(PathToNodeCorrespondence(), treename='directory')


@allow_string_paths # an explicit implementation just to see what an AbstractNodeCorrespondence simplifies
def file_tree(path : Path, max_depth : Optional[int]=None, _curr_depth : int=0) -> Node:
    '''Compile a directory tree rooted at a toplevel path

    Every subdirectory encountered is expanded into its own subtree, either until the
    maximum depth is reached or, when max_depth=None, until there is nothing left to expand.
    _curr_depth is bookkeeping for the recursion and is not meant to be set by callers'''
    # NOTE: the attribute can't be called "path", since "path" is already an attribute of the base Node class
    root = Node(name=path.name, file_path=path)

    depth_exhausted = (max_depth is not None) and (_curr_depth >= max_depth)
    if (not path.is_dir()) or depth_exhausted:
        return root # leaf: either not a directory, or a directory sitting at the depth limit

    for entry in path.iterdir():
        child = file_tree(entry, max_depth=max_depth, _curr_depth=_curr_depth + 1)
        child.parent = root # hook the finished subtree onto this node

    return root

In [5]:
import polymerist as ps
from anytree.render import RenderTree, ContRoundStyle

main_path = Path(ps.__file__).parent # directory which contains the file polymerist was imported from
main_path = str(main_path) # hand over a plain string (presumably to exercise the allow_string_paths wrapper around make_file_tree)
# NOTE(review): "stop" is interpreted by the compiled factory, which is defined outside this notebook;
# judging by the printed output, paths for which it returns True (files, __pycache__) are left out of the tree -- confirm
ft = make_file_tree(main_path, max_depth=None, stop=lambda path : path.is_file() or path.name == '__pycache__')
# ct = copy_tree(ft, stop=lambda node : node.file.is_file() or node.file.name == '__pycache__')

rt = RenderTree(ft, style=ContRoundStyle())
print(rt.by_attr('name')) # render the tree showing only each node's "name" attribute

polymerist
├── genutils
│   ├── typetools
│   ├── decorators
│   ├── textual
│   ├── logutils
│   ├── fileutils
│   │   ╰── jsonio
│   ╰── sequences
│       ├── discernment
│       ╰── similarity
├── unitutils
├── rdutils
│   ├── bonding
│   ├── labeling
│   ├── reactions
│   ╰── rdcoords
├── mdtools
│   ├── lammpstools
│   ├── openfftools
│   │   ├── solvation
│   │   │   ╰── solvents
│   │   ╰── partialcharge
│   │       ╰── rescharge
│   ╰── openmmtools
├── maths
│   ├── linearalg
│   ├── greek
│   ├── lattices
│   ├── combinatorics
│   ├── fractions
│   ╰── numbersys
├── tests
├── analysis
├── smileslib
│   ╰── functgroups
├── polymers
│   ├── smidgelib
│   ╰── monomers
╰── graphics


### Better typehinting for Filters

In [None]:
from typing import Any, Callable, Generic, NewType, TypeAlias, TypeVar
from anytree.node import Node

T = TypeVar('T')
Filter : TypeAlias = Callable[[Any], bool]
class Filter(Generic[T]):
    def __call__(self, obj : T) -> bool:
        return False

NULL_FILTER : Filter = lambda inp : False # a filter which doesn't do anything (but has the right call signature)


In [None]:
def test(fil : Filter[int]) -> list[str]:
    '''Dummy function whose only purpose is to carry a subscripted Filter annotation; the argument is ignored'''
    placeholder : list[str] = ['a']
    return placeholder

help(test)

In [None]:
Filter[int]

### Test for Python modules

In [None]:
from types import ModuleType
from typing import Iterable, Optional, TypeAlias, Union

import inspect
import pkgutil
import importlib, importlib.machinery
from importlib.machinery import SourceFileLoader, FileFinder

from pathlib import Path
from anytree import Node


# EXTRACTING INFO FROM A SINGLE IMPORTED MODULE
def _load_module_from_path(path : Path) -> ModuleType:
    '''Execute the Python source found at a filesystem location and return it as a module

    A directory is treated as a package and loaded through its __init__.py; anything else is loaded as a single file.
    The module is registered in sys.modules under its own name, unless that name is already taken'''
    import sys
    import importlib.util

    path = path.resolve()
    if path.is_dir():
        module_name = path.name
        source_file = path / '__init__.py'
        search_locations = [str(path)] # marks the spec as a package so that its submodules can be found
    else:
        module_name = path.stem
        source_file = path
        search_locations = None

    if not source_file.is_file():
        raise FileNotFoundError(f'No Python source file found at "{source_file}"')

    spec = importlib.util.spec_from_file_location(module_name, source_file, submodule_search_locations=search_locations)
    if (spec is None) or (spec.loader is None): # e.g. a file extension no loader is known for
        raise ImportError(f'Cannot create an import spec for "{source_file}"')

    loaded_module = importlib.util.module_from_spec(spec)
    name_is_taken = module_name in sys.modules # never clobber a module which has already been imported under this name
    if not name_is_taken:
        sys.modules[module_name] = loaded_module # must happen before execution for self-referential imports to resolve
    try:
        spec.loader.exec_module(loaded_module)
    except BaseException:
        if not name_is_taken:
            sys.modules.pop(module_name, None) # don't leave a half-initialized module behind
        raise

    return loaded_module

def flexible_module_pass(module : Union[str, Path, ModuleType]) -> ModuleType:
    '''Flexible interface for supplying a ModuleType object as an argument
    Allows for passing a name (either module name or string path), Path location, or a module proper

    Strings which read as a dotted module name (e.g. "json.decoder") are imported by name;
    any other string, as well as any string ending in ".py", is interpreted as a filesystem path.
    Paths may point either to a single source file or to a package directory containing an __init__.py

    Raises TypeError if the argument is of none of the supported types; import failures
    (ImportError, FileNotFoundError) are propagated from the underlying import machinery'''
    if isinstance(module, ModuleType):
        return module

    if isinstance(module, str):
        reads_as_dotted_name = all(part.isidentifier() for part in module.split('.'))
        if reads_as_dotted_name and not module.endswith('.py'):
            return importlib.import_module(module)
        module = Path(module) # fall through to the Path handling below

    if isinstance(module, Path):
        return _load_module_from_path(module)

    raise TypeError(f'Cannot interpret object of type "{type(module).__name__}" as a module')

def is_package(module : ModuleType) -> bool:
    '''Check whether a Python module is a package (i.e. contains other importable modules within itself)

    The module is looked up again by name through the import system, so it must be findable under module.__name__
    Raises ValueError if no spec can be found for that name, or if the spec found has no loader attached'''
    import importlib.util # "import importlib" on its own does not guarantee that the "util" submodule is loaded

    # module_spec = getattr(module, '__spec__', None) # this doesn't work when a string is passed in
    module_spec = importlib.util.find_spec(module.__name__)
    if module_spec is None:
        raise ValueError(f'No ModuleSpec found for {module}')

    module_loader = module_spec.loader
    # module_loader = pkgutil.get_loader(module) # NOTE: while more compact than above, this function is slated for deprecation
    if module_loader is None:
        raise ValueError(f'No SourcefileLoader found for {module}')

    loader_is_package = getattr(module_loader, 'is_package', None)
    if loader_is_package is None:
        # is_package() is an optional loader method; the spec itself carries the same information
        return module_spec.submodule_search_locations is not None

    return loader_is_package(module.__name__)

# TODO : find way to get depth of submodule in toplevel ("number of dots" before standalone name)
def relative_module_name(module : ModuleType, relative_to : Optional[ModuleType]=None, remove_leading_dot : bool=True) -> str:
    '''Gets the name of a module relative to another (presumably toplevel) module
    If the given module is not in the path of the toplevel module, will simply return as module.__name__

    With remove_leading_dot=False, the dot which separated the two names is kept (e.g. ".sub" rather than "sub")'''
    full_name = module.__name__
    if relative_to is None:
        return full_name

    toplevel_name = relative_to.__name__
    # only strip at a dot boundary, so that e.g. "pkgextra.sub" is never mistaken for a submodule of "pkg"
    if full_name.startswith(f'{toplevel_name}.'):
        dotted_remainder = full_name[len(toplevel_name):] # still begins with the separating dot
        return dotted_remainder[1:] if remove_leading_dot else dotted_remainder

    if (full_name == toplevel_name) and not remove_leading_dot:
        return '' # a module relative to itself; kept for backward compatibility with plain prefix removal

    return full_name


# COMPILING MODULE TREES FOR FULL PACKAGES
def module_tree(module : ModuleType, blacklist : Optional[Iterable[str]]=None, relative_to : Optional[ModuleType]=None, max_depth : Optional[int]=None, _curr_depth : int=0) -> Optional[Node]:
    '''Create a tree for a module and all its submodules, to a set depth and with optional blacklisting by module name

    Each node carries the module's name (relative to "relative_to" for the root, and to the parent module for all
    descendants) and the module object itself under the "module" attribute. Blacklisted names are compared against
    the short submodule name, and are skipped BEFORE being imported. _curr_depth is bookkeeping for the recursion'''
    # materialize the blacklist once: a one-shot iterable would otherwise be used up by the first membership test,
    # and a bare string would be matched by substring rather than as a single module name
    if blacklist is None:
        blacklisted = frozenset()
    elif isinstance(blacklist, str):
        blacklisted = frozenset((blacklist,))
    else:
        blacklisted = frozenset(blacklist)

    # TODO: figure out way to get loader (or FileFinder?) for toplevel
    module_is_pkg = is_package(module)
    module_name = relative_module_name(module, relative_to=relative_to, remove_leading_dot=True)

    module_node = Node(
        name=module_name,
        module=module,
    )
    depth_remaining = (max_depth is None) or (_curr_depth < max_depth) # no limit set, or limit not reached yet
    if module_is_pkg and depth_remaining:
        for (_, submodule_name, _) in pkgutil.iter_modules(module.__path__):
            if submodule_name in blacklisted: # TOSELF: also worth checking the full __name__? (requires importing a potentially blacklisted module which isn't great)
                continue
            # import by absolute name; module.__package__ is not guaranteed to be set, while __name__ always is
            submodule = importlib.import_module(f'{module.__name__}.{submodule_name}')
            submodule_node = module_tree(submodule, blacklist=blacklisted, relative_to=module, max_depth=max_depth, _curr_depth=_curr_depth+1)
            submodule_node.parent = module_node

    return module_node


In [None]:
from typing import Callable, Any

# NOTE: TypeAlias is not imported in this cell; it is expected to still be in scope from the imports of an earlier cell
# NOTE: this rebinds the name "Filter", replacing the Generic-based class which an earlier cell defines under the same name
Filter : TypeAlias = Callable[[Any], bool] # TODO: move this to somewhere in typetools
NULL_FILTER : Filter = lambda inp : False # a filter which doesn't do anything (but has the right call signature)


In [None]:
import json
import math

# a mix of packages, plain submodules and stdlib modules (one package, one builtin/extension) to probe the helpers with
mods = (
    ps,
    polymerist, 
    genutils,
    duration,
    bits,
    typetools,
    json,
    math,
)

for mod in mods:
    print(mod.__name__, is_package(mod))
    print('\t', getattr(mod, '__file__', None), getattr(mod, '__path__', None))
    mspec = mod.__spec__
    # importlib.find_loader() and Loader.load_module() are deprecated (find_loader no longer exists as of Python 3.12);
    # also, toplevel non-package modules such as math have an empty parent name, for which there is nothing to import
    par_mod = importlib.import_module(mspec.parent) if mspec.parent else None
    print('\t', mspec.origin, mspec.parent, mspec.name, relative_module_name(mod, relative_to=par_mod))

In [None]:
from anytree import RenderTree, Node, AsciiStyle, ContStyle, ContRoundStyle, DoubleStyle


# Tree of genutils with no depth limit, skipping any submodule named "decorators"; the root's name is taken relative to ps
mt = module_tree(genutils, blacklist=('decorators',), max_depth=None, relative_to=ps)
rt = RenderTree(mt) # no style passed here, unlike the file tree cells which use ContRoundStyle
print(rt.by_attr('name'))

In [None]:
# Same rendering as for make_file_tree, but through the explicit recursive implementation
main_path = Path(ps.__file__).parent
filetree = file_tree(main_path, max_depth=None) # NOTE: previously called "file_tree_from_path", a name this notebook never defines

rt = RenderTree(filetree, style=ContRoundStyle())
print(rt.by_attr('name'))

## Testing polymerist importability within environment

In [None]:
import numpy as np
from openff.toolkit import Molecule, Topology, ForceField

In [None]:
import polymerist as ps
from polymerist.genutils import pyimports, importutils

import pandas as pd
print(importutils.module_hierarchy(ps))

In [None]:
import nglview

print(nglview.__version__)
nglview.demo()

In [None]:
from polymerist.polymers.monomers import specification
from rdkit import Chem

smi = 'CCO-c1ccccc1-N=C=C'
mol1 = Chem.MolFromSmiles(smi)
display(mol1) # "display" is not imported in this cell; presumably provided by the notebook (IPython) environment

sma = specification.expanded_SMILES(smi, assign_map_nums=True)
exp_sma = specification.compliant_mol_SMARTS(sma)
# NOTE(review): exp_sma is never used below; mol2 is built from sma instead -- confirm which of the two was intended
mol2 = Chem.MolFromSmarts(sma)
display(mol2)


In [None]:
from openff.toolkit import Molecule

offmol = Molecule.from_smiles(smi)
offmol.generate_conformers(n_conformers=1)
offmol.visualize(backend='nglview')

## Dynamically reading all import statements in codebase

In [None]:
import polymerist as ps
from polymerist.genutils import pyimports, importutils

print(importutils.module_hierarchy(ps))

In [None]:
infos = pyimports.extract_imports_from_module(ps)

# one table row per extracted import record, with the record's instance attributes as columns
# NOTE: "pd" is not imported in this cell; it relies on the "import pandas as pd" of an earlier cell
df = pd.DataFrame.from_records([info.__dict__ for info in infos])
df.to_csv('test.csv') # written relative to the current working directory

In [None]:
# keep only the import records which are not flagged as relative and which have no parent module recorded
nonrel = [info for info in infos if not info.is_relative and info.parent_module is None]
len(nonrel)

In [None]:
import sys

# distinct object names across all of the import records kept in nonrel
imported_names = {info.object_name for info in nonrel}
imported_names

# module names which the running interpreter itself reports as builtin / standard library
registered_builtins = {*sys.builtin_module_names}
registered_stdlibs = {*sys.stdlib_module_names}

# whatever remains is neither builtin nor standard library
nb_imports = imported_names.difference(registered_builtins, registered_stdlibs)
nb_imports

# Another thing

In [None]:
import nglview

nglview.demo()