In [1]:
%load_ext autoreload
%autoreload 2

In [14]:
from python_tools import file_utils as filu
from python_tools import module_utils as modu
from python_tools import inspect_utils as iu
from python_tools import package_utils as pku
from python_tools import pathlib_utils as plu

# Complete process of cleaning module

In [None]:

"""
Purpose: Take an unordered module file and clean it up
so that it avoids circular imports that could result in errors,
and also make it look professional


---- old ----
1) Get all of the modules used
2) Divide the modules into those referenced by outside packages and those not
3) Get all of the member variables
4) Get all of the doc strings
5) Arrange in the following order:
--------------


1) Run the replacement to put the right qualifiers in front of a module 
using the right package
2) Get all the docstrings and replace in the file with nothing
3) Find all of the module at the beginning of the line
4) Determine the package that each module came from (if it did have one)
5) Determine the parent module (if any)
6) Put the correct prefix in front of module if not already have it
- and add the relative one to those inside current package

--------------------------------------------
7) Why can't go collect current variables?
- because there could be lists  that screw up the regex
* the variables may sometimes depend on another class (but handle those on case by case)
--------------------------------------------

8) Replace the docstring and the modules (at the beginning of the line) with empty strings
9) write the data in following order
- at beginning: non-referencing modules, doc strings
- at end: referencing modules

non-referencing modules
#local variables
doc strings
rest of code
referencing modules
itself

-- things going to think about: 
those not at the start of the line --> do not want using the dot operator
for relative (other than that everything else should be the same)
"""

In [None]:
from python_tools import file_utils as filu

# Search the python_tools package directory for the string below
# (presumably reports the files that contain it -- confirm against filu).
filu.search_directory_files_for_str("/python_tools/python_tools/", search_str="tqdm_utils import tqdm")

In [351]:
# --- Run configuration for the clean-up steps below ---

# Module file to be cleaned up.
filepath = "/python_tools/python_tools/networkx_utils.py"

# When False, the edits are applied to a "<stem>_replaced" copy next to the
# original instead of to the original file itself.
overwrite = False

# Print progress information from each step.
verbose = True

# Part 0: Copy and paste document into new file if not overwrite

In [352]:
from pathlib import Path
from python_tools import system_utils as su

# Normalise the configured path and remember the name of the module being cleaned.
filepath = Path(filepath)
curr_mod = filepath.stem

# Default to editing in place; otherwise work on a "<stem>_replaced<suffix>"
# copy created in the same directory as the original.
output_path = filepath
if not overwrite:
    output_path = filepath.parent / f"{filepath.stem}_replaced{filepath.suffix}"
    su.copy_file(filepath, output_path)

if verbose:
    print(f"output_path = {output_path}")

output_path = /python_tools/python_tools/networkx_utils_replaced.py


# 1) Run replacement to put right qualifiers in front of something

In [353]:
# Source directories of the packages whose modules should be recognised.
# Each path has the form "/<package>/<package>/", so element [1] of the
# "/"-split is the package name.
packages = [
    "/python_tools/python_tools/",
    "/machine_learning_tools/machine_learning_tools/",
    "/pytorch_tools/pytorch_tools/",
    "/graph_tools/graph_tools/",
    "/meshAfterParty/meshAfterParty/",
    "/neuron_morphology_tools/neuron_morphology_tools/",
]

# package name -> module names found in that package's directory
pkg_to_module = dict(
    (pkg_dir.split("/")[1], pku.module_names_from_directories(pkg_dir))
    for pkg_dir in packages
)

# The first entry is the package that owns the file being cleaned.
directory = packages[0]
package_name = directory.split("/")[1]

# NOTE: when overwrite is False the "<stem>_replaced" working copy sits in this
# same directory, so it is listed here as a module as well.
modules = pku.module_names_from_directories(directory)

modules

['filtering_utils',
 'json_utils',
 'matplotlib_utils',
 'statistics_visualizations',
 'requirement_utils',
 'networkx_utils',
 'pretty_print_confusion_matrix',
 'numpy_utils',
 'ipyvolume_movie_utils',
 'dict_utils',
 'regex_utils',
 'hash_utils',
 'inspect_utils',
 'statistics_utils',
 'general_utils',
 'string_utils',
 'algorithms_utils',
 'ipyvolume_utils',
 'pandas_utils',
 'pathlib_utils',
 'system_utils',
 'package_utils',
 'scipy_utils',
 'seaborn_utils',
 'matlab_utils',
 'argparse_utils',
 'module_utils',
 'function_utils',
 'networkx_utils_replaced',
 'file_utils',
 'data_struct_utils',
 'linalg_utils',
 'widget_utils',
 'dj_utils',
 'example_re',
 'tqdm_utils',
 'mesh_utils']

In [354]:
from python_tools import regex_utils as ru
from python_tools import file_utils as filu

# Rewrites the import statements of the working file (output_path) in four
# passes: first qualify every import of a known module with the package name,
# then turn the package-qualified imports into relative imports.

word_comb = ru.word_pattern
# Non-capturing alternation over every known module name of the package.
modules_or = f"(?:{'|'.join(modules)})"

# An optional leading "from <word> " is consumed by a non-capturing group and
# group 1 captures the "import <module>[ as <alias>]" clause, so the
# replacement can re-emit that clause behind a "from <package_name> " prefix.
easy_import_pattern = (
    f"(?:from {word_comb} )?"
    f"(import {modules_or} as {word_comb}|import {modules_or})"
)
easy_replacement = fr"from {package_name} \1"

# "from .<module> import <name>" (one or more leading dots): group 1 keeps
# "<module> import <name>" and the dots are replaced by "<package_name>.".
harder_import_pattern = f"(?:from [.]+({modules_or} import {word_comb}))"
harder_replacement = fr"from {package_name}.\1"

# "from <package_name> import ..." -> "from . import ...", anchored with
# ru.start_of_line_pattern (group 1 preserves whatever that anchor matched).
easy_import_relative = f"({ru.start_of_line_pattern})from {package_name} "
easy_replacement_relative = r"\1from . "

# "from <package_name>.<module> ..." -> "from .<module> ...".
# [.] matches a literal dot only; a bare "." would match any character that
# follows the package name.
harder_import_relative = f"({ru.start_of_line_pattern})from {package_name}[.]"
harder_replacement_relative = r"\1from ."

# (label printed when verbose, pattern, replacement), applied in this order.
replacement_steps = [
    ("-- easy initial replacement --", easy_import_pattern, easy_replacement),
    ("-- harder initial replacement --", harder_import_pattern, harder_replacement),
    ("-- easier relative replacement --", easy_import_relative, easy_replacement_relative),
    ("-- harder relative replacement --", harder_import_relative, harder_replacement_relative),
]

for step_label, step_pattern, step_replacement in replacement_steps:
    if verbose:
        print(step_label)
    replaced_path = filu.file_regex_replace(
        pattern = step_pattern,
        replacement = step_replacement,
        filepath = output_path,
        overwrite_file = True,
        verbose = verbose
    )

# Show the return value of the last replacement as the cell output.
replaced_path

-- easy initial replacement --
# of substitutions = 27
  --> output_filepath = /python_tools/python_tools/networkx_utils_replaced.py
-- harder initial replacement --
# of substitutions = 1
  --> output_filepath = /python_tools/python_tools/networkx_utils_replaced.py
-- easier relative replacement --
# of substitutions = 7
  --> output_filepath = /python_tools/python_tools/networkx_utils_replaced.py
-- harder relative replacement --
# of substitutions = 1
  --> output_filepath = /python_tools/python_tools/networkx_utils_replaced.py


PosixPath('/python_tools/python_tools/networkx_utils_replaced.py')

# Getting all docstrings (and replace with empty)

In [355]:
from python_tools import string_utils as stru

# Forwarded to modu.multiline_str below.
above_first_func_def = True

# Full text of the working file.
data = filu.read_file(output_path)

# return_text=False: the returned items are used below through .span(), i.e.
# as located matches rather than plain strings.
multi_line_comm = modu.multiline_str(
    filepath = output_path,
    verbose = verbose,
    return_text = False,
    above_first_func_def = above_first_func_def,
)

# (start, end) span of every multi-line string that was found.
range_list = [found.span() for found in multi_line_comm]

# File text with those spans removed.
data_doc = stru.remove_range_list(data, range_list=range_list, verbose=False)

# Text of each multi-line string, kept so it can be written back at the top later.
all_doc = list(map(modu.multiline_str_text, multi_line_comm))


    
#data_doc = f"{all_doc}\n{data_doc}"
#filu.write_file(output_path,data=data_doc,replace=True)

# of multi-line strings = 1


# Reorganizing the modules in the file

In [356]:
"""
Purpose: To retrieve all of the modules, 
and move the unique list to the top and bottom 
of module

Pseudocode: 
1) get a unique list of all the modules
2) remove all instances of those modules that occur at beginnings of the line
3) sort the modules into relative and non-relative
4) Remove the own file from the list
5) Create a prefix 
"""

'\nPurpose: To retrieve all of the modules, \nand move the unique list to the top and bottom \nof module\n\nPseudocode: \n1) get a unique list of all the modules\n2) remove all instances of those modules that occur at beginnings of the line\n3) sort the modules into relative and non-relative\n4) Remove the own file from the list\n5) Create a prefix \n'

In [362]:
import re

import numpy as np

# Collects every import statement that starts a line in data_doc, strips those
# statements from the body, and assembles final_data in this order:
#   docstrings, non-package imports, remaining code, package imports,
#   import of the module itself.

finds = modu.find_import_modules_in_file(
    data = data_doc,
    unique = True,
    verbose = verbose,
    beginning_of_line = True,
)

finds = list(np.sort(finds))

if len(finds) > 0:
    # Escape each statement so characters such as "." are matched literally,
    # and try longer statements first so a statement that is a prefix of
    # another one cannot shadow it in the alternation.
    escaped_finds = [
        re.escape(str(stmt)) for stmt in sorted(finds, key=len, reverse=True)
    ]
    module_pattern = fr"{ru.start_of_line_pattern}({'|'.join(escaped_finds)})"
    data_doc_no_mod, count = re.subn(
        pattern = module_pattern,
        repl = "",
        string = data_doc,
    )
else:
    # An empty alternation would match at every line start and strip it,
    # so leave the text untouched when there is nothing to remove.
    data_doc_no_mod, count = data_doc, 0

if verbose:
    print(f"# of modules replaced = {count}")

pkg_list = list(pkg_to_module.keys())

non_pkg_mods = []   # imports that belong to none of the known packages
pkg_mods = dict()   # package name -> import statements from that package
own_mod = []        # imports of the module being cleaned itself

for k in finds:
    if curr_mod in k:
        own_mod.append(k)
        continue
    if "from ." in k:
        # Relative imports belong to the current package.
        pkg_mods.setdefault(package_name, []).append(k)
        continue

    for pkg in pkg_list:
        if pkg in k:
            pkg_mods.setdefault(pkg, []).append(k)
            # Stop at the first matching package so the statement is recorded
            # exactly once.
            break
    else:
        # No known package matched: this is a non-package import.
        non_pkg_mods.append(k)

non_pkg_mods_str = "\n".join(non_pkg_mods)
own_mod_str = "\n".join(own_mod)
# The per-package header is emitted as a comment because this text becomes
# part of final_data, i.e. Python source.
pkg_mods_str = "\n\n".join([f"# --- from {pkg} ---\n" + "\n".join(m)
                           for pkg, m in pkg_mods.items()])

if verbose:
    print(non_pkg_mods_str)
    print("\n")
    print(pkg_mods_str)
    print("\n")
    print(own_mod_str)

separator = "\n\n"

if len(all_doc) > 0:
    all_doc_str = "\n\n".join(all_doc)
    all_doc_str = f"'''{all_doc_str}\n'''{separator}"
else:
    all_doc_str = ""

if len(non_pkg_mods_str) > 0:
    non_pkg_mods_str += separator
if len(pkg_mods_str) > 0:
    pkg_mods_str += separator

final_data = (
    all_doc_str +
    non_pkg_mods_str +
    data_doc_no_mod +
    pkg_mods_str +
    own_mod_str
)

# of matches (unique = True) = 21
# of modules replaced = 21
from copy import deepcopy
from networkx.classes.function import path_weight as pw
from networkx.drawing.nx_pydot import graphviz_layout
import copy
import itertools
import matplotlib.pyplot as plt
import networkx as nx
import networkx.classes.function as cls_func
import numpy as np
import pandas as pd
import pydot
import random
import time


--- from python_tools ---
from . import general_utils as gu
from . import numpy_utils as nu
from . import pandas_utils as pu
from . import regex_utils as ru
from . import string_utils as stru
from . import tqdm_utils as tqu
from .tqdm_utils import tqdm


from . import networkx_utils as xu


In [363]:
# Write the reassembled text to output_path (replace=True is passed so an
# existing file at that path is allowed to be replaced).
filu.write_file(output_path, final_data, replace=True)

# 1) Get all of the modules used

In [38]:
# Build the import-matching pattern in this cell: it is otherwise only
# defined in a later cell, so a top-to-bottom run would fail with a NameError.
pattern_import = modu.import_pattern_str(
    verbose = verbose
)

# Unique import statements found in the original module file.
modules_used = modu.find_import_modules_in_file(
    filename = filepath,
    pattern = pattern_import,
    verbose = verbose,
    unique = True
)
modules_used

# of matches (unique = True) = 21


['from .tqdm_utils import tqdm',
 'import itertools',
 'import pandas as pd',
 'from . import general_utils as gu',
 'import networkx.classes.function as cls_func',
 'import numpy as np',
 'from . import string_utils as stru',
 'import pydot',
 'from . import pandas_utils as pu',
 'from . import regex_utils as ru',
 'from copy import deepcopy',
 'from networkx.classes.function import path_weight as pw',
 'import networkx as nx',
 'import time',
 'import matplotlib.pyplot as plt',
 'import random',
 'import copy',
 'from networkx.drawing.nx_pydot import graphviz_layout',
 'from . import networkx_utils as xu',
 'from . import tqdm_utils as tqu',
 'from . import numpy_utils as nu']

# 2) Divide modules into referenced outside of package and those not

In [None]:
# Package source directories, each laid out as "/<name>/<name>/".
modules_directory = [
    f"/{pkg_name}/{pkg_name}/"
    for pkg_name in (
        "meshAfterParty",
        "python_tools",
        "graph_tools",
        "neuron_morphology_tools",
        "pytorch_tools",
    )
]


#modu.prefix_module_imports_in_files

In [37]:
directory = "/python_tools/python_tools/"

# Modules found in the package directory.
modules = modu.modules_from_directory(directory=directory, verbose=verbose)

# The pattern is built without passing a module list.
pattern_import = modu.import_pattern_str(verbose=verbose)

# All (non-unique) import statements found in the module file.
py_tools_imports = modu.find_import_modules_in_file(
    filename=filepath,
    pattern=pattern_import,
    verbose=verbose,
    unique=False,
)
py_tools_imports

# of modules = 36
import_str = (?:\A|\n)((?:import [a-zA-Z._]+ as [a-zA-Z._]+)|(?:from [a-zA-Z._]+ import [a-zA-Z._]+ as [a-zA-Z._]+)|(?:import [a-zA-Z._]+)|(?:from [a-zA-Z._]+ import [a-zA-Z._]+))
# of matches (unique = False) = 21


['from . import general_utils as gu',
 'from . import numpy_utils as nu',
 'from . import pandas_utils as pu',
 'from . import regex_utils as ru',
 'from . import string_utils as stru',
 'from . import tqdm_utils as tqu',
 'from .tqdm_utils import tqdm',
 'from copy import deepcopy',
 'from networkx.classes.function import path_weight as pw',
 'from networkx.drawing.nx_pydot import graphviz_layout',
 'import copy',
 'import itertools',
 'import matplotlib.pyplot as plt',
 'import networkx as nx',
 'import networkx.classes.function as cls_func',
 'import numpy as np',
 'import pandas as pd',
 'import pydot',
 'import random',
 'import time',
 'from . import networkx_utils as xu']

# 3) Get all the member variables

In [12]:
from python_tools import networkx_utils as xu

# List the global variable names of the imported module.
iu.global_vars(xu, verbose=True)

# of global variables = 3


['downstream_name', 'node_id_default', 'upstream_name']

In [None]:
"""
Can we detect member variables without the module? no not necessarily
"""

# 4) Get all docstrings

In [6]:
# Collect the multi-line strings of the original module file;
# return_text=True asks the helper for their text.
multi_line_comm = modu.multiline_str(filepath=filepath, verbose=verbose, return_text=True)

# of multi-line strings = 3
