In [1]:
from tools.toollib import ast_parser
from pathlib import Path

# Directory (relative to the working directory) whose *.py files are scanned.
pkg_root = Path("msticpy") / "common"

# Result stores, both keyed by the dotted name derived from each file's
# path relative to pkg_root.
all_mod_imports = {}  # name -> set of imported module names
file_imports = {}  # name -> full analysis result for the file


def _analyze_module_imports(py_file):
    """
    Analyze one Python source file and collect the module names it imports.

    Parameters
    ----------
    py_file
        Path of the file to analyze; passed unchanged to
        ``ast_parser.analyze``.

    Returns
    -------
    tuple
        ``(file_analysis, all_imports)`` where ``file_analysis`` is whatever
        ``ast_parser.analyze`` returned and ``all_imports`` is a set of the
        stripped, non-empty names found in ``file_analysis["imports"]`` and
        among the keys of ``file_analysis["imports_from"]``.

    """
    file_analysis = ast_parser.analyze(py_file)

    # Plain imports plus the source modules of "from X import ..." statements.
    imported_names = [*file_analysis["imports"], *file_analysis["imports_from"]]

    # Falsy entries (None, "") are dropped before stripping, so no separate
    # None clean-up is needed afterwards.
    # NOTE(review): None keys presumably come from relative imports such as
    # "from . import x" - confirm against ast_parser.
    all_imports = {name.strip() for name in imported_names if name}

    # A whitespace-only name survives the falsy filter and strips to "".
    all_imports.discard("")
    return file_analysis, all_imports


# Every .py file below pkg_root, at any depth.
pkg_py_files = [*pkg_root.glob("**/*.py")]
print(f"processing {len(pkg_py_files)} modules")

for py_file in pkg_py_files:
    # Key: path relative to pkg_root with the parts joined by "."
    # (the file's ".py" suffix stays part of the key).
    mod_name = ".".join(py_file.relative_to(pkg_root).parts)
    file_analysis, module_imports = _analyze_module_imports(py_file)
    file_imports[mod_name] = file_analysis
    all_mod_imports[mod_name] = module_imports

print(all_mod_imports)
print(file_imports)



processing 17 modules


In [16]:
from tools.toollib.import_analyzer import analyze_imports

# Run the project import analyzer over the "msticpy" package.
# NOTE(review): the positional arguments look like (repo root, package name,
# requirements file) - confirm against the analyze_imports signature.
# Later cells iterate mod_imports.items() and read `.internal` on each value.
mod_imports = analyze_imports(
    ".",
    "msticpy",
    "requirements-all.txt",
    extras=[],
    process_setup_py=False,
)

processing 261 modules


In [21]:
import numpy as np
import pandas as pd

# One record per (module, internal import) pair.
module_imp = []
for mod_name, imports in mod_imports.items():
    # Rebuild a "/dir/file.py" style path: drop the last dotted piece of the
    # module name, join the rest with "/", then append ".py".
    # NOTE(review): assumes mod_name ends in ".py" (as in the output shown
    # further down) - confirm.
    source_file = "/" + "/".join(mod_name.split(".")[:-1]) + ".py"

    for mod_import in imports.internal:
        if "." in mod_import:
            # everything before the last dot
            cross_dep = mod_import.rpartition(".")[0]
        else:
            # un-dotted import: no parent to record
            cross_dep = np.nan
        module_imp.append(
            {
                "module": mod_name,
                "file": source_file,
                "import": mod_import,
                "cross_dep": cross_dep,
            }
        )

module_imports = pd.DataFrame(module_imp)

In [30]:
# Keep rows that have a cross_dep value and whose import is neither
# "_version" nor "common.utility", then save them.
rows_to_export = module_imports["cross_dep"].notna() & ~module_imports["import"].isin(
    ["_version", "common.utility"]
)
module_imports[rows_to_export].to_csv("module_imports.csv")

In [79]:
# Per imported name: number of importing rows, number of distinct cross_dep
# values, and the distinct importing modules.
usage_by_import = (
    module_imports[["import", "module", "cross_dep"]]
    .groupby("import")
    .agg(
        mod_count=("module", "count"),
        cross_mod_count=("cross_dep", "nunique"),
        dep_modules=("module", "unique"),
    )
    .sort_values("mod_count", ascending=False)
)

# Show the importing modules of the most-used import that passes both thresholds.
usage_by_import[
    (usage_by_import["mod_count"] > 2) & (usage_by_import["cross_mod_count"] > 0)
].iloc[0].dep_modules

array(['analysis.eventcluster.py', 'analysis.syslog_utils.py',
       'analysis.timeseries.py', 'analysis.data.auditdextract.py',
       'analysis.data.base64unpack.py', 'analysis.data.cmd_line.py',
       'analysis.data.iocextract.py', 'analysis.data.syslog_utils.py',
       'config.mp_config_control.py', 'data.context.domain_utils.py',
       'data.context.geoip.py', 'data.context.ip_utils.py',
       'data.context.tilookup.py',
       'data.context.tiproviders.alienvault_otx.py',
       'data.context.tiproviders.azure_sent_byoti.py',
       'data.context.tiproviders.http_base.py',
       'data.context.tiproviders.ibm_xforce.py',
       'data.context.tiproviders.intsights.py',
       'data.context.tiproviders.kql_base.py',
       'data.context.tiproviders.open_page_rank.py',
       'data.context.tiproviders.riskiq.py',
       'data.context.tiproviders.ti_provider_base.py',
       'data.context.tiproviders.tor_exit_nodes.py',
       'data.context.tiproviders.virustotal.py',
       'da

In [81]:
import numpy as np

# Per imported name: distinct importing modules (count and sorted array via
# np.unique) and the number of distinct cross_dep values.
shared_imports = (
    module_imports[["import", "module", "cross_dep"]]
    .groupby("import")
    .agg(
        mod_count=("module", "nunique"),
        cross_mod_count=("cross_dep", "nunique"),
        dep_modules=("module", np.unique),
    )
    .sort_values("mod_count", ascending=False)
)

# Save imports used by more than one module that have at least one cross_dep.
shared_imports[
    (shared_imports["mod_count"] > 1) & (shared_imports["cross_mod_count"] > 0)
].to_csv("common_imports.csv")

In [102]:
# Load the relocation table from module_new_folder.csv, taking the first row
# as the column header. The cells below read its `file` and `new_location`
# columns.
ren_mods = pd.read_csv("module_new_folder.csv", header=0)

In [103]:
# Rows that have a real target folder: new_location is present and not "depr".
has_new_location = ren_mods["new_location"].notna() & (ren_mods["new_location"] != "depr")

# Split the current path on "/" for those rows only; all other rows get NaN
# through index alignment.
ren_mods["path_parts"] = ren_mods.loc[has_new_location, "file"].str.split("/")

In [104]:
def make_path(row):
    """
    Build the relocated path for one row of the rename table.

    Parameters
    ----------
    row
        Object exposing ``new_location`` (target folder, or a missing value,
        or the marker ``"depr"``) and ``path_parts`` (the current path split
        into its components).

    Returns
    -------
    str
        ``new_location + "/" + <last element of path_parts>``, or ``""`` when
        the row has no usable target folder or no usable path components.

    """
    new_location = row.new_location
    # Anything that is not a string (NaN from an empty cell, None, ...) means
    # "no new location"; "depr" marks a row that is not relocated.
    if not isinstance(new_location, str) or new_location == "depr":
        return ""

    path_parts = row.path_parts
    # path_parts is missing (non-sequence) when the source path was absent;
    # return "" instead of failing on the [-1] lookup.
    if not isinstance(path_parts, (list, tuple)) or not path_parts:
        return ""

    return new_location + "/" + path_parts[-1]

# Compute the relocated path row by row, then replace any missing result
# with "" before storing it.
relocated_paths = ren_mods.apply(make_path, axis=1)
ren_mods["new_path"] = relocated_paths.fillna("")

In [105]:
# Use the relocated path where one exists; rows whose new_path is "" keep
# their current `file` value.
ren_mods["final_path"] = ren_mods["new_path"].where(
    ren_mods["new_path"] != "", ren_mods["file"]
)

In [75]:
# Write the relocation table to new_location.csv.
# NOTE(review): this cell's execution count (75) is lower than that of the
# neighbouring cells (102-106), so the file on disk may predate the columns
# computed in those cells - re-run in order to confirm.
ren_mods.to_csv("new_location.csv")

In [106]:
# Distinct new_location values (first occurrence kept, original index shown).
ren_mods["new_location"].drop_duplicates()

0                       NaN
5                      depr
17               /transform
21                     /api
29                    /auth
39                  /config
80             /vis/viewers
85                 /context
90              /data/azure
91           /context/azure
99     /context/tiproviders
112             /context/vt
117                   /data
120              /datamodel
190                    /vis
208                 /widget
Name: new_location, dtype: object

In [115]:
# Leading "/segment" of every final path, de-duplicated and sorted.
top_level_segments = (
    ren_mods["final_path"]
    .str.extract("(/[^/]+)", expand=False)
    .drop_duplicates()
    .sort_values()
)
print("\n".join(top_level_segments.to_list()))

/__init__.py
/analysis
/api
/auth
/common
/config
/context
/data
/datamodel
/nbtools
/sectools
/transform
/vis
/widget
