# Utility to visualize the imports used in any module

In [1]:
def parse_text(text, mode=''):
    """
    Parse the import statements found in ``text`` into a nested dictionary.

    Every line is inspected on its own:

    * ``import pkg [as alias]`` creates or updates ``coding['pkg']`` with
      its alias (``None`` when there is no alias).
    * ``from pkg.sub import names`` appends ``names`` (one string per line)
      to ``coding[top]['import']['pkg.sub']``, where ``top`` is the text
      before the first dot of the module path. For relative imports such
      as ``from ..utils import x`` that text is the empty string.
    * ``from pkg import names`` (module path without a dot) only registers
      ``pkg``; the imported names are not recorded, whatever the order of
      the lines.

    text: source code (or just its import block) as one string
    mode: 'debug' to print intermediate steps

    Returns a dict shaped like
    ``{top: {'alias': str or None, 'import': {module: [names, ...]}}}``.
    The 'import' key exists only once a dotted ``from`` import was seen.

    Limitations: lines are split on single spaces and must start with the
    keyword at column 0, so parenthesized multi-line imports, trailing
    comments and ``import a, b`` are not interpreted.
    """
    coding = {}
    debug = mode == 'debug'
    for line in text.split('\n'):
        base = line.split(' ')
        if debug:
            print(base)

        # Only lines made of a keyword followed by a module name can be
        # interpreted; a bare `import`/`from` used to raise IndexError.
        if base[0] not in ('import', 'from') or len(base) < 2 or not base[1]:
            continue

        if base[0] == 'import':
            alias = base[3] if len(base) == 4 and base[2] == 'as' else None
            # Update in place so an 'import' tree recorded earlier for the
            # same key by a `from` line is not thrown away.
            coding.setdefault(base[1], {})['alias'] = alias or None
            continue

        module = base[1]
        key = module.split('.')[0]
        child = module if '.' in module else None
        if debug and child:
            print("child", child)

        # The imported names are everything after the first `import`
        # keyword that follows the module path.
        try:
            start = base.index('import', 2) + 1
        except ValueError:
            # Incomplete statement on this line: skip it instead of failing
            # or reusing the names parsed from a previous line.
            continue
        imported = ' '.join(base[start:])
        if debug:
            print("imported pre", imported)
        # Drop empty segments left behind by stray or trailing commas.
        imported = ','.join([x for x in imported.split(',') if x != ''])
        if debug:
            print("imported", imported)

        # Reuse the existing entry so an alias set by `import pkg as x`
        # survives a later `from pkg.sub import ...`.
        entry = coding.setdefault(key, {'alias': None})
        if child:
            entry.setdefault('import', {}).setdefault(child, []).append(imported)
    return coding

def vizualize_coding(coding):
    """
    Print the dictionary built by ``parse_text`` as a small text tree.

    Each top-level key is printed on its own line, followed by ``-> alias``
    when its 'alias' value is not None. If the entry has an 'import'
    mapping, every module in it is printed behind a branch marker, and the
    names recorded for that module follow on a tab-indented line, joined
    with commas.

    coding: dict mapping a name to a dict with an 'alias' key and an
            optional 'import' key (module -> list of strings)
    """
    branch = "└───"
    for module, info in coding.items():
        alias = info['alias']
        if alias is None:
            print(module)
        else:
            print(module, "->", alias)

        # Entries without an 'import' mapping have nothing below them.
        for submodule, names in info.get('import', {}).items():
            print(branch, submodule)
            print("\t" + branch, ','.join(names))
In [2]:
# Sample import block mixing plain, aliased and dotted `from` imports.
# pyspark.sql.window appears twice, and one of those lines ends with a
# trailing space.
text = """from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from pyspark.sql.functions import when, col, row_number,x
from pyspark.sql.window import Window as W, Split as S
from pyspark.sql.window import Window as W, Split 
from pyspark.sql.types import IntegerType, FloatType

import pandas as pd
import re

"""

# Parse the sample, print the raw dictionary, then print its tree rendering.
coding = parse_text(text)
print()
print(coding)
print()

vizualize_coding(coding)


{'pyspark': {'alias': None, 'import': {'pyspark.sql': ['SparkSession'], 'pyspark.sql.functions': ['when, col, row_number,x'], 'pyspark.sql.window': ['Window as W, Split as S', 'Window as W, Split '], 'pyspark.sql.types': ['IntegerType, FloatType']}}, 'pyspark.sql.functions': {'alias': 'F'}, 'pandas': {'alias': 'pd'}, 're': {'alias': None}}

pyspark
└─── pyspark.sql
	└─── SparkSession
└─── pyspark.sql.functions
	└─── when, col, row_number,x
└─── pyspark.sql.window
	└─── Window as W, Split as S,Window as W, Split 
└─── pyspark.sql.types
	└─── IntegerType, FloatType
pyspark.sql.functions -> F
pandas -> pd
re


### Sklearn: linear models example

In [3]:
# Source: https://github.com/scikit-learn/scikit-learn/tree/main/sklearn/linear_model
# Import block with absolute imports followed by relative imports
# (module paths starting with `.` or `..`).
text_sklearn = """import numbers
import warnings

import numpy as np
from scipy import optimize
from joblib import Parallel, effective_n_jobs

from ._base import LinearClassifierMixin, SparseCoefMixin, BaseEstimator
from ._linear_loss import LinearModelLoss
from ._sag import sag_solver
from .._loss.loss import HalfBinomialLoss, HalfMultinomialLoss
from ..preprocessing import LabelEncoder, LabelBinarizer
from ..svm._base import _fit_liblinear
from ..utils import check_array, check_consistent_length, compute_class_weight
from ..utils import check_random_state
from ..utils.extmath import softmax
from ..utils.extmath import row_norms
from ..utils.optimize import _newton_cg, _check_optimize_result
from ..utils.validation import check_is_fitted, _check_sample_weight
from ..utils.multiclass import check_classification_targets
from ..utils.fixes import delayed
from ..model_selection import check_cv
from ..metrics import get_scorer"""

# Parse without debug output and print only the tree rendering
# (the raw dictionary print is left commented out).
coding_sklearn = parse_text(text_sklearn, mode='')
print()
# print(coding_sklearn)
print()

vizualize_coding(coding_sklearn)



numbers
numpy -> np
scipy
joblib

└─── ._base
	└─── LinearClassifierMixin, SparseCoefMixin, BaseEstimator
└─── ._linear_loss
	└─── LinearModelLoss
└─── ._sag
	└─── sag_solver
└─── .._loss.loss
	└─── HalfBinomialLoss, HalfMultinomialLoss
└─── ..preprocessing
	└─── LabelEncoder, LabelBinarizer
└─── ..svm._base
	└─── _fit_liblinear
└─── ..utils
	└─── check_array, check_consistent_length, compute_class_weight,check_random_state
└─── ..utils.extmath
	└─── softmax,row_norms
└─── ..utils.optimize
	└─── _newton_cg, _check_optimize_result
└─── ..utils.validation
	└─── check_is_fitted, _check_sample_weight
└─── ..utils.multiclass
	└─── check_classification_targets
└─── ..utils.fixes
	└─── delayed
└─── ..model_selection
	└─── check_cv
└─── ..metrics
	└─── get_scorer


### Sklearn: ridge models example

In [4]:
# Source: https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/linear_model/_ridge.py

# Import block in which the same module is imported from on several
# separate lines (e.g. `scipy`, `._base`, `..utils`).
text_ridge="""from abc import ABCMeta, abstractmethod
from functools import partial
import warnings

import numpy as np
import numbers
from scipy import linalg
from scipy import sparse
from scipy import optimize
from scipy.sparse import linalg as sp_linalg

from ._base import LinearClassifierMixin, LinearModel
from ._base import _deprecate_normalize, _preprocess_data, _rescale_data
from ._sag import sag_solver
from ..base import MultiOutputMixin, RegressorMixin, is_classifier
from ..utils.extmath import safe_sparse_dot
from ..utils.extmath import row_norms
from ..utils import check_array
from ..utils import check_consistent_length
from ..utils import check_scalar
from ..utils import compute_sample_weight
from ..utils import column_or_1d
from ..utils.validation import check_is_fitted
from ..utils.validation import _check_sample_weight
from ..preprocessing import LabelBinarizer
from ..model_selection import GridSearchCV
from ..metrics import check_scoring
from ..exceptions import ConvergenceWarning
from ..utils.sparsefuncs import mean_variance_axis
"""

# Parse without debug output and print only the tree rendering
# (the raw dictionary print is left commented out).
coding_ridge = parse_text(text_ridge,mode='')
print()
#print(coding_ridge)
print()

vizualize_coding(coding_ridge)



abc
functools
numpy -> np
numbers
scipy
└─── scipy.sparse
	└─── linalg as sp_linalg

└─── ._base
	└─── LinearClassifierMixin, LinearModel,_deprecate_normalize, _preprocess_data, _rescale_data
└─── ._sag
	└─── sag_solver
└─── ..base
	└─── MultiOutputMixin, RegressorMixin, is_classifier
└─── ..utils.extmath
	└─── safe_sparse_dot,row_norms
└─── ..utils
	└─── check_array,check_consistent_length,check_scalar,compute_sample_weight,column_or_1d
└─── ..utils.validation
	└─── check_is_fitted,_check_sample_weight
└─── ..preprocessing
	└─── LabelBinarizer
└─── ..model_selection
	└─── GridSearchCV
└─── ..metrics
	└─── check_scoring
└─── ..exceptions
└─── ..utils.sparsefuncs
	└─── mean_variance_axis


In [5]:
# Import block containing a parenthesized multi-line import (`typing`),
# comment lines between imports, and trailing `# noqa` comments.
text_pyspark = """import inspect
import sys
import functools
import warnings
from typing import (
    Any,
    cast,
    Callable,
    Dict,
    List,
    Iterable,
    overload,
    Optional,
    Tuple,
    TYPE_CHECKING,
    Union,
    ValuesView,
)

from pyspark import since, SparkContext
from pyspark.rdd import PythonEvalType
from pyspark.sql.column import Column, _to_java_column, _to_seq, _create_column_from_literal
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.types import ArrayType, DataType, StringType, StructType

# Keep UserDefinedFunction import for backwards compatible import; moved in SPARK-22409
from pyspark.sql.udf import UserDefinedFunction, _create_udf  # noqa: F401

# Keep pandas_udf and PandasUDFType import for backwards compatible import; moved in SPARK-28264
from pyspark.sql.pandas.functions import pandas_udf, PandasUDFType  # noqa: F401
from pyspark.sql.utils import to_str
"""

# Parse without debug output and print only the tree rendering
# (the raw dictionary print is left commented out).
coding_pyspark = parse_text(text_pyspark,mode='')
print()
#print(coding_pyspark)
print()

vizualize_coding(coding_pyspark)




inspect
sys
functools
typing
pyspark
└─── pyspark.rdd
	└─── PythonEvalType
└─── pyspark.sql.column
	└─── Column, _to_java_column, _to_seq, _create_column_from_literal
└─── pyspark.sql.dataframe
	└─── DataFrame
└─── pyspark.sql.types
	└─── ArrayType, DataType, StringType, StructType
└─── pyspark.sql.udf
	└─── UserDefinedFunction, _create_udf  # noqa: F401
└─── pyspark.sql.pandas.functions
	└─── pandas_udf, PandasUDFType  # noqa: F401
└─── pyspark.sql.utils
	└─── to_str


## Next steps:

1. Handle parenthesized imports whose names continue on the following lines (e.g. `from typing import (` ... `)`).
2. Ignore trailing comments on import lines (e.g. `# noqa: F401`).