In [1]:
#default_exp parsing_imports

In [2]:
#export
import ast
from collections import namedtuple

In [3]:
#export


Import = namedtuple("Import", ["module", "name", "alias"])


def _get_imports(file_content):
    root = ast.parse(file_content)

    for node in ast.iter_child_nodes(root):
        if isinstance(node, ast.Import):
            module = []
        elif isinstance(node, ast.ImportFrom) and not node.module is None :  
            module = node.module.split('.')
        else:
            continue

        for n in node.names:
            yield Import(module, n.name.split('.'), n.asname)

In [4]:
# Sample module source used by the cells below: three top-level import
# statements (gzip, typing, numpy) followed by a class definition.
example_file_content = 'import gzip\nfrom typing import Dict, Callable\nimport numpy as np\n\n\nclass CompressedKeyedVectors(object):\n\n    def __init__(self, vocab_path: str, embedding_path: str, to_lowercase: bool=True):\n        """\n        Class from sdadas polish-nlp-resources\n        https://github.com/sdadas/polish-nlp-resources\n        I need to get it somewhere from where I can import it easily for using with custom BentoML model\n        """\n        self.vocab_path: str = vocab_path\n        self.embedding_path: str = embedding_path\n        self.to_lower: bool = to_lowercase\n        self.vocab: Dict[str, int] = self.__load_vocab(vocab_path)\n        embedding = np.load(embedding_path)\n        self.codes: np.ndarray = embedding[embedding.files[0]]\n        self.codebook: np.ndarray = embedding[embedding.files[1]]\n        self.m = self.codes.shape[1]\n        self.k = int(self.codebook.shape[0] / self.m)\n        self.dim: int = self.codebook.shape[1]\n\n    def __load_vocab(self, vocab_path: str) -> Dict[str, int]:\n        open_func: Callable = gzip.open if vocab_path.endswith(".gz") else open\n        with open_func(vocab_path, "rt", encoding="utf-8") as input_file:\n            return {line.strip():idx for idx, line in enumerate(input_file)}\n\n    def vocab_vector(self, word: str):\n        if word == "<pad>": return np.zeros(self.dim)\n        val: str = word.lower() if self.to_lower else word\n        index: int = self.vocab.get(val, self.vocab["<unk>"])\n        codes = self.codes[index]\n        code_indices = np.array([idx * self.k + offset for idx, offset in enumerate(np.nditer(codes))])\n        return np.sum(self.codebook[code_indices], axis=0)\n\n    def __getitem__(self, key):\n        return self.vocab_vector(key)'
# print() is used so the escaped newlines render as real line breaks.
print(example_file_content)

import gzip
from typing import Dict, Callable
import numpy as np


class CompressedKeyedVectors(object):

    def __init__(self, vocab_path: str, embedding_path: str, to_lowercase: bool=True):
        """
        Class from sdadas polish-nlp-resources
        https://github.com/sdadas/polish-nlp-resources
        I need to get it somewhere from where I can import it easily for using with custom BentoML model
        """
        self.vocab_path: str = vocab_path
        self.embedding_path: str = embedding_path
        self.to_lower: bool = to_lowercase
        self.vocab: Dict[str, int] = self.__load_vocab(vocab_path)
        embedding = np.load(embedding_path)
        self.codes: np.ndarray = embedding[embedding.files[0]]
        self.codebook: np.ndarray = embedding[embedding.files[1]]
        self.m = self.codes.shape[1]
        self.k = int(self.codebook.shape[0] / self.m)
        self.dim: int = self.codebook.shape[1]

    def __load_vocab(self, vocab_path: str) -> Dict[str, int]:

In [5]:
# A bare relative import names no external module, so nothing is yielded.
assert list(_get_imports('from . import tracking')) == []

In [6]:
# One Import record per imported name found at the top level of the sample source.
list(_get_imports(example_file_content))

[Import(module=[], name=['gzip'], alias=None),
 Import(module=['typing'], name=['Dict'], alias=None),
 Import(module=['typing'], name=['Callable'], alias=None),
 Import(module=[], name=['numpy'], alias='np')]

In [7]:
# A single `from ... import a, b` statement yields one record per imported name.
example_import_line = 'from sklearn import linear_model, model_selection'
list(_get_imports(example_import_line))

[Import(module=['sklearn'], name=['linear_model'], alias=None),
 Import(module=['sklearn'], name=['model_selection'], alias=None)]

In [8]:
#export


def get_module_from_import(imp):
    """Return the leading dotted component of an import record.

    Args:
        imp: an object with ``module`` and ``name`` attributes, each a list
            of dotted-name parts (such as the ``Import`` namedtuple).

    Returns:
        ``imp.name[0]`` when ``imp.module`` is an empty list (a plain
        ``import x.y`` statement), otherwise ``imp.module[0]``.
    """
    # Plain `import` statements carry the package in `name`; `from` imports
    # carry it in `module`.
    dotted_parts = imp.name if imp.module == [] else imp.module
    return dotted_parts[0]


def get_modules(file_content):
    """Lazily yield the leading module name for every import in the source.

    Args:
        file_content: Python source code passed on to ``_get_imports``.

    Yields:
        The result of ``get_module_from_import`` for each record produced by
        ``_get_imports``, in order. Names are not de-duplicated, so wrap the
        result in ``set(...)`` to get the distinct modules.
    """
    yield from map(get_module_from_import, _get_imports(file_content))

In [9]:
# Collapse the per-name results into the distinct modules of the sample source.
set(get_modules(example_file_content))

{'gzip', 'numpy', 'typing'}