# Case Study: Zeeguu/API - static analyze
- Backend of a web application that supports [free reading in foreign languages](https://zeeguu.org)
- Open source [repository on GH](https://github.com/zeeguu/API/)



# Basic Data Gathering

  - Basic used case, for creating a sequence diagram of the coding patch. 

  - file: `./zeeguu_core/model/user.py` <==>
  - module: `zeeguu_core.model.User`




In [1]:
# Installing Required Dependencies
import sys
sys.version
!{sys.executable} -m pip install gitpython
!{sys.executable} -m pip install pyvis





In [2]:
# In Collab our notebook runs in a temporary mounted file system
# Let's print the name of the folder where our script runs
import os
cwd = os.getcwd()
print(cwd)


/Users/nicklasjeppesen/Desktop/software_individual_report/zeep


In [3]:
# Let's declare a var for the path where we're going to download a repository
# Warning: this must end in /
CODE_ROOT_FOLDER=cwd+"/content/zeeguu_api/"


In [4]:
from git import Repo
# GitPython is a library that allows us to work easily with git from Python
# https://gitpython.readthedocs.io/en/stable/tutorial.html


# If the file exists, it means we've already downloaded
#if not os.path.exists(CODE_ROOT_FOLDER):
if not os.path.exists(CODE_ROOT_FOLDER):
  Repo.clone_from("https://github.com/zeeguu/api", CODE_ROOT_FOLDER)


In [5]:
ENDPOINT_ONE = CODE_ROOT_FOLDER+"zeeguu/api/endpoints/activity_tracking.py"
ENDPOINT_TWO = CODE_ROOT_FOLDER+"zeeguu/api/endpoints/user_articles.py"
os.path.exists(ENDPOINT_ONE)
os.path.exists(ENDPOINT_TWO)

True

# Code library: 



In [6]:
# naïve way of extracting imports using regular expressions
import re
import builtins

def getFileContentFromPath(file_path): 
    with open(file_path, 'r') as file:
        file_content = file.read()
    return file_content

# extracting a module name from a file name
# e.g. ../core/model/user.py -> zeeguu.core.model.user
def module_name_from_file_path(full_path):   
    file_name = full_path[len(CODE_ROOT_FOLDER):]
    file_name = file_name.replace("/__init__.py","")
    file_name = file_name.replace("/",".")
    file_name = file_name.replace(".py","")
    return file_name


# extracting a module name from a file name
# e.g. 'zeeguu.core.model.user' --> cwd+/content/zeeguu-api/zeeguu/core/model/user.py 
# There can be two issues: 
# 1: if the module also include a functionCall, then we shall remove it. 
# 2: if the class named, is not equal the fileName, then we shall check the init file, the find the file name. 

def getCorrectFileNameFromInitFile(init_path, className): 
    lines = [line for line in open(init_path)]
    for line in lines:
        imp = className in line

        if imp: 
            lineSplit = line.split(" ")
            newLine = lineSplit[1]
            newLine = newLine.replace(".", "")
            return newLine
            
# extracting a module name from a file name
# e.g. 'zeeguu.core.model.user' --> cwd+/content/zeeguu-api/zeeguu/core/model/user.py 
# There can be two issues: 
# 1: if the module also include a functionCall, then we shall remove it. 
# 2: if the class named, is not equal the fileName, then we shall check the init file, the find the file name. 
def file_path_from_module_name(module_name):
    file_name = module_name.replace(".","/")
    firstTry = cwd+"/content/zeeguu-api/"+file_name+".py"
   
    if(os.path.exists(firstTry)): # check if modulename, is correct file path.  
        return firstTry
   
    # problem 1: try to remove function name, from file/class name, so we only have, module and class/file name. 
    firstTry = cwd+"/content/zeeguu-api/"+file_name # removing the .py, for easier manipulate the path later. 
    recon = firstTry.rsplit("/", 1)
    if(not os.path.isdir(recon[0])): 
        #print("is a folder")
        recon = recon[0].rsplit("/", 1)
        if(not os.path.isdir(recon[0])): 
            return None # I dont know what to do longer, so give up. 
    
    # problem 2: hope, module is equal fileName, but not sure. 
    retryTwo = recon[0]+"/"+recon[1]+".py"
    if(os.path.exists(retryTwo)): # if True, then the className/filename/Module name is correct. 
       return retryTwo

    init_path = recon[0]+"/__init__.py"
    correctFileName = getCorrectFileNameFromInitFile(init_path, recon[1])
    
    if not correctFileName: 
        return None
    retryThree = recon[0]+"/"+correctFileName+".py"
    if(os.path.exists(retryThree)): 
        return retryThree
    return None
 

# helper function to get a file path w/o having to always provide the /content/zeeguu-api/ prefix
def file_path(file_name):
    return CODE_ROOT_FOLDER+file_name

#assert 'zeeguu.core.model.user' == module_name_from_file_path(file_path('zeeguu/core/model/user.py'))

# we assume that imports are always at the
# MODIFIED, 
#   SO IT RETURN IMPORT FROM OTHER PARTS OF THE ZEEGUU CODE BASED. 
#   Also, it return an array, instead of a single 
def import_from_line(line):
    # regex patterns used
    #   ^  - beginning of line
    #   \S - anything that is not space
    #   +  - at least one occurrence of previous
    #  ( ) - capture group (read more at: https://pynative.com/python-regex-capturing-groups/)
    finalArray = []
    try:
      y = re.search("^from (\S+)", line)
      if y:
        
        for item in getImportedModules(line): 
            #print(item)
            if(item.startswith("zeeguu")):
                finalArray.append(item)
      else:
        y = re.search("^import (\S+)", line)
        if y.group(1).startswith("zeeguu"):
            finalArray.append(y.group(1))
      #if(module.startswith("zeeguu")): 
       # return module
     # else:
      #  return None
      #return y.group(1)
    except:
      return finalArray
    return finalArray

def getImportedModules(text2):
    y = re.search("^from (\S+)", text2)
    if y: 
        print(text2)
        basisModule = y.group(1)
        #basisModule = basisModule#.replace("(", "").replace(")", "").strip()
        finalModuleList = []
        pattern = r"from\s+[\w.]+\s+import\s+([\w\s,]+)"
        match = re.search(pattern, text2)
        if match:
            imported_items_str = match.group(1)
            imported_items = [item.strip() for item in imported_items_str.split(",")]
            print(imported_items)
            for item in imported_items: 
                finalModuleList.append(basisModule+"."+item)
            return finalModuleList
        else:
            return []
        


# extracts all the imported modules from a file
# returns a module of the form zeeguu_core.model.bookmark, e.g.
def imports_from_file(file):
    all_imports = []
    lines = [line for line in open(file)]

    for line in lines:
        imp = import_from_line(line)

        if imp:
            #all_imports.append(imp)
            for item in imp: 
                all_imports.append(item)

    return all_imports

#imports_from_file(file_path('/zeeguu/core/model/user.py'))
#imports_from_file(file_path('/zeeguu/api/endpoints/activity_tracking.py'))


# Find all functions, defined with def first, from a text file. 
def find_python_functions(file):
    pattern = r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\([^()]*\)\s*:"
    matches = re.findall(pattern, file)
    return matches


# Return a function body from string, where the string can be a string of a class and not. 
def extract_function_body(func_name, func_string):
    #pattern = r'def\s+' + re.escape(func_name) + r'\s*\(.*?\)\s*:\s*(.*?)\bdef\s'
    #pattern = rf"def\s+{func_name}\s*\(\s*\):(\s*\n\s*\"\"\".*?\"\"\")?\s*(.*?)\bdef\s"
    pattern = "(def*\W("+func_name+"+)([\w\W]*?))(?=(def)|$)"

    match = re.search(pattern, func_string, re.DOTALL)
    if match:
        return match.group(1)
    else:
        return None

# Return a list of called functions from a function as string    
def getCalledfunction(source_code):
   
    builtinsFunctions = dir(__builtins__)
    called_functions = []

    # Define the regex pattern to match function calls
    function_call_pattern = r'(\w+)\('
    # Find all matches of function calls in the source code
    matches = re.findall(function_call_pattern, source_code)

    # Filter out function names that are not defined in the global scope
    for match in matches:
        if match not in builtinsFunctions:# and callable(globals()[match]):
            called_functions.append(match)
    return called_functions
 

# return a new list, of function there exists in the modules list. 
def GetAllRealFilePath(list_of_functions: list, modules: list): 
    functions_that_exists = []
    for func in list_of_functions: 
        for module in modules: 
            try: 
                mini_file_path = file_path_from_module_name(module) # return a specific file path, for a given module. 
                file_content = getFileContentFromPath(mini_file_path)
               
                functions_in_folder = find_python_functions(file_content)
                if func in functions_in_folder: 
                    functions_that_exists.append(func)
                    break
            except: 
                print("error at module: " + module)
                print("error at function: " + func)
    return functions_that_exists



Dreje bog for løsning af problemet. 
hvad har vi indtil videre: 

* Nuværende problem: hvis from.. bliver gjort over flere linjer, med (), så er jeg fuckt. 

* module_name_from_file_path: where we get a current module name from a file path. 
* file_path_from_module_name: return file path from a module name as string. 
* imports_from_file(file): return array of all module import of a file, where module is from zeguu, otherwise empty. 
* find_python_functions(file): # Find all functions, defined with def first, from a text file. 
* extract_function_body(func_name, func_string): Return a function body from string, where the string can be a string of a class and not. 
* getCalledfunction(source_code): Return a list of called functions from a function as string    
* getFileContentFromPath(file_path): get content of file by a file path. 

Process: 

1: Get the function body of the examing function by codepath to the file and def name. 
    * function body: getFileContentFromPath, extract_function_body(func_name, filecontent)
2: Get a list of all calling functions in that function: 
    * list of calling functions: getCalledfunction(source_code):
3: foreach called functions, 
    if they are in one of zeeguu module, 
        get filepath by module name, and call recusive, just to print, calling hiracy. 
        Next, return a calling node, of functions. 


master function: 
    def masterfunction(file_path, function_name): 
        Execute. 




In [13]:
# used of  code library: 

func_name = "upload_user_activity_data"
local_endpoint = ENDPOINT_ONE
print(local_endpoint)
        
def masterFunction(func_name, endpoint):
    codeContent = getFileContentFromPath(endpoint) 
    funcBody = extract_function_body(func_name, codeContent)

    CalingFunctions = getCalledfunction(funcBody) # return a list of calling functions ex. ['upload_user_activity_data', 'create_from_post_data', 'get', 'distill_article_interactions', 'get', 'notify_audio_experiment']
    if func_name in CalingFunctions: CalingFunctions.remove(func_name)
    ImportedModules = imports_from_file(endpoint) # return list of modules (might be functionNames in them) ex. ['zeeguu.api.utils.route_wrappers.cross_domain', 'zeeguu.api.utils.route_wrappers.with_session', 'zeeguu.core.model.UserActivityData']

    # First print the dependcy, thereafter their Dependicy.
    #print(func_name + ": Depends on the folling modules")
    #print(ImportedModules)     

    
    # ex function name. 
    ex_function = "create_from_post_data"
    ex_module = "zeeguu.core.model.UserActivityData"
    #ImportedModules = imports_from_file(endpoint)[0] # return list of modules (might be functionNames in them) ex. ['zeeguu.api.utils.route_wrappers.cross_domain', 'zeeguu.api.utils.route_wrappers.with_session', 'zeeguu.core.model.UserActivityData']
    #mini_file_path = file_path_from_module_name(ImportedModules) # return a specific file path, for a given module. 
    
    print(CalingFunctions)
    print(ImportedModules)

    realFunctions = GetAllRealFilePath(CalingFunctions, ImportedModules)
    
    return realFunctions
    
result = masterFunction(func_name, local_endpoint)

print(result)




/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu_api/zeeguu/api/endpoints/activity_tracking.py
from flask import request

['request']
from zeeguu.core.user_activity_hooks.article_interaction_hooks import distill_article_interactions

['distill_article_interactions']
from . import api, db_session

['api', 'db_session']
from zeeguu.api.utils.route_wrappers import cross_domain, with_session

['cross_domain', 'with_session']
from zeeguu.core.model import UserActivityData

['UserActivityData']
['create_from_post_data', 'get', 'distill_article_interactions', 'get', 'notify_audio_experiment']
['zeeguu.core.user_activity_hooks.article_interaction_hooks.distill_article_interactions', 'zeeguu.api.utils.route_wrappers.cross_domain', 'zeeguu.api.utils.route_wrappers.with_session', 'zeeguu.core.model.UserActivityData']
['create_from_post_data', 'distill_article_interactions']


In [8]:
#ef_is_correct_file_path(filepath)

path = "/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu-api/zeeguu/core/model"
init_path = path+"/__init__.py"
className = "UserActivityData"

print(os.path.isdir(path))

print(os.path.exists(init_path))

#print(getFileContentFromPath(init_path)) 
lines = [line for line in open(init_path)]
for line in lines:
    imp = className in line

    if imp: 
        print(True)
        lineSplit = line.split(" ")
        newLine = lineSplit[1]
        newLine = newLine.replace(".", "")
        print(newLine)
        break




True
True
True
user_activitiy_data


In [9]:
import os 

def getCorrectFileNameFromInitFile(init_path, className): 
    lines = [line for line in open(init_path)]
    for line in lines:
        imp = className in line

        if imp: 
            lineSplit = line.split(" ")
            newLine = lineSplit[1]
            newLine = newLine.replace(".", "")
            return newLine
            
# extracting a module name from a file name
# e.g. 'zeeguu.core.model.user' --> cwd+/content/zeeguu-api/zeeguu/core/model/user.py 
# There can be two issues: 
# 1: if the module also include a functionCall, then we shall remove it. 
# 2: if the class named, is not equal the fileName, then we shall check the init file, the find the file name. 
def file_path_from_module_name(module_name):
    file_name = module_name.replace(".","/")
    firstTry = cwd+"/content/zeeguu-api/"+file_name+".py"
   
    if(os.path.exists(firstTry)): # check if modulename, is correct file path.  
        return firstTry
   
    # problem 1: try to remove function name, from file/class name, so we only have, module and class/file name. 
    firstTry = cwd+"/content/zeeguu-api/"+file_name # removing the .py, for easier manipulate the path later. 
    recon = firstTry.rsplit("/", 1)
    if(not os.path.isdir(recon[0])): 
        #print("is a folder")
        recon = recon[0].rsplit("/", 1)
        if(not os.path.isdir(recon[0])): 
            return None # I dont know what to do longer, so give up. 
    
    # problem 2: hope, module is equal fileName, but not sure. 
    retryTwo = recon[0]+"/"+recon[1]+".py"
    if(os.path.exists(retryTwo)): # if True, then the className/filename/Module name is correct. 
       return retryTwo

    init_path = recon[0]+"/__init__.py"
    correctFileName = getCorrectFileNameFromInitFile(init_path, recon[1])
    
    if not correctFileName: 
        return None
    retryThree = recon[0]+"/"+correctFileName+".py"
    if(os.path.exists(retryThree)): 
        return retryThree
    return None
    

path = "zeeguu/core/model/UserActivityData"
print(file_path_from_module_name(path))

testingPath = "zeeguu/api/utils/route_wrappers/cross_domain" #with functionName, 
print(file_path_from_module_name(testingPath))


/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu-api/zeeguu/core/model/user_activitiy_data.py
/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu-api/zeeguu/api/utils/route_wrappers.py


In [10]:

dd = file_path("zeeguu/core/model/bookmark.py")

#print(imports_from_file(file_path("zeeguu/core/model/bookmark.py")))

#lines = Path(dd).read_text()
#result = find_python_functions(lines) # return list of functions. 








# EXAMPLES OF FUNCTIONS. 

In [11]:
import inspect
import builtins

builtinsFunctions = dir(__builtins__)

txt = """
class MyClass():
  def foo(x, y=2):
      z = x*y + 3
      print("z is", z)
      return z**2
"""

tree = ast.parse(txt, mode='exec')
code = compile(tree, filename='blah', mode='exec')
namespace = {}

exec(code, namespace)
val = "foo"
dict_item = namespace["MyClass"].__dict__.items()
for x, y in list(dict_item):
  if val == x:
    print(x)
    print(y)
    print(type(x))
    print(type(y))
    y(2)
    print(inspect.getmodule(y))
    #print(inspect.getsourcelines(y))
    #print(dir(y))

NameError: name 'ast' is not defined

In [None]:
'''
import re

def extract_function_body(func_string):
    pattern = r'def\s+\w+\s*\(.*?\)\s*:\s*(.*?)\bdef\s'
    match = re.search(pattern, func_string, re.DOTALL)
    if match:
        return match.group(1)
    else:
        return None

# Example usage:
function_string = """
def my_function(x):
    if x > 0:
        return x
    else:
        return -x
def another_function():
    print("Another function")
"""

body = extract_function_body(function_string)
print(body)
'''
#print 
'''
if x > 0:
        return x
    else:
        return -x
'''

'\nif x > 0:\n        return x\n    else:\n        return -x\n'

In [None]:
import re

# Example usage:
function_string = """
def my_function(x):
    if x > 0:
        return x
    else:
        return -x
def another_function():
    print("Another function")
"""

text = """
class myCLass: 
    def my_class_func_a():
        #print("Inside func_a")
        myclass_func_b()

    def myclass_func_b():
        print("Inside func_b")
"""

function_name = "my_function"
body = extract_function_body(function_name, function_string)
body2 = extract_function_body("my_class_func_a", text)
print(body2)
print(getCalledfunction(body2))


def my_class_func_a():
        #print("Inside func_a")
        myclass_func_b()

    
['my_class_func_a', 'myclass_func_b']


# ARCHIVE CODE 

In [None]:
# ARCHIVE CODE LIBS 

'''
def methods_in_class(cls):
	return [
		(name, object) 
		for (name, object) 
			in cls.__dict__.items() 
		if hasattr(object, '__call__')]
#methods_in_class(Foo) '''




"\ndef methods_in_class(cls):\n\treturn [\n\t\t(name, object) \n\t\tfor (name, object) \n\t\t\tin cls.__dict__.items() \n\t\tif hasattr(object, '__call__')]\n#methods_in_class(Foo) "

In [None]:

import ast

def get_function_body2(file_path, function_name):
    with open(file_path, "r") as file:
        lines = file.readlines()

    start_line = None
    end_line = None

    # Find the line numbers where the function starts and ends
    for i, line in enumerate(lines):
        if line.strip().startswith("def " + function_name):
            start_line = i
        elif start_line is not None and line.strip() == "":
            end_line = i
            break

    if start_line is not None and end_line is not None:
        # Extract the lines of the function body
        function_lines = lines[start_line:end_line]
        # Remove leading indentation
        function_lines = [line.lstrip() for line in function_lines]
        # Join the lines to form the function body
        function_body = "".join(function_lines)
        return function_body
    else:
        return None

def get_function_calls(function_def_node):
    function_calls = []
    for node in ast.walk(function_def_node):
        if isinstance(node, ast.Call):
            
            function_calls.append(node)
    return function_calls


def get_function_body(file_path, function_name):
    with open(file_path, "r") as file:
        tree = ast.parse(file.read(), filename=file_path)
    
    # Find the function definition node
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef) and node.name == function_name:
            # Extract the body of the function
            return node
            #return get_function_calls(node)
            
    # If the function is not found, return None
    return None


# Example usage
file_path = ENDPOINT_ONE
print(file_path)
function_name = "upload_user_activity_data"
function_body = get_function_body(file_path, function_name)
if function_body is not None:
    print("Function body:")
    
    #for statement in function_body:
      #  print(ast.dump(statement))
        #print(ast.dump(statement, True, False))
        
else:
    print(f"Function '{function_name}' not found in the file.")

/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu_api/zeeguu/api/endpoints/activity_tracking.py
Function body:


In [None]:
import ast
import importlib
import importlib.util

f = open(cwd+"/content/zeeguu-api/zeeguu/core/model/user.py")
user_ast = ast.parse(f.read())
#print(user_ast.body[0])  
print(cwd+"/content/zeeguu-api/zeeguu/core/model/user.py")
stringmodule = module_name_from_file_path(cwd+"/content/zeeguu-api/zeeguu/core/model/user.py")

file_path_from_module_name(stringmodule)


#for node in ast.walk(user_ast):
   # print(node.__dict__)
   #print("children: " + str([x for x in ast.iter_child_nodes(node)]) + "\\n")



/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu-api/zeeguu/core/model/user.py


'/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu-api/zeeguu/core/model/user.py'