# Case Study: Zeeguu/API - static analyze
- Backend of a web application that supports [free reading in foreign languages](https://zeeguu.org)
- Open source [repository on GH](https://github.com/zeeguu/API/)



# Basic Data Gathering

  - Basic used case, for creating a sequence diagram of the coding patch. 

  - file: `./zeeguu_core/model/user.py` <==>
  - module: `zeeguu_core.model.User`




In [1]:
# Installing Required Dependencies
import sys
sys.version
!{sys.executable} -m pip install gitpython
!{sys.executable} -m pip install pyvis

You should consider upgrading via the '/Users/nicklasjeppesen/opt/miniconda3/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/Users/nicklasjeppesen/opt/miniconda3/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
# In Collab our notebook runs in a temporary mounted file system
# Let's print the name of the folder where our script runs
import os
cwd = os.getcwd()
print(cwd)


/Users/nicklasjeppesen/Desktop/software_individual_report/zeep


In [71]:
# Let's declare a var for the path where we're going to download a repository
# Warning: this must end in /
CODE_ROOT_FOLDER=cwd+"/content/zeeguu_api/"


In [4]:
from git import Repo
# GitPython is a library that allows us to work easily with git from Python
# https://gitpython.readthedocs.io/en/stable/tutorial.html


# If the file exists, it means we've already downloaded
#if not os.path.exists(CODE_ROOT_FOLDER):
if not os.path.exists(CODE_ROOT_FOLDER):
  Repo.clone_from("https://github.com/zeeguu/api", CODE_ROOT_FOLDER)


In [72]:
ENDPOINT_ONE = CODE_ROOT_FOLDER+"zeeguu/api/endpoints/activity_tracking.py"
ENDPOINT_TWO = CODE_ROOT_FOLDER+"zeeguu/api/endpoints/user_articles.py"
os.path.exists(ENDPOINT_ONE)
os.path.exists(ENDPOINT_TWO)

True

# Code library: 



In [84]:
# naïve way of extracting imports using regular expressions
import re

def methods_in_class(cls):
	return [
		(name, object) 
		for (name, object) 
			in cls.__dict__.items() 
		if hasattr(object, '__call__')]
#methods_in_class(Foo)

# extracting a module name from a file name
def module_name_from_file_path(full_path):

    # e.g. ../core/model/user.py -> zeeguu.core.model.user

    file_name = full_path[len(CODE_ROOT_FOLDER):]
    file_name = file_name.replace("/__init__.py","")
    file_name = file_name.replace("/",".")
    file_name = file_name.replace(".py","")
    return file_name

# extracting a module name from a file name
def module_name_to_file_path(file_name):

    # e.g. 'zeeguu.core.model.user' --> cwd+/content/zeeguu-api/zeeguu/core/model/user.py 
    #file_name = full_path[len(CODE_ROOT_FOLDER):]
    file_name = file_name.replace(".","/")
    return cwd+"/content/zeeguu-api/"+file_name+".py"



# helper function to get a file path w/o having to always provide the /content/zeeguu-api/ prefix
def file_path(file_name):
    return CODE_ROOT_FOLDER+file_name

#assert 'zeeguu.core.model.user' == module_name_from_file_path(file_path('zeeguu/core/model/user.py'))


# we assume that imports are always at the
# TODO for you: add full support for imports; this is not complete...
def import_from_line(line):

    # regex patterns used
    #   ^  - beginning of line
    #   \S - anything that is not space
    #   +  - at least one occurrence of previous
    #  ( ) - capture group (read more at: https://pynative.com/python-regex-capturing-groups/)
    try:
      y = re.search("^from (\S+)", line)
      if not y:
        y = re.search("^import (\S+)", line)
      return y.group(1)
    except:
      return None


# extracts all the imported modules from a file
# returns a module of the form zeeguu_core.model.bookmark, e.g.
def imports_from_file(file):

    all_imports = []

    lines = [line for line in open(file)]

    for line in lines:
        imp = import_from_line(line)

        if imp:
            all_imports.append(imp)

    return all_imports

#imports_from_file(file_path('/zeeguu/core/model/user.py'))
#imports_from_file(file_path('/zeeguu/api/endpoints/activity_tracking.py'))

def get_called_functions(func):
    called_functions = []

    # Get the source code of the function
    source_lines, _ = inspect.getsourcelines(func)
    source_code = ''.join(source_lines)

    # Define the regex pattern to match function calls
    function_call_pattern = r'(\w+)\('

    # Find all matches of function calls in the source code
    matches = re.findall(function_call_pattern, source_code)

    # Filter out function names that are not defined in the global scope
    for match in matches:
        if match in globals() and callable(globals()[match]):
            if match != func.__name__:
                called_functions.append(match)

    return called_functions

def find_python_functions(file):
    pattern = r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\([^()]*\)\s*:"
    matches = re.findall(pattern, file)
    return matches


In [62]:
import ast
import importlib
import importlib.util

f = open(cwd+"/content/zeeguu-api/zeeguu/core/model/user.py")
user_ast = ast.parse(f.read())
#print(user_ast.body[0])  
print(cwd+"/content/zeeguu-api/zeeguu/core/model/user.py")
stringmodule = module_name_from_file_path(cwd+"/content/zeeguu-api/zeeguu/core/model/user.py")

module_name_to_file_path(stringmodule)


#for node in ast.walk(user_ast):
   # print(node.__dict__)
   #print("children: " + str([x for x in ast.iter_child_nodes(node)]) + "\\n")



/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu-api/zeeguu/core/model/user.py


'/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu-api/zeeguu/core/model/user.py'

In [87]:
import sys

sys.path.append("/zeep/content/zeeguu_api/zeeguu")

from pathlib import Path


dd = file_path('zeeguu/core/model/bookmark.py')
lines = Path(dd).read_text()
result = find_python_functions(lines) # return list of functions. 








In [121]:

import ast

def get_function_body2(file_path, function_name):
    with open(file_path, "r") as file:
        lines = file.readlines()

    start_line = None
    end_line = None

    # Find the line numbers where the function starts and ends
    for i, line in enumerate(lines):
        if line.strip().startswith("def " + function_name):
            start_line = i
        elif start_line is not None and line.strip() == "":
            end_line = i
            break

    if start_line is not None and end_line is not None:
        # Extract the lines of the function body
        function_lines = lines[start_line:end_line]
        # Remove leading indentation
        function_lines = [line.lstrip() for line in function_lines]
        # Join the lines to form the function body
        function_body = "".join(function_lines)
        return function_body
    else:
        return None

def get_function_calls(function_def_node):
    function_calls = []
    for node in ast.walk(function_def_node):
        if isinstance(node, ast.Call):
            
            function_calls.append(node)
    return function_calls


def get_function_body(file_path, function_name):
    with open(file_path, "r") as file:
        tree = ast.parse(file.read(), filename=file_path)
    
    # Find the function definition node
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef) and node.name == function_name:
            # Extract the body of the function
            return node
            #return get_function_calls(node)
            
    # If the function is not found, return None
    return None


# Example usage
file_path = ENDPOINT_ONE
print(file_path)
function_name = "upload_user_activity_data"
function_body = get_function_body(file_path, function_name)
if function_body is not None:
    print("Function body:")
    print(inspect.getsource(function_body.))
    #for statement in function_body:
      #  print(ast.dump(statement))
        #print(ast.dump(statement, True, False))
        
else:
    print(f"Function '{function_name}' not found in the file.")

/Users/nicklasjeppesen/Desktop/software_individual_report/zeep/content/zeeguu_api/zeeguu/api/endpoints/activity_tracking.py
Function body:


TypeError: module, class, method, function, traceback, frame, or code object was expected, got str

Dreje bog for løsning af problemet. 
hvad har vi indtil videre: 
* Method in class. 
* module_name_from_file_path: where we get a current module name from a file path. 
* module_name_to_file_path: return file path from a module name as string. 
* imports_from_file(file): return array of all module import of a file
* find_python_functions(file): Return functions in a python file. 

rutinen er følgende: 

1: Get a python function body from a path, where path is a string. 




In [11]:
# test
print(imports_from_file(file_path('zeeguu/core/model/bookmark.py')))
#print(imports_from_file(file_path('zeeguu/core/model/unique_code.py')))

['datetime', 'sqlalchemy', 'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.orm.exc', 'wordstats', 'zeeguu.logging', 'zeeguu.core.bookmark_quality.fit_for_study', 'zeeguu.core.definition_of_learned', 'zeeguu.core.model', 'zeeguu.core.model.sorted_exercise_log', 'zeeguu.core.model.exercise', 'zeeguu.core.model.exercise_outcome', 'zeeguu.core.model.exercise_source', 'zeeguu.core.model.language', 'zeeguu.core.model.text', 'zeeguu.core.model.user', 'zeeguu.core.model.user_word', 'zeeguu.core.util.encoding', 'zeeguu', 'zeeguu.core.model']


In [142]:
txt = """
class MyClass():
  def foo(x, y=2):
      z = x*y + 3
      print("z is", z)
      return z**2
"""

tree = ast.parse(txt, mode='exec')
code = compile(tree, filename='blah', mode='exec')
namespace = {}

exec(code, namespace)
val = "foo"
dict_item = namespace["MyClass"].__dict__.items()
for x, y in list(dict_item):
  if val == x:
    print(x)
    print(y)
    print(type(x))
    print(type(y))
    y(2)
    print(inspect.getmodule(y))
    #print(inspect.getsourcelines(y))
    #print(dir(y))

foo
<function MyClass.foo at 0x7f93c480baf0>
<class 'str'>
<class 'function'>
z is 7
None


In [143]:
import re

def extract_function_body(func_string):
    pattern = r'def\s+\w+\s*\(.*?\)\s*:\s*(.*?)\bdef\s'
    match = re.search(pattern, func_string, re.DOTALL)
    if match:
        return match.group(1)
    else:
        return None

# Example usage:
function_string = """
def my_function(x):
    if x > 0:
        return x
    else:
        return -x
def another_function():
    print("Another function")
"""

body = extract_function_body(function_string)
print(body)


if x > 0:
        return x
    else:
        return -x



In [146]:
import re

def extract_function_body(func_name, func_string):
    pattern = r'def\s+' + re.escape(func_name) + r'\s*\(.*?\)\s*:\s*(.*?)\bdef\s'
    match = re.search(pattern, func_string, re.DOTALL)
    if match:
        return match.group(1)
    else:
        return None

# Example usage:
function_string = """
def my_function(x):
    if x > 0:
        return x
    else:
        return -x
def another_function():
    print("Another function")
"""

function_name = "my_function"
body = extract_function_body(function_name, function_string)
print(body)


if x > 0:
        return x
    else:
        return -x

