In [4]:
import rope.base.project
from rope.refactor.extract import ExtractMethod
import re

# Placeholder tokens that serialize() substitutes into source text and that
# deserialize() swaps back out again.
APOSTROPHE_MARKER = "__APOSTROPHE__"
PERCENT_FORMAT_MARKER = "__PERCENT_FORMAT__"

def serialize(text: str) -> str:
    """Replace quoted-brace and ``%(key)s`` constructs with placeholder tokens.

    Two rewrites are applied, in this order:

    * ``'{expr}'`` becomes ``__APOSTROPHE__{expr}__APOSTROPHE__``
    * ``%(key)s``  becomes ``__PERCENT_FORMAT__{key}``

    ``deserialize`` reverses both rewrites. The round trip is only exact for
    text that does not already contain the marker strings themselves.

    Args:
        text: Source text to rewrite.

    Returns:
        The text with every matching construct replaced by its placeholder.
    """
    # `expr` may span several lines but never contains a quote or a "}".
    text = re.sub(
        r"'\{([^'}]*?)\}'",
        rf"{APOSTROPHE_MARKER}{{\1}}{APOSTROPHE_MARKER}",
        text,
    )
    # Keys containing "}" are left untouched: deserialize() ends the key at the
    # first "}", so rewriting such a key could not be undone faithfully.
    text = re.sub(
        r"%\(([^}\n]*?)\)s",
        rf"{PERCENT_FORMAT_MARKER}{{\1}}",
        text,
    )
    return text

def deserialize(text: str) -> str:
    """Undo ``serialize``: turn placeholder tokens back into the original text.

    Args:
        text: Text previously produced by ``serialize`` (possibly edited since).

    Returns:
        The text with ``__APOSTROPHE__{expr}__APOSTROPHE__`` restored to
        ``'{expr}'`` and ``__PERCENT_FORMAT__{key}`` restored to ``%(key)s``.
    """
    apostrophe = re.escape(APOSTROPHE_MARKER)
    percent = re.escape(PERCENT_FORMAT_MARKER)
    # DOTALL is required because serialize() accepts `expr` values that span
    # lines; without it those tokens would be left behind in the output.
    text = re.sub(
        apostrophe + r"\{(.*?)\}" + apostrophe,
        r"'{\1}'",
        text,
        flags=re.DOTALL,
    )
    text = re.sub(percent + r"\{(.*?)\}", r"%(\1)s", text)
    return text

# Open the rope project rooted two directories up and load the file to refactor.
myproject = rope.base.project.Project('../../')

myresource = myproject.get_resource('tests/notebooks/src/test.py')
contents = myresource.read()
# Reuse the text already read instead of hitting the resource a second time.
serialized_contents = serialize(contents)
extract_span = r"""logger.info(f"Downloading repository and indexing for {repo_full_name}...")
    start = time.time()
    logger.info("Recursively getting list of files...")
    blocked_dirs = get_blocked_dirs(repo)
    sweep_config.exclude_dirs.extend(blocked_dirs)"""
serialized_extract_span = serialize(extract_span)
print(serialized_extract_span)

# Character offsets of the span inside the serialized file; start is -1 when
# the span does not occur in it.
start = serialized_contents.find(serialized_extract_span)
end = start + len(serialized_extract_span)
print(start, end)

try:
    if start == -1:
        # Fail with a clear message rather than handing rope a bogus offset.
        raise ValueError("extract_span was not found in tests/notebooks/src/test.py")
    # The serialized text is written inside the try block so that the finally
    # clause below always puts the original file back.
    myresource.write(serialized_contents)
    extractor = ExtractMethod(myproject, myresource, start, end)
    change_set = extractor.get_changes("helper", similar=True)
    for change in change_set.changes:
        # Map both sides of every change back to the un-serialized text so the
        # description printed below is expressed against the real file.
        if change.old_contents is not None:
            change.old_contents = deserialize(change.old_contents)
        else:
            change.old_contents = deserialize(change.resource.read())
        change.new_contents = deserialize(change.new_contents)
    for change in change_set.changes:
        print(change.get_description())
except Exception as e:
    # Best-effort notebook cell: report the problem and fall through to restore.
    print(e)
finally:
    # Always restore the original, un-serialized file contents.
    myresource.write(contents)

logger.info(f"Downloading repository and indexing for {repo_full_name}...")
    start = time.time()
    logger.info("Recursively getting list of files...")
    blocked_dirs = get_blocked_dirs(repo)
    sweep_config.exclude_dirs.extend(blocked_dirs)
5973 6221
--- a/tests/notebooks/src/test.py
+++ b/tests/notebooks/src/test.py
@@ -174,28 +174,32 @@
     commits = repo.get_commits()
     commit_hash = commits[0].sha
 
+    start = helper(repo_full_name, repo, sweep_config)
+    file_list, snippets, index = prepare_lexical_search_index(
+        cloned_repo, sweep_config, repo_full_name
+    )
+    # scoring for vector search
+    files_to_scores = compute_vector_search_scores(
+        file_list, cloned_repo, repo_full_name
+    )
+
+    collection_name, documents, ids, metadatas = prepare_documents_metadata_ids(
+        snippets, cloned_repo, files_to_scores, start, repo_full_name
+    )
+
+    deeplake_vs = deeplake_vs or compute_deeplake_vs(
+        collection_name, documents, ids, m

In [22]:
# Difference in length (characters) between the new and old contents of `change`.
# NOTE(review): `change` is not defined in this cell; presumably it is the loop
# variable left over from the extract-method cell (the last entry of
# change_set.changes) — confirm before relying on this after a kernel restart.
len(change.new_contents) - len(change.old_contents)

85