In [1]:
from llama_index_migration_tools.main import main

In [2]:
import os
from pathlib import Path
import shutil

In [3]:
class cd:
    """Context manager that temporarily changes the current working directory.

    On entry the working directory is switched to ``newPath`` (with a leading
    ``~`` expanded). On exit the directory that was current at entry time is
    restored, including when the body of the ``with`` statement raises.

    Args:
        newPath: Directory to switch into for the duration of the ``with`` body.

    Example:
        with cd("~/some/dir") as ctx:
            ...  # runs inside ctx.newPath
    """

    def __init__(self, newPath):
        # Expand "~" up front so os.chdir receives a usable path.
        self.newPath = os.path.expanduser(newPath)

    def __enter__(self):
        # Remember the starting directory so __exit__ can return to it.
        self.savedPath = os.getcwd()
        os.chdir(self.newPath)
        # Hand back the manager so `with cd(path) as ctx:` binds something
        # useful (ctx.newPath / ctx.savedPath) instead of None.
        return self

    def __exit__(self, etype, value, traceback):
        # Implicitly returns None, so an exception raised in the body propagates.
        os.chdir(self.savedPath)

### Get list of packages

In [4]:
# Directory entries that are filtered out of the legacy listing for every
# `kind`, before any kind-specific exclusions are applied.
IGNORE_LIST = ["__init__.py", "__pycache__", "loading.py", "base.py"]

In [5]:
# Integration category to process; also used as the directory name in the
# core / legacy / integrations paths and as the key into additional_ignore_files.
kind = "vector_stores"
# Optional parent directory segment placed before `kind` in those paths.
# When falsy (None here) the segment is left out entirely.
prefix = None

In [6]:
from os import listdir
from os.path import isfile, join

# Directory for this `kind` inside llama-index-core; `prefix`, when set, is an
# extra path segment in front of `kind`.
# NOTE(review): absolute, machine-specific path — adjust for your checkout.
if prefix:
    core_path = (
        "/Users/nerdai/Projects/forks/llama_index/llama-index-core"
        f"/llama_index/core/{prefix}/{kind}"
    )
else:
    core_path = (
        "/Users/nerdai/Projects/forks/llama_index/llama-index-core"
        f"/llama_index/core/{kind}"
    )

# Names of the entries currently present in the core directory.
core = listdir(core_path)
core

['__init__.py', 'types.py', '__pycache__', 'utils.py', 'simple.py']

In [7]:
# Directory for this `kind` inside llama-index-legacy; `prefix`, when set, is
# an extra path segment in front of `kind`.
# NOTE(review): absolute, machine-specific path — adjust for your checkout.
if prefix:
    legacy_path = (
        "/Users/nerdai/Projects/forks/llama_index/llama-index-legacy"
        f"/llama_index/legacy/{prefix}/{kind}"
    )
else:
    legacy_path = (
        "/Users/nerdai/Projects/forks/llama_index/llama-index-legacy"
        f"/llama_index/legacy/{kind}"
    )

# Names of the entries (files and sub-directories) in the legacy directory.
legacy = listdir(legacy_path)

In [8]:
# First filtering pass: discard the entries named in IGNORE_LIST, then put
# the survivors in sorted order.
first_pass = [entry for entry in legacy if entry not in IGNORE_LIST]
first_pass.sort()
first_pass

['astra.py',
 'awadb.py',
 'azurecosmosmongo.py',
 'bagel.py',
 'cassandra.py',
 'chatgpt_plugin.py',
 'chroma.py',
 'cogsearch.py',
 'dashvector.py',
 'deeplake.py',
 'docarray',
 'dynamodb.py',
 'elasticsearch.py',
 'epsilla.py',
 'faiss.py',
 'google',
 'jaguar.py',
 'lancedb.py',
 'lantern.py',
 'metal.py',
 'milvus.py',
 'mongodb.py',
 'myscale.py',
 'neo4jvector.py',
 'opensearch.py',
 'pgvecto_rs.py',
 'pinecone.py',
 'pinecone_utils.py',
 'postgres.py',
 'qdrant.py',
 'qdrant_utils.py',
 'redis.py',
 'registry.py',
 'rocksetdb.py',
 'simple.py',
 'singlestoredb.py',
 'supabase.py',
 'tair.py',
 'tencentvectordb.py',
 'timescalevector.py',
 'types.py',
 'typesense.py',
 'utils.py',
 'weaviate.py',
 'weaviate_utils.py',
 'zep.py']

In [9]:
# Kind-specific exclusions, keyed by `kind`. Entries listed here are removed
# from the first-pass listing in addition to the names in IGNORE_LIST.
additional_ignore_files = {
    "embeddings": ["multi_modal_base.py", "utils.py"],
    "llms": ["llm.py", "mock.py", "types.py", "utils.py"],
    "multi_modal_llms": ["generic_utils.py"],
    "output_parsers": ["utils.py", "pydantic.py", "selection.py"],
    "program": [
        "utils.py",
        "llm_prompt_program.py",
        "llm_program.py",
        "multi_modal_llm_program.py",
        "predefined",
    ],
    "prompts": [
        "chat_prompts.py",
        "default_prompt_selectors.py",
        "default_prompts.py",
        "display_utils.py",
        "mixin.py",
        "prompt_type.py",
        "prompt_utils.py",
        "prompts.py",
        "system.py",
        "utils.py",
    ],
    "question_gen": [
        "llm_generators.py",
        "output_parser.py",
        "prompts.py",
        "types.py",
    ],
    "response_synthesizers": [
        "accumulate.py",
        "compact_and_accumulate.py",
        "compact_and_refine.py",
        "factory.py",
        "generation.py",
        "google",
        "no_text.py",
        "refine.py",
        "simple_summarize.py",
        "tree_summarize.py",
        "type.py",
    ],
    "retrievers": [
        "auto_merging_retriever.py",
        "fusion_retriever.py",
        "recursive_retriever.py",
        "router_retriever.py",
        "transform_retriever.py",
    ],
    # The next four kinds are used with prefix "storage".
    "chat_store": ["simple_chat_store.py"],
    "docstore": [
        "registry.py",
        "simple_docstore.py",
        "keyval_docstore.py",
        "types.py",
        "utils.py",
    ],
    "kvstore": ["simple_kvstore.py", "types.py"],
    "index_store": [
        "keyval_index_store.py",
        "simple_index_store.py",
        "types.py",
        "utils.py",
    ],
    "vector_stores": [
        "docarray",
        "google",
        "registry.py",
        "simple.py",
        "types.py",
        "utils.py",
        "astra.py",
        "awadb.py",
        "azurecosmosmongo.py",
        "bagel.py",
        "cassandra.py",
    ],
}

In [10]:
# Second filtering pass: drop the exclusions registered for the current
# `kind`, keeping the order of first_pass.
final_removal_list = list(
    filter(lambda entry: entry not in additional_ignore_files[kind], first_pass)
)

In [11]:
# Display the entries left after both filtering passes; the package-creation
# loop later in this notebook iterates over exactly this list.
final_removal_list

['chatgpt_plugin.py',
 'chroma.py',
 'cogsearch.py',
 'dashvector.py',
 'deeplake.py',
 'dynamodb.py',
 'elasticsearch.py',
 'epsilla.py',
 'faiss.py',
 'jaguar.py',
 'lancedb.py',
 'lantern.py',
 'metal.py',
 'milvus.py',
 'mongodb.py',
 'myscale.py',
 'neo4jvector.py',
 'opensearch.py',
 'pgvecto_rs.py',
 'pinecone.py',
 'pinecone_utils.py',
 'postgres.py',
 'qdrant.py',
 'qdrant_utils.py',
 'redis.py',
 'rocksetdb.py',
 'singlestoredb.py',
 'supabase.py',
 'tair.py',
 'tencentvectordb.py',
 'timescalevector.py',
 'typesense.py',
 'weaviate.py',
 'weaviate_utils.py',
 'zep.py']

### Create packages

In [12]:
# Directory under llama-index-integrations for this `kind`; `prefix`, when
# set, is an extra path segment in front of `kind`.
# NOTE(review): absolute, machine-specific path — adjust for your checkout.
if prefix:
    extensions_path = (
        "/Users/nerdai/Projects/forks/llama_index/llama-index-integrations"
        f"/{prefix}/{kind}"
    )
else:
    extensions_path = (
        "/Users/nerdai/Projects/forks/llama_index/llama-index-integrations"
        f"/{kind}"
    )
print(extensions_path)

/Users/nerdai/Projects/forks/llama_index/llama-index-integrations/vector_stores


In [13]:
import subprocess # just to call an arbitrary command e.g. 'ls'

In [14]:
# Call the migration tool's main() once per remaining legacy entry.
for ext in final_removal_list:
    # Human-readable integration name: strip only a trailing ".py" and turn
    # underscores into spaces (e.g. "pinecone_utils.py" -> "pinecone utils").
    # A blanket str.replace(".py", "") would also delete ".py" occurring in
    # the middle of a name.
    name = (ext[: -len(".py")] if ext.endswith(".py") else ext).replace("_", " ")
    # Path of the legacy entry handed to main() as base_file.
    base_file = f"{legacy_path}/{ext}"
    # Run main() with extensions_path as the working directory; cd switches
    # back to the previous directory after each call.
    # NOTE(review): presumably main() writes its output relative to the
    # current directory — confirm against llama_index_migration_tools.
    with cd(extensions_path):
        main(integration_name=name, integration_type=kind, prefix=prefix, base_file=base_file)