Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/import_packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
github_token: ${{ github.token }}
workflow: ".github/workflows/import_packages.yml"
workflow_conclusion: success
name: sqlite_vectordb_file
name: sqlite_data
path: /tmp/
name_is_regexp: true
skip_unpack: false
Expand All @@ -71,11 +71,11 @@ jobs:

- name: 'Run import_packages.py with poetry'
run: |
poetry run python scripts/import_packages.py --jsonl-dir /tmp/jsonl-files --db-path /tmp/sqlite_data/vectordb.db
poetry run python scripts/import_packages.py --jsonl-dir /tmp/jsonl-files --vec-db-path /tmp/sqlite_data/vectordb.db

- name: 'Upload SQLite Vector DB File'
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4
with:
name: sqlite_vectordb_file
name: sqlite_data
path: /tmp/sqlite_data/vectordb.db
retention-days: 90
12 changes: 6 additions & 6 deletions scripts/import_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@


class PackageImporter:
def __init__(self, jsonl_dir="data", db_path="./sqlite_data/vectordb.db"):
os.makedirs(os.path.dirname(db_path), exist_ok=True)
self.db_path = db_path
def __init__(self, jsonl_dir="data", vec_db_path="./sqlite_data/vectordb.db"):
os.makedirs(os.path.dirname(vec_db_path), exist_ok=True)
self.vec_db_path = vec_db_path
self.json_files = [
os.path.join(jsonl_dir, "archived.jsonl"),
os.path.join(jsonl_dir, "deprecated.jsonl"),
Expand All @@ -25,7 +25,7 @@ def __init__(self, jsonl_dir="data", db_path="./sqlite_data/vectordb.db"):
self.model_path = "./codegate_volume/models/all-minilm-L6-v2-q5_k_m.gguf"

def _get_connection(self):
conn = sqlite3.connect(self.db_path)
conn = sqlite3.connect(self.vec_db_path)
conn.enable_load_extension(True)
sqlite_vec_sl_tmp.load(conn)
conn.enable_load_extension(False)
Expand Down Expand Up @@ -129,12 +129,12 @@ def __del__(self):
help="Directory containing JSONL files. Default is 'data'.",
)
parser.add_argument(
"--db-path",
"--vec-db-path",
type=str,
default="./sqlite_data/vectordb.db",
help="Path to SQLite database file. Default is './sqlite_data/vectordb.db'.",
)
args = parser.parse_args()

importer = PackageImporter(jsonl_dir=args.jsonl_dir, db_path=args.db_path)
importer = PackageImporter(jsonl_dir=args.jsonl_dir, vec_db_path=args.vec_db_path)
asyncio.run(importer.run_import())
Loading