In [1]:
import numpy as np
import pandas as pd
from pymilvus import (Collection, CollectionSchema, DataType, FieldSchema,
                      connections, utility)
from sentence_transformers import SentenceTransformer

In [2]:
# Read the cleaned job postings table (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='cleaned_jobpostings.csv')

In [29]:
# Load the pre-trained sentence-embedding model
model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# One text per row of the postings table. Missing values are replaced with ""
# and everything is cast to str, because encode() expects strings only.
job_descriptions = data['Job Description'].fillna('').astype(str).tolist()

# Generate one embedding per job description.
# The list itself must be passed: wrapping it in str() would encode the
# Python repr of the whole list as a single sentence and return one vector.
embeddings = model.encode(job_descriptions, convert_to_tensor=False)

In [None]:
# Persist the embeddings as a NumPy .npy file in the working directory.
with open('job_description_embeddings.npy', mode='wb') as f:
    np.save(file=f, arr=np.array(embeddings))

In [10]:
# Connect to Milvus and define the collection schema (the collection itself
# is created in the next cell).
# pymilvus is imported in the first cell, so it must already be installed in
# the kernel's environment before the notebook runs; installing or
# re-importing it here would have no effect on a fresh Restart & Run All.

# Connect to the Milvus server under the "default" connection alias
connections.connect(alias="default")

fields = [
    # Primary key: caller-supplied job id string (auto_id disabled), up to 250 characters
    FieldSchema(name="job_ids", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=250),
    # Vector field; dim must equal the length of every embedding that is inserted
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=384)
]
# The second argument is the schema description stored with the collection
schema = CollectionSchema(fields, "APIs")

In [12]:
# Open (or create) the collection from the schema, then build an IVF_FLAT
# index with L2 distance on the vector field.
job_description_emb = Collection("job_description_emb", schema)
index = dict(
    index_type="IVF_FLAT",
    metric_type="L2",
    params=dict(nlist=128),
)
job_description_emb.create_index(field_name="embeddings", index_params=index)

In [14]:
# Insert the sample embeddings into the collection, keyed by job id
sample_embeddings = np.load('sample_embeddings.npy')
sample_data = pd.read_csv('cleaned_jobpostings_sample.csv')

# Ids and vectors are paired purely by row position, so fail fast with a clear
# message if the two files do not describe the same number of rows.
if len(sample_data) != len(sample_embeddings):
    raise ValueError(
        f"Row count mismatch: {len(sample_data)} job ids vs "
        f"{len(sample_embeddings)} embeddings"
    )

# The "job_ids" field is declared as VARCHAR in the schema, so the ids are
# cast to str in case pandas parsed the column as a numeric dtype.
# Column order must follow the schema: primary keys first, then vectors.
insert_data = [sample_data['Job Id'].astype(str).tolist(), sample_embeddings]
job_description_emb.insert(insert_data)
# Flush so the inserted rows are sealed and persisted
job_description_emb.flush()

In [17]:
# Sanity check: query the collection with the first ten embeddings and keep,
# for each query, the ids and L2 distances of its two closest matches.

job_description_emb.load()
search_params = dict(metric_type="L2", params=dict(nprobe=84), offset=0)
encodings = [vector for vector in embeddings[:10]]
results = job_description_emb.search(
    expr=None,
    limit=2,
    param=search_params,
    anns_field="embeddings",
    data=encodings,
)
# One entry per query vector
distances = [hits.distances for hits in results]
job_ids = [hits.ids for hits in results]