In [24]:
import numpy as np
import polars as pl

from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

In [25]:
# Open the processed parquet file as a lazy frame; the data is materialised
# later, when a query on it is `.collect()`-ed.
df = pl.scan_parquet(source='data/processed/embedded_university_and_progam.parquet')

In [26]:
# Identifier of the sentence-transformers checkpoint to load.
model_name = 'all-MiniLM-L6-v2'
# The loaded encoder is passed to the search function, which calls `.encode()` on it.
model = SentenceTransformer(model_name_or_path=model_name)

In [27]:
def return_search_results(query_personal_interest: str,
                          query_career_prospect: str,
                          df: pl.LazyFrame,
                          model,
                          number_of_recommendations: int = 7) -> list:
    """Rank programs by similarity to a free-text interest / career query.

    The two query strings are merged into one sentence, encoded with
    ``model`` and compared (cosine similarity) against three blocks of
    columns of ``df``, selected by position:

    * columns 4..387     -> treated as the university embedding
    * columns 388..771   -> treated as the program embedding
    * columns 772..1155  -> treated as the career embedding

    Each block is 384 columns wide; the encoded query must have the same
    width or ``cosine_similarity`` will reject it.  The three similarities
    are combined with weights 1 : 2 : 2 (university : program : career).

    Args:
        query_personal_interest: Free-text description of personal interests.
        query_career_prospect: Free-text description of desired careers.
        df: Lazy frame with the embedding columns at the positions listed
            above and a ``'program'`` column holding the value returned for
            each row.
        model: Object exposing ``encode(text)`` that returns an array with a
            ``reshape`` method (e.g. a ``SentenceTransformer``).
        number_of_recommendations: Maximum number of programs to return.
            Defaults to 7; fewer are returned when ``df`` has fewer rows.

    Returns:
        Values of the ``'program'`` column for the best-matching rows,
        ordered from most to least similar.

    Raises:
        ValueError: If ``number_of_recommendations`` is negative.
    """
    # A negative bound would silently slice "all but the last n" rows below.
    if number_of_recommendations < 0:
        raise ValueError('number_of_recommendations must be non-negative')

    query = f'I am interested in {query_personal_interest}. Upon graduation, I want to work as {query_career_prospect}'
    # cosine_similarity expects 2-D inputs, hence the single-row reshape.
    embedded_query = model.encode(query).reshape(1, -1)

    # Submit the four projections together instead of running four
    # independent collects one after another against the same source.
    (embeddings_university,
     embeddings_program,
     embeddings_career,
     programs) = pl.collect_all([
        df.select(pl.nth(range(4, 388))),
        df.select(pl.nth(range(388, 772))),
        df.select(pl.nth(range(772, 1156))),
        df.select('program'),
    ])

    # Each similarity has shape (n_rows, 1).
    similarity_university = cosine_similarity(embeddings_university, embedded_query)
    similarity_program = cosine_similarity(embeddings_program, embedded_query)
    similarity_career = cosine_similarity(embeddings_career, embedded_query)
    # Weighted mean: program and career count twice as much as university.
    mean_similarity = (similarity_university + 2 * similarity_program + 2 * similarity_career) / 5

    # Negate so that argsort's ascending order yields the best match first.
    index_sorted = np.argsort(-mean_similarity, axis=0).flatten()
    index_result = index_sorted[:number_of_recommendations]

    program_names = programs.to_series(0).to_list()
    return [program_names[i] for i in index_result]

In [28]:
# Example query: interests and target careers as comma-separated free text.
query_personal_interest = "Astronomy, Astrophysics, Quantum Mechanics, Coding, Data Analysis"
query_career_prospect = "Astronomer, Astrophysicist, Researcher, Data Scientist"

result = return_search_results(
    query_personal_interest=query_personal_interest,
    query_career_prospect=query_career_prospect,
    df=df,
    model=model,
)

# Print the recommendations as a 1-based ranked list.
for number, program in enumerate(result, start=1):
    print(f'{number}. {program}')

1. Bachelor of Science (Physics) at National University of Singapore (NUS) 
2. Bachelor of Science (Mathematical Sciences) at Nanyang Technological University (NTU) 
3. Bachelor of Computing (Computer Science) at National University of Singapore (NUS) 
4. Bachelor of Computing (Data Science and Artificial Intelligence) at Nanyang Technological University (NTU) 
5. Bachelor of Science (Applied Physics) at Nanyang Technological University (NTU) 
6. Bachelor of Engineering (Engineering Science) at National University of Singapore (NUS) 
7. Bachelor of Engineering (Bioengineering) at Nanyang Technological University (NTU) 
