Instructions for Keyring:

Type the following commands into your terminal and input your netId and passwords. This is stored securely and python will ask for your system password to access them. This way, we avoid storing/hardcoding our credentials.

`python -m keyring set TigerOutcomes_Service username_key`

`python -m keyring set TigerOutcomes_Service password_key`

In [10]:
import os
import keyring
import pandas as pd

# Define server path and mount path
server_path = "smb://files/dept/InstResearch/TigerOutcomes"
mount_path = "/Volumes/TigerOutcomes"  # Where the server will be mounted on your Mac

# Retrieve credentials from Keychain
username = keyring.get_password("TigerOutcomes_Service", "username_key")
password = keyring.get_password("TigerOutcomes_Service", "password_key")

# Mount the server using AppleScript for SMB connection
os.system(f"osascript -e 'do shell script \"mount volume \\\"{server_path}\\\" as user name \\\"{username}\\\" with password \\\"{password}\\\"\"'")

# Path to the Excel file on the mounted server
file_path = f"{mount_path}/COS333_AcA_Student_Outcomes.xlsx"  # Replace 'file.xlsx' with your actual file name

# Read the Excel file with pandas if the server is successfully mounted
try:
    df = pd.read_excel(file_path)
    print(df.head())
except FileNotFoundError:
    print("File not found. Check if the server is properly mounted.")

       mount [-dfrkuvw] special | mount_point (64)o options] [-t external_type] special mount_point


               Source  RecordYear Academic_Year_Degree_Awarded Degree_Track  \
0  Academic Analytics        2021                      2013-14           AB   
1  Academic Analytics        2022                      2013-14           AB   
2  Academic Analytics        2024                      2013-14           AB   
3  Academic Analytics        2023                      2013-14           AB   
4  Academic Analytics        2021                      2009-10           AB   

       Degree_Descr Entity_Name Position UnitName  IPEDSID Country  \
0  Bachelor of Arts         NaN      NaN      NaN      NaN     NaN   
1  Bachelor of Arts         NaN      NaN      NaN      NaN     NaN   
2  Bachelor of Arts         NaN      NaN      NaN      NaN     NaN   
3  Bachelor of Arts       Kawin  Founder      NaN      NaN      US   
4  Bachelor of Arts         NaN      NaN      NaN      NaN     NaN   

    Country_Name State State_Name   StudyID  
0            NaN   NaN        NaN  COS28100  
1           

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def cosine_similarity_tfidf(target, job_titles):
    # Vectorize the target and job titles together
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([target] + job_titles)
    
    # Calculate cosine similarity between target and each job title
    target_vector = vectors[0]  # The first vector is for the target
    similarity_scores = cosine_similarity(target_vector, vectors[1:])[0]  # Compare target to each job title

    # Return similarity scores as a list
    return similarity_scores.tolist()

# Example usage
target_job = "Data Scientist"
job_titles_list = ["Data Analyst", "Machine Learning Engineer", "Data Engineer", "Software Developer"]
tfidf_scores = cosine_similarity_tfidf(target_job, job_titles_list)
print("TF-IDF Cosine Similarity Scores:", tfidf_scores)

from sklearn.feature_extraction.text import CountVectorizer

def cosine_similarity_bow(target, job_titles):
    # Vectorize the target and job titles together
    vectorizer = CountVectorizer()
    vectors = vectorizer.fit_transform([target] + job_titles)
    
    # Calculate cosine similarity between target and each job title
    target_vector = vectors[0]  # The first vector is for the target
    similarity_scores = cosine_similarity(target_vector, vectors[1:])[0]  # Compare target to each job title

    # Return similarity scores as a list
    return similarity_scores.tolist()

# Example usage
bow_scores = cosine_similarity_bow(target_job, job_titles_list)
print("BoW Cosine Similarity Scores:", bow_scores)

import numpy as np
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity

# Load pre-trained word embeddings (e.g., GloVe, Word2Vec)
# Make sure to point to the correct path to your embeddings file
# e.g., word_vectors = KeyedVectors.load_word2vec_format("path/to/glove.6B.100d.txt", binary=False)
word_vectors = KeyedVectors.load_word2vec_format("path/to/glove.6B.100d.txt", binary=False)

def average_word_embeddings(text, model, vector_size=100):
    # Generate average word embeddings for the input text
    words = text.lower().split()
    embeddings = [model[word] for word in words if word in model]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(vector_size)

def cosine_similarity_word_embeddings(target, job_titles, model, vector_size=100):
    # Get the embedding for the target title
    target_vector = average_word_embeddings(target, model, vector_size).reshape(1, -1)
    
    # Calculate cosine similarity between the target and each job title
    similarity_scores = []
    for title in job_titles:
        title_vector = average_word_embeddings(title, model, vector_size).reshape(1, -1)
        score = cosine_similarity(target_vector, title_vector)[0][0]
        similarity_scores.append(score)
    
    return similarity_scores

# Example usage
embedding_scores = cosine_similarity_word_embeddings(target_job, job_titles_list, word_vectors, vector_size=100)
print("Word Embeddings Cosine Similarity Scores:", embedding_scores)
