In [1]:
import requests

# Define the GraphQL endpoint
url = "https://stanfurdtime.com/api/graphql"

# Get the subject, number, title, and description for all
# courses
#
# Please do NOT add nested objects like sections or classes
# for right now as this may overload the API
coursesQuery = """
query CoursesQuery {
  courseList {
    number
    subject
    title
    description
  }
}
"""

# Make the request.
# The response is kept (it used to be discarded, so the full-catalog fetch
# was wasted), the wait is bounded by a timeout so an unresponsive server
# cannot hang the kernel, and HTTP-level failures raise immediately.
courses_response = requests.post(
    url,
    json={"query": coursesQuery},
    timeout=60,
)
courses_response.raise_for_status()

# Get the title and description for a single course.
# The course is selected through GraphQL variables rather than by editing
# the query text.
courseQuery = """
query CourseQuery($subject: String!, $courseNumber: String!) {
  course(subject: $subject, courseNumber: $courseNumber) {
    description
    title
  }
}
"""

# Set your variables
courseVariables = {
    "subject": "COMPSCI",
    "courseNumber": "61B",
}

# Make the request.
# A timeout bounds the wait on an unresponsive server, and raise_for_status()
# surfaces HTTP errors here instead of as a confusing failure when the body
# is decoded later.
response = requests.post(
    url,
    json={"query": courseQuery, "variables": courseVariables},
    timeout=30,
)
response.raise_for_status()

In [2]:
# Decode the body of `response` as JSON and print the resulting object.
print(response.json()) # visualizing the response

{'data': {'course': {'description': 'Fundamental dynamic data structures, including linear lists, queues, trees, and other linked structures; arrays strings, and hash tables. Storage management. Elementary principles of software engineering. Abstract data types. Algorithms for sorting and searching. Introduction to the Java programming language. ', 'title': 'Data Structures'}}}


In [3]:
from transformers import BertTokenizer, BertModel
import torch

In [4]:
# Build the tokenizer and the encoder from one checkpoint name so the two
# always refer to the same pre-trained BERT weights and vocabulary.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

def get_embeddings(text):
    """Embed text by mean-pooling the model's last hidden state.

    Uses the module-level ``tokenizer`` and ``model``. The text is tokenized
    with truncation and padding, run through the model with gradients
    disabled, and the token vectors are averaged per input.

    The average is weighted by the attention mask so that padding tokens do
    not contribute. For a single string there is no padding, so the result
    matches a plain mean over tokens; for a list of strings of different
    lengths this keeps the shorter inputs from being skewed by padding.

    Args:
        text: A string, or a list of strings to embed as one batch.

    Returns:
        A tensor with all size-1 dimensions squeezed out: one vector for a
        single input, or a (batch, hidden) matrix for several inputs.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    # Broadcast the (batch, tokens) mask over the hidden dimension.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    summed = (hidden * mask).sum(dim=1)
    # clamp guards against division by zero for an all-masked row.
    token_counts = mask.sum(dim=1).clamp(min=1)
    return (summed / token_counts).squeeze()

# Smoke-test the embedding helper on one short, hand-written description.
course_description = (
    "Data structures and algorithms in computer science."
)
course_embedding = get_embeddings(text=course_description)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [16]:
# Print the tensor held in `course_embedding` (all of its values).
print(course_embedding)

tensor([-2.9533e-01,  9.5684e-02, -6.1826e-01, -1.9730e-01,  4.3659e-01,
        -3.9704e-02, -3.6921e-01,  1.9937e-01, -1.1745e-01, -2.4417e-01,
        -7.5154e-02, -3.2642e-01, -3.4736e-01, -1.9599e-01, -5.9736e-01,
         2.3984e-01, -2.1487e-01,  1.6291e-01, -2.9479e-01,  2.4434e-02,
         1.3656e-01, -1.3911e-02, -3.3465e-01,  3.9405e-01,  3.7781e-01,
         1.7136e-01, -1.1770e-02,  3.7585e-01, -3.2381e-01, -9.1744e-02,
         1.1131e-01,  6.2904e-01, -4.2454e-01, -2.2036e-01,  2.7290e-01,
         3.8511e-01, -1.9116e-01, -1.1314e-01, -3.8580e-01,  7.6084e-01,
        -6.9957e-01, -1.8037e-01, -1.1372e-01,  1.9709e-01, -1.0725e-01,
        -7.8028e-01, -2.2661e-01,  4.9514e-02, -5.6597e-03,  5.2916e-03,
        -6.4060e-01,  5.7901e-02,  4.0442e-01, -4.6683e-02,  3.7345e-01,
         7.0166e-01,  2.2533e-01, -6.7020e-01,  6.2333e-02, -3.9357e-01,
         3.1943e-01,  4.6567e-01,  2.4615e-01, -9.2317e-01,  7.2272e-01,
         1.9125e-01, -2.2631e-01,  2.1382e-01, -2.4

In [7]:
# Fetch the data for all the courses.
# A timeout bounds the wait on an unresponsive server, and HTTP-level
# failures raise here instead of surfacing later as a confusing KeyError.
response = requests.post(url, json={"query": coursesQuery}, timeout=60)
response.raise_for_status()
data = response.json()

# GraphQL can report failures in an "errors" field even on HTTP 200.
if data.get("errors"):
    raise RuntimeError(f"GraphQL query failed: {data['errors']}")
course_records = data["data"]["courseList"]

# One searchable text per course: "<number> <title> <description>".
# Null fields become empty strings rather than the literal text "None".
# `courses` only ever holds these strings; the raw dicts stay in
# `course_records`, so the name is not reused for two kinds of value.
courses = [
    " ".join(
        str(record.get(field) or "")
        for field in ("number", "title", "description")
    )
    for record in course_records
]

# Create an embedding for every course text. This calls the model once per
# course, so it is the slow step of the notebook.
course_embeddings = [get_embeddings(course_text) for course_text in courses]

In [5]:
!pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [6]:
import faiss
import numpy as np

In [8]:
# Convert every embedding to a float32 NumPy array. np.asarray accepts both
# torch tensors and NumPy arrays, so this cell can be re-run safely; calling
# .numpy() on an already-converted array failed on the second run.
course_embeddings = [
    np.asarray(embedding, dtype=np.float32) for embedding in course_embeddings
]

# FAISS expects a single C-contiguous float32 matrix, one row per vector.
course_embeddings_array = np.ascontiguousarray(np.stack(course_embeddings))

# Take the dimensionality from the data (768 with the current model) so that
# swapping the encoder does not require editing this cell.
d = course_embeddings_array.shape[1]
index = faiss.IndexFlatL2(d)  # flat index using L2 distance

index.add(course_embeddings_array)

In [9]:
# Sample free-text request used to exercise the search index.
user_query = (
    "find me courses that are computer science related and have data structures"
)
query_embedding = get_embeddings(text=user_query)

In [10]:
# Number of courses to return.
k = 5

# The index is searched with a 2-D array of queries, so present the single
# query vector as a one-row matrix.
query_embedding_np = np.reshape(query_embedding.numpy(), (1, -1))
distances, indices = index.search(query_embedding_np, k)

print(indices)

[[4527 4460 1833 4326 3339]]


In [17]:
# Print the text of each retrieved course, in the order the index returned
# them. FAISS pads the result with -1 when it finds fewer than k neighbours;
# skip those, because courses[-1] would silently print the last course.
for idx in indices[0]:
    if idx < 0:
        continue
    course = courses[idx]
    print(course)

236A Applied Data Science for Engineers This course aims at providing basics of Data Science to students and professionals who need to work with and analyze a large volume of data. The base programming language is Matlab, but techniques taught, and topics covered can be coded in any programming language (examples from Python and Fortran will be discussed). The course is aimed at graduate students in engineering, and therefore examples, assignments and the course project are from real life scenarios and engineering problems.
224 Introduction to Block-Based Programming for Teachers This course is designed to introduce future Computer Science teachers to fundamental CS concepts through block-based programming. It will cover abstraction and decomposition and how these processes allow problems to be made simpler and solved algorithmically. It will also introduce teachers to the concepts of variables, loops, conditionals, functions and arrays.
This course will focus on teaching computer scie