In [8]:
# Import the necessary modules, variables, etc ...
from canvasapi import Canvas
import pandas as pd
from tqdm import tqdm
from config import CANVAS_URL, CANVAS_TOKEN

# Canvas API client shared by the cells below that query Canvas; it is built
# from the base URL and access token imported from the local `config` module.
canvas = Canvas(CANVAS_URL, CANVAS_TOKEN)

# Mining Canvas for Student Enrollments

This Notebook contains a few useful tools to:
+ Get all LIFE courses from Canvas for the last five terms and save them to `courses.csv`
+ Get all student enrollments for those courses and save them to `enrollments.csv`
+ Generate a set of student usernames for use in scraping Tulip

## Building a list of current and previous Canvas courses

In [None]:
# Get all course objects
#
# Probe Canvas for every candidate LIFE course SIS id ("LIFE<code>-<term>")
# and collect the course objects for the ids that resolve.

courses = []

# Get last 5 terms (this is how long UoL has had Canvas for)
terms = ["201920", "202021", "202122", "202223", "202324"]

# Look for all LIFE courses: candidate codes LIFE100 .. LIFE799
course_labels = [f"LIFE{code}" for code in range(100, 800)]

for term in terms:
    for label in course_labels:
        course_sis_id = f"{label}-{term}"
        try:
            course = canvas.get_course(course_sis_id, use_sis_id=True)
        except Exception:
            # A failed lookup is skipped so the scan carries on with the next
            # candidate id. Catching Exception instead of using a bare
            # `except:` lets KeyboardInterrupt propagate, so interrupting the
            # kernel actually stops this long loop rather than skipping a
            # single lookup.
            continue
        else:
            courses.append(course)
            # Log each SIS id that resolved to a course
            print(course_sis_id)

In [17]:
# Build a dataframe of course information: one record per fetched course
# holding its Canvas id, its name, and a URL assembled from CANVAS_URL and
# the course id.
rows = [
    {
        "id": course.id,
        "name": course.name,
        "url": f"{CANVAS_URL}/courses/{course.id}",
    }
    for course in courses
]
df = pd.DataFrame(rows)

# Save dataframe to a .csv file (without the index column)
df.to_csv("courses.csv", index=False)

## Get student enrollments for every course

In [31]:
# Create an empty "enrollments.csv" file: it contains only the header row,
# with no data rows and no index column.
header = ["user_id", "user_name", "sis_user_id", "course_id"]
empty_df = pd.DataFrame(columns=header)
empty_df.to_csv("enrollments.csv", index=False)

In [34]:
# Collect the student enrollments of every course listed in courses.csv.
#
# Rows are appended to enrollments.csv one course at a time, so a partial
# result survives if this long-running cell is interrupted. The file is
# expected to exist already and to contain only the header row (the cell
# above creates it); rows are appended without a header.
df = pd.read_csv("courses.csv")

# Column order must match the header row already present in enrollments.csv
enrollment_columns = ["user_id", "user_name", "sis_user_id", "course_id"]

# Every enrollment row gathered so far, across all courses
course_enrollments = []
for _, course in tqdm(df.iterrows(), total=len(df)):
    enrollments = [
        x
        for x in canvas.get_course(course["id"]).get_enrollments(include=["user"])
        if x.type == "StudentEnrollment"
    ]

    # Rows belonging to this course only. Writing just these (instead of the
    # whole accumulated list) is what keeps earlier courses from being written
    # to the file again on every iteration.
    new_rows = []
    for enrollment in enrollments:
        try:
            new_rows.append(
                {
                    "user_id": enrollment.user_id,
                    "user_name": enrollment.user["sortable_name"],
                    "sis_user_id": enrollment.user["sis_user_id"],
                    "course_id": enrollment.course_id,
                }
            )
        except KeyError:
            # Skip enrollments whose user record lacks one of the fields
            continue
    course_enrollments.extend(new_rows)

    # Append this course's rows to enrollments.csv; appending avoids re-reading
    # and rewriting the whole file on every iteration.
    if new_rows:
        pd.DataFrame(new_rows, columns=enrollment_columns).to_csv(
            "enrollments.csv", mode="a", header=False, index=False
        )


  0%|          | 0/771 [00:00<?, ?it/s]

100%|██████████| 771/771 [7:05:10<00:00, 33.09s/it]  
