### Step 1: Download the college_normalized dataset:
#### gsutil cp gs://cs327e-open-access/college_normalized.zip .
#### unzip college_normalized.zip

### Step 2: Run the code samples in sequence to populate the Firestore database

In [2]:
import pandas as pd
from google.cloud import firestore
# Firestore client shared by every cell below (all reads and writes go
# through `db`). No arguments are passed, so the project and credentials are
# whatever the client library resolves by default.
db = firestore.Client()

In [None]:
# Populate the 'class' collection from class.csv: one document per CSV row,
# keyed by the class number (first column), written as a single batch.
batch = db.batch()
class_collection = db.collection('class')

class_frame = pd.read_csv(
    'college_normalized/class.csv',
    sep=',',
    header=0,
    lineterminator='\n',
)

for fields in class_frame.values.tolist():
    # Columns are addressed by position: cno, cname, credits.
    document = {
        'cno': fields[0],
        'cname': fields[1],
        'credits': fields[2],
    }
    batch.set(class_collection.document(fields[0]), document)

# Nothing is sent to Firestore until the batch is committed.
batch.commit()

In [None]:
# Populate the 'student' collection: one document per row of student.csv,
# keyed by student id, with that student's enrollments from takes.csv
# embedded as a 'classes' array of {'cno', 'grade'} maps. All documents are
# written in a single batch.
batch = db.batch()

df = pd.read_csv('college_normalized/student.csv', sep=',', header=0, lineterminator='\n')
student_rows = df.values.tolist()

df = pd.read_csv('college_normalized/takes.csv', sep=',', header=0, lineterminator='\n')
takes_rows = df.values.tolist()

# Group the enrollments by student id in one pass, so the join below is a
# dictionary lookup instead of a rescan of takes.csv for every student.
# Appending in file order keeps each student's classes in CSV order.
# NOTE: grade values are stored exactly as pandas read them; an empty grade
# cell therefore ends up in Firestore as NaN.
classes_by_sid = {}
for takes_row in takes_rows:
    student_class = {'cno': takes_row[1], 'grade': takes_row[2]}
    classes_by_sid.setdefault(takes_row[0], []).append(student_class)

for student_row in student_rows:

    # Columns are addressed by position: sid, fname, lname, dob, status.
    record = {
        'sid': student_row[0],
        'fname': student_row[1],
        'lname': student_row[2],
        'dob': student_row[3],
        'status': student_row[4],
    }

    # A student with no enrollments gets no 'classes' field at all, rather
    # than an empty array.
    student_classes = classes_by_sid.get(student_row[0])
    if student_classes:
        record['classes'] = student_classes

    student_ref = db.collection('student').document(student_row[0])

    batch.set(student_ref, record)

# Nothing is sent to Firestore until the batch is committed.
batch.commit()

In [10]:
# Populate the 'teacher' collection: one document per row of teacher.csv,
# keyed by teacher id, with the class numbers that teacher is linked to in
# teaches.csv embedded as a 'classes' array of strings. All documents are
# written in a single batch.
batch = db.batch()

df = pd.read_csv('college_normalized/teacher.csv', sep=',', header=0, lineterminator='\n')
teacher_rows = df.values.tolist()

df = pd.read_csv('college_normalized/teaches.csv', sep=',', header=0, lineterminator='\n')
teaches_rows = df.values.tolist()

# Group the class numbers by teacher id in one pass, so the join below is a
# dictionary lookup instead of a rescan of teaches.csv for every teacher.
# Appending in file order keeps each teacher's classes in CSV order.
classes_by_tid = {}
for teaches_row in teaches_rows:
    classes_by_tid.setdefault(teaches_row[0], []).append(teaches_row[1])

for teacher_row in teacher_rows:

    # Columns are addressed by position: tid, instructor, dept.
    record = {
        'tid': teacher_row[0],
        'instructor': teacher_row[1],
        'dept': teacher_row[2],
    }

    # A teacher with no classes gets no 'classes' field at all, rather than
    # an empty array.
    classes_taught = classes_by_tid.get(teacher_row[0])
    if classes_taught:
        record['classes'] = classes_taught

    teacher_ref = db.collection('teacher').document(teacher_row[0])

    batch.set(teacher_ref, record)

# Nothing is sent to Firestore until the batch is committed.
batch.commit()

[update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 },
 update_time {
   seconds: 1601242321
   nanos: 881355000
 }]

### Step 3: Run some queries

In [9]:
# Fetch one student document directly by its id and print it, or report
# that no such document exists.
snapshot = db.collection('student').document('paulg').get()

if not snapshot.exists:
    print('No such student')
else:
    print(f'{snapshot.id} => {snapshot.to_dict()}')

paulg => {'classes': [{'cno': 'CS313E', 'grade': nan}, {'cno': 'CS326E', 'grade': 'A-'}, {'grade': 'A ', 'cno': 'CS329E'}], 'fname': 'Paul', 'sid': 'paulg', 'status': 'CUR', 'lname': 'Gore', 'dob': '2000-09-17'}


In [7]:
# Print every student document whose 'status' field equals 'CUR'.
current_students = db.collection('student').where('status', '==', 'CUR')

for snapshot in current_students.stream():
    print(f'{snapshot.id} => {snapshot.to_dict()}')

aprilz => {'fname': 'April', 'sid': 'aprilz', 'status': 'CUR', 'dob': '2000-01-10', 'lname': 'Lopez'}
bzen26 => {'classes': [{'cno': 'CS313E', 'grade': 'B+'}], 'fname': 'Biswa', 'sid': 'bzen26', 'status': 'CUR', 'dob': '1998-04-22', 'lname': 'Zen'}
jc => {'classes': [{'grade': nan, 'cno': 'CS327E'}, {'grade': nan, 'cno': 'CS331E'}, {'grade': nan, 'cno': 'CS313E'}], 'fname': 'James', 'sid': 'jc', 'status': 'CUR', 'dob': '2000-04-22', 'lname': 'Cowe'}
jerryh => {'dob': '1999-01-03', 'lname': 'Hargrove', 'classes': [{'cno': 'CS329E', 'grade': 'A-'}, {'cno': 'CS327E', 'grade': 'B '}], 'fname': 'Jerry', 'sid': 'jerryh', 'status': 'CUR'}
kev18 => {'classes': [{'grade': nan, 'cno': 'CS329E'}], 'fname': 'Kevin', 'sid': 'kev18', 'status': 'CUR', 'lname': 'Lin', 'dob': '1999-05-10'}
paulg => {'status': 'CUR', 'lname': 'Gore', 'dob': '2000-09-17', 'classes': [{'cno': 'CS313E', 'grade': nan}, {'cno': 'CS326E', 'grade': 'A-'}, {'cno': 'CS329E', 'grade': 'A '}], 'fname': 'Paul', 'sid': 'paulg'}
sude

In [8]:
# Print up to five teachers whose 'classes' array contains 'CS313E',
# ordered by the 'instructor' field.
teachers = db.collection('teacher')
teaching_cs313e = teachers.where('classes', 'array_contains', 'CS313E')
first_five = teaching_cs313e.order_by('instructor').limit(5)

for snapshot in first_five.stream():
    print(f'{snapshot.id} => {snapshot.to_dict()}')

bulko => {'dept': 'Computer Science', 'tid': 'bulko', 'instructor': 'Bill Bulko', 'classes': ['CS313E']}
mitra => {'dept': 'Computer Science', 'tid': 'mitra', 'instructor': 'Shyamal Mitra', 'classes': ['CS329E', 'CS313E']}


In [7]:
# Stream the whole 'student' collection and print the students whose
# 'classes' array has exactly two entries. The length check is done here on
# the client, after each document has been fetched.
for snapshot in db.collection('student').stream():
    fields = snapshot.to_dict()

    # Students without a 'classes' field are skipped.
    if 'classes' not in fields:
        continue

    if len(fields['classes']) == 2:
        print(f'{snapshot.id} => {fields}')

jerryh => {'fname': 'Jerry', 'sid': 'jerryh', 'status': 'CUR', 'dob': '1999-01-03', 'lname': 'Hargrove', 'classes': [{'cno': 'CS329E', 'grade': 'A-'}, {'cno': 'CS327E', 'grade': 'B '}]}


In [16]:
# Two-step lookup: resolve the class number of 'Elements of Databases' from
# the 'class' collection, then scan every student and print those whose
# embedded 'classes' array has an entry with that class number.
cno = None

matching_classes = db.collection('class').where(
    'cname', '==', 'Elements of Databases')

# If several classes share the name, the last one streamed wins; if none
# matches, cno stays None.
for class_snapshot in matching_classes.stream():
    cno = class_snapshot.to_dict()['cno']
    print('cno: ' + cno)

for student_snapshot in db.collection('student').stream():
    fields = student_snapshot.to_dict()

    # Students without a 'classes' field are skipped.
    if 'classes' not in fields:
        continue

    # any() stops at the first matching entry, so each student is printed at
    # most once.
    if any(cno == entry['cno'] for entry in fields['classes']):
        print(f'{student_snapshot.id} => {fields}')

cno: CS327E
jc => {'classes': [{'grade': nan, 'cno': 'CS327E'}, {'grade': nan, 'cno': 'CS331E'}, {'cno': 'CS313E', 'grade': nan}], 'fname': 'James', 'sid': 'jc', 'status': 'CUR', 'dob': '2000-04-22', 'lname': 'Cowe'}
jerryh => {'fname': 'Jerry', 'sid': 'jerryh', 'status': 'CUR', 'dob': '1999-01-03', 'lname': 'Hargrove', 'classes': [{'cno': 'CS329E', 'grade': 'A-'}, {'cno': 'CS327E', 'grade': 'B '}]}
