In [1]:
from collections import defaultdict
import json
import os

# Directory holding the scraped JSON snapshots that this notebook reads.
# The trailing slash matters: the loading cell builds each file path by
# concatenating this string directly with the file name.
ARCHIVE_DIR = './archive-Fall2019/'

# Notes

* Call numbers with fewer than 5 digits are meant to have leading zeroes, but I mistakenly turned them into ints in the scraper.

* Some sections have a location field that is present but holds a placeholder value: 'NONE NONE', 'ONLINE ONLY', or 'OTHR OTHER'.
  * The courses with location 'NONE NONE' are all named "Julie's Test Course".
  * I have no idea what 'OTHR OTHER' means. There are 16 sections whose location was 'OTHR OTHER' at some point. Only 1 of them ever had a real location (it was 303 Hamilton Hall until 9/3).

In [130]:
# Load one scrape snapshot per day, from 2019-08-31 through 2019-09-17, and tag
# every section record with the date of the snapshot it came from.
#
# Produces:
#   courses -- flat list of section dicts, each with an added 'date' key
#   dates   -- the dates (in chronological order) for which a snapshot was found
courses = []
dates = []

# List the archive once instead of re-listing and re-sorting it for every date.
archive_files = sorted(f for f in os.listdir(ARCHIVE_DIR) if '.json' in f)

# The first snapshot is Aug 31; the remaining seventeen are Sep 1 - Sep 17.
snapshot_dates = ['2019-08-31'] + [f'2019-09-{d:02d}' for d in range(1, 18)]

for date in snapshot_dates:
    # Only the first file (in sorted order) whose name contains the date is used.
    snapshot_file = next((f for f in archive_files if date in f), None)
    if snapshot_file is None:
        continue  # no snapshot for this date; it is left out of `dates`

    # Context manager so the file handle is closed as soon as parsing is done.
    with open(os.path.join(ARCHIVE_DIR, snapshot_file)) as fp:
        date_courses = json.load(fp)

    dates.append(date)
    for c in date_courses:
        c['date'] = date
        courses.append(c)

print(f'Read in {len(courses)} from {len(dates)} dates.')

# Two-level lookup: snapshot date -> call number -> section record.
courses_indexed = defaultdict(dict)
for section in courses:
    sections_on_date = courses_indexed[section['date']]
    sections_on_date[section['Call Number']] = section
print('Indexed courses by date, and call number.')

# For every section present in the most recent snapshot, build its day-by-day
# history: one record per date in `dates`, in order. Dates on which the section
# is absent get a placeholder record that has no 'Location' key.
courses_lists = {}
latest_sections = courses_indexed[dates[-1]]

for call_num in latest_sections:
    history = []
    for day in dates:
        placeholder = {'Call Number': call_num, 'date': day}
        history.append(courses_indexed[day].get(call_num, placeholder))

    # Skip sections whose Location field was never set on any date, and
    # sections whose location was ever 'OTHR OTHER' (see notes).
    ever_had_location_field = any('Location' in snap for snap in history)
    ever_othr = any(snap.get('Location', None) == 'OTHR OTHER' for snap in history)
    if not ever_had_location_field or ever_othr:
        continue

    courses_lists[call_num] = history

skipped_count = len(latest_sections) - len(courses_lists)
print(
    'Made courses lists by call number, but',
    f'skipped {skipped_count} sections with bad locations.'
)

Read in 143908 from 18 dates.
Indexed courses by date, and call number.
Made courses lists by call number, but skipped 2788 sections with bad locations.


### Find number of sections whose locations were set for the first time, by day

The difference analysis is actually done in R; this cell just sets up the data for it.

In [131]:
# Call numbers of sections whose location was, on at least one date, either
# 'To be announced' or not set at all.
tba_calls = [
    call_num
    for call_num, instances in courses_lists.items()
    if any(
        'Location' not in c or c['Location'] == 'To be announced'
        for c in instances
    )
]

# Flatten the per-date records of those sections into one list for export.
output = [c for call_num in tba_calls for c in courses_lists[call_num]]

# Context manager guarantees the file is flushed and closed once written.
with open('./tba_calls.json', 'w') as fp:
    json.dump(output, fp)

### Find number of sections whose locations changed, by day

In [132]:

# For each snapshot date, collect the sections whose location differs from the
# previous snapshot, then export the per-date count of changed sections.
date_differences = {}
for i, date in enumerate(dates):
    date_differences[date] = []
    if i == 0:
        continue  # the first snapshot has no earlier day to compare against

    for course_list in courses_lists.values():
        previous, current = course_list[i - 1], course_list[i]
        # A missing 'Location' key compares as None on either side.
        if previous.get('Location') != current.get('Location'):
            # Two records are stored per changed section (this date's record
            # first, then the prior date's), hence the division by 2 below.
            date_differences[date].append(current)
            date_differences[date].append(previous)

# Context manager guarantees the file is flushed and closed once written.
with open('./date_differences.json', 'w') as fp:
    json.dump(
        [
            {'date': date, 'differences': len(changed) / 2}
            for date, changed in date_differences.items()
        ],
        fp
    )

# Listing the actual location changes

Maybe I'll do something with it later.

In [133]:
# Print the full date-by-date location timeline of every section whose location
# differs between snapshots or is missing on some date.
for call_num, snapshots in courses_lists.items():
    last_location = snapshots[0].get('Location', None)

    for snap in snapshots:
        # A record without a 'Location' key gets a per-section sentinel string,
        # so it compares unequal to the running location (including None).
        sentinel = '*****' + str(snap['Call Number'])
        if snap.get('Location', sentinel) == last_location:
            last_location = snap['Location']
            continue

        # First mismatch found: dump the whole timeline once, then move on
        # to the next section.
        print(snap['Call Number'])
        timeline = []
        for entry in snapshots:
            timeline.append('; '.join([entry['date'], entry.get('Location', 'None')]))
        print('\n'.join(timeline))
        print()
        break

29258
2019-08-31; COTC Riverside Church
2019-09-01; COTC Riverside Church
2019-09-02; COTC Riverside Church
2019-09-03; COTC Riverside Church
2019-09-04; COTC Riverside Church
2019-09-05; COTC Riverside Church
2019-09-06; COTC Riverside Church
2019-09-07; COTC Riverside Church
2019-09-08; 311 Riverside Church
2019-09-09; 311 Riverside Church
2019-09-10; 311 Riverside Church
2019-09-11; 311 Riverside Church
2019-09-12; 311 Riverside Church
2019-09-13; 311 Riverside Church
2019-09-14; 311 Riverside Church
2019-09-15; 311 Riverside Church
2019-09-16; 311 Riverside Church
2019-09-17; 311 Riverside Church

29253
2019-08-31; COTC Riverside Church
2019-09-01; COTC Riverside Church
2019-09-02; COTC Riverside Church
2019-09-03; COTC Riverside Church
2019-09-04; COTC Riverside Church
2019-09-05; 330 Riverside Church
2019-09-06; 330 Riverside Church
2019-09-07; 330 Riverside Church
2019-09-08; 330 Riverside Church
2019-09-09; 330 Riverside Church
2019-09-10; 330 Riverside Church
2019-09-11; 330 R

29402
2019-08-31; To be announced
2019-09-01; 414 Pupin Laboratories
2019-09-02; 414 Pupin Laboratories
2019-09-03; 414 Pupin Laboratories
2019-09-04; 414 Pupin Laboratories
2019-09-05; 414 Pupin Laboratories
2019-09-06; 414 Pupin Laboratories
2019-09-07; 414 Pupin Laboratories
2019-09-08; 414 Pupin Laboratories
2019-09-09; 414 Pupin Laboratories
2019-09-10; 414 Pupin Laboratories
2019-09-11; 414 Pupin Laboratories
2019-09-12; 414 Pupin Laboratories
2019-09-13; 414 Pupin Laboratories
2019-09-14; 414 Pupin Laboratories
2019-09-15; 414 Pupin Laboratories
2019-09-16; 414 Pupin Laboratories
2019-09-17; 414 Pupin Laboratories

29356
2019-08-31; 222 Pupin Laboratories
2019-09-01; 222 Pupin Laboratories
2019-09-02; 222 Pupin Laboratories
2019-09-03; 222 Pupin Laboratories
2019-09-04; 222 Pupin Laboratories
2019-09-05; 222 Pupin Laboratories
2019-09-06; 222 Pupin Laboratories
2019-09-07; 222 Pupin Laboratories
2019-09-08; 106B Lewisohn Hall
2019-09-09; 106B Lewisohn Hall
2019-09-10; 106B Lewis

96111
2019-08-31; 270B International Affairs Building
2019-09-01; 270B International Affairs Building
2019-09-02; 270B International Affairs Building
2019-09-03; 270B International Affairs Building
2019-09-04; 270B International Affairs Building
2019-09-05; 270B International Affairs Building
2019-09-06; 270B International Affairs Building
2019-09-07; 270B International Affairs Building
2019-09-08; 270B International Affairs Building
2019-09-09; 270B International Affairs Building
2019-09-10; 270B International Affairs Building
2019-09-11; 270B International Affairs Building
2019-09-12; 405 International Affairs Building
2019-09-13; 405 International Affairs Building
2019-09-14; 405 International Affairs Building
2019-09-15; 405 International Affairs Building
2019-09-16; 405 International Affairs Building
2019-09-17; 405 International Affairs Building

96113
2019-08-31; To be announced
2019-09-01; To be announced
2019-09-02; To be announced
2019-09-03; 963 EXT Schermerhorn Hall [SCH]
2