# Oscar course lookup parsing

## Usage: 
1. Do a search that will display all the CS courses you're interested in
1. use CTRL + A to highlight everyhing on the resulting page.
1. use CTRL + C to copy the highlighted contents to your clipboard
1. Open a text editor and use CTRL + V to paste the contents of the search into the text file
1. Save the text file as "oscar_raw.txt" anywhere (but note the location)
1. Run this notebook

### Configure environment and packages

In [1]:
import os
import csv
import pandas as pd
import lxml
import pprint as pp

In [2]:
base_path = "/mnt/c/Users/samue/Desktop"
raw_name = 'Look-Up Classes to Add_.html'
file_path = os.path.join(base_path, raw_name)

In [3]:
with open(file_path, 'r') as f:
    data = pd.read_html(f)

In [4]:
contents_key = 5
school_key = "Computational Science & Engr"

In [5]:
table_data = data[contents_key][school_key]

In [6]:
subj_keys = table_data["Subj"].unique()
print(subj_keys)

['CSE' 'Computer Science' 'Subj' 'CS' 'Industrial & Systems Engr' 'ISYE'
 'Public Policy' 'PUBP']


In [7]:
display([(i, table_data[table_data["Subj"] == i].shape[0]) for i in subj_keys])

[('CSE', 7),
 ('Computer Science', 1),
 ('Subj', 3),
 ('CS', 48),
 ('Industrial & Systems Engr', 1),
 ('ISYE', 13),
 ('Public Policy', 1),
 ('PUBP', 4)]

In [8]:
subj_filtered_data = table_data[(table_data["Subj"] == "CSE") | (table_data["Subj"] == "CS") | (table_data["Subj"] == "ISYE") | (table_data["Subj"] == "PUBP")]
print(subj_filtered_data["Sec"].unique())

['OAN' 'O01' 'OCY' 'OCL' 'O03' 'O08' 'O10' 'O11' 'OAH']


In [9]:
sec_filtered_data = subj_filtered_data[(subj_filtered_data["Sec"] != "OAN") & (subj_filtered_data["Sec"] != "OCY") & (subj_filtered_data["Sec"] != "OCL") & (subj_filtered_data["Sec"] != "OAH")]

In [10]:
pp.pp(sec_filtered_data.columns)

Index(['Select', 'CRN', 'Subj', 'Crse', 'Sec', 'Cmp', 'Bas', 'Cred', 'Title',
       'Days', 'Time', 'Cap', 'Act', 'Rem', 'WL Cap', 'WL Act', 'WL Rem',
       'Instructor', 'Location', 'Attribute', 'Unnamed: 20_level_1',
       'Unnamed: 21_level_1', 'Unnamed: 22_level_1'],
      dtype='object')


In [11]:
numeric_col_names = ["CRN", "Crse", "Cred", "Cap", "Act", "Rem", "WL Cap", "WL Act", "WL Rem"]
useful_cols = ["CRN", "Subj", "Crse", "Sec", "Cred", "Title", "Cap", "Act", "Rem", "WL Cap", "WL Act", "WL Rem", "Instructor"]

In [12]:
sec_filtered_data[numeric_col_names] = sec_filtered_data[numeric_col_names].apply(pd.to_numeric)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [13]:
display(sec_filtered_data.sort_values(["Subj", "WL Act"])[useful_cols])

Unnamed: 0,CRN,Subj,Crse,Sec,Cred,Title,Cap,Act,Rem,WL Cap,WL Act,WL Rem,Instructor
25,88509,CS,6291,O01,3.0,Embedded Software Opt.,0,56,0,999,0,999,"Pande, S. (P)"
52,87888,CS,8803,O08,3.0,Special TopicsCompilers-Theory & Practice,0,76,0,999,0,999,"Pande, S. (P)"
12,86061,CS,6210,O01,3.0,Adv Operating Systems,0,280,0,999,1,998,"Ramachandran, U. (P)"
22,90336,CS,6265,O01,3.0,Info Sec Lab Binexp,0,24,0,999,1,998,"Kim, T. (P)"
14,89891,CS,6238,O01,3.0,Secure Computer Systems,0,65,0,999,2,997,"Ahamad, M. (P)"
54,92884,CS,8803,O11,3.0,Special TopicsSystem and Network Defenses,0,26,0,999,2,997,"Lee, W. (P)"
24,86065,CS,6290,O01,3.0,High Perform Comput Arch,0,248,0,999,3,996,"Prvulovic, M. (P)"
47,89811,CS,7639,O01,3.0,Cyber Physical Design,0,107,0,999,3,996,"Feron, E. (P)"
19,87164,CS,6262,O01,3.0,Network Security,0,175,0,999,5,994,"Lee, W. (P)"
29,87160,CS,6340,O01,3.0,Software Analysis & Test,0,176,0,999,7,992,"Naik, M. (P), Poch, C."
