# Goal
Create a dataset of the dates on which each of the top 100 universities made its key COVID-19 decisions (moving classes online and requiring vaccination).

## Isolating the Top Universities (with individual characteristics)
I'll first isolate the top 100 universities from the U.S. News rankings. The code below is copied from [nahid18's uni-rank Python package](https://github.com/nahid18/uni-rank). Because I got a "list index out of range" error when using the package directly, I copied its code and fixed it locally. The problem was in the `get_usa` method; I solved it by splitting the file paths on `os.sep` instead of `'/'`. I then saved the results to a CSV file.

In [27]:
from glob import glob
import pandas as pd
import requests
import pkgutil
import random
import json
import os

class Ranking:
    """Fetch, cache and query the US News national-universities ranking.

    The ranking is downloaded page by page from the US News search API and
    cached as ``apidata/<page>.json`` relative to the current working
    directory. Every query helper reads from that cache.
    """

    # Cache folder, relative to the current working directory.
    _DATA_DIR = "apidata"
    # Seconds to wait for each HTTP response.
    _TIMEOUT = 3
    # Number of times one request is tried before giving up on it.
    _MAX_ATTEMPTS = 5
    # Used when the packaged ``useragents.txt`` cannot be read, e.g. when the
    # class is pasted into a notebook and ``__package__`` is None.
    _FALLBACK_USER_AGENT = "Mozilla/5.0"

    def __init__(self):
        self.api = "https://www.usnews.com/best-colleges/api/search?_sort=rank&_sortDirection=asc&schoolType=national-universities"

    def _useragent(self):
        """Return a User-Agent string for the next request.

        A random line of the packaged ``useragents.txt`` is used when that
        resource is readable; otherwise a fixed fallback is returned so the
        class keeps working outside of its original package.
        """
        try:
            raw = pkgutil.get_data(__package__, "useragents.txt")
        except (AttributeError, ImportError, OSError, ValueError):
            raw = None
        agents = []
        if raw:
            agents = [ln.strip() for ln in raw.decode("utf-8").splitlines() if ln.strip()]
        if not agents:
            return self._FALLBACK_USER_AGENT
        return random.choice(agents)

    def __fetch_json(self, url, headers):
        """GET *url* and return the decoded JSON body, or None on any failure."""
        try:
            response = requests.get(url=url, headers=headers, timeout=self._TIMEOUT)
            if response.status_code != 200:
                return None
            return response.json()
        except (requests.RequestException, ValueError):
            # ValueError covers a body that is not valid JSON.
            return None

    def __write_page(self, page, payload):
        """Store *payload* as ``<page>.json`` inside the cache folder."""
        path = os.path.join(self._DATA_DIR, str(page) + ".json")
        with open(path, "w") as fh:
            json.dump(payload, fh)

    def __save_initial(self):
        """Download and cache page 1.

        Returns:
            dict with ``"totalPages"`` (page count reported by the API) and
            ``"head"`` (the request headers that worked).

        Raises:
            RuntimeError: if no attempt produced a usable response. The
                number of attempts is bounded so a blocked or offline API
                cannot hang the caller forever.
        """
        for _ in range(self._MAX_ATTEMPTS):
            headers = {"User-Agent": self._useragent()}
            usnews = self.__fetch_json(self.api, headers)
            try:
                total_pages = usnews["data"]["totalPages"]
            except (KeyError, TypeError):
                # Request failed (None) or the payload has an unexpected shape.
                continue
            self.__write_page(1, usnews)
            return {"totalPages": total_pages, "head": headers}
        raise RuntimeError(
            "could not fetch the first ranking page after %d attempts"
            % self._MAX_ATTEMPTS
        )

    def __save_rest(self, length, head):
        """Download and cache pages 2..*length* using the headers *head*.

        Pages that still fail after the retry budget are skipped, but a
        warning names them so an incomplete dataset does not go unnoticed.
        """
        missing = []
        for page in range(2, length + 1):
            url = self.api + "&_page=" + str(page)
            for _ in range(self._MAX_ATTEMPTS):
                page_data = self.__fetch_json(url, head)
                if page_data is not None:
                    self.__write_page(page, page_data)
                    break
            else:
                missing.append(page)
        if missing:
            import warnings

            warnings.warn(
                "ranking pages could not be downloaded: %s"
                % ", ".join(str(p) for p in missing)
            )

    def __read_json(self, filepath):
        """Return the parsed content of the JSON file at *filepath*."""
        with open(filepath, "r") as fp:
            return json.load(fp)

    def __cached_pages(self, dirname):
        """Return ``(page_number, path)`` pairs for *dirname*, sorted by page.

        The number is taken from the file's base name, which works with any
        path separator; ``*.json`` files whose name is not a page number are
        ignored instead of raising.
        """
        pages = []
        for path in glob(os.path.join(dirname, "*.json")):
            stem = os.path.splitext(os.path.basename(path))[0]
            try:
                pages.append((int(stem), path))
            except ValueError:
                continue
        return sorted(pages)

    def __check_directory(self, dirname):
        """Make sure *dirname* exists and holds cached pages.

        The download is triggered whenever no page file is present, so an
        empty folder left behind by an interrupted run is refilled.
        Downloads are always written to the ``apidata`` cache folder.
        """
        os.makedirs(dirname, exist_ok=True)
        if self.__cached_pages(dirname):
            return
        initials = self.__save_initial()
        self.__save_rest(initials["totalPages"], initials["head"])

    def get_usa(self):
        """Return every cached university as a list of dicts, in page order.

        Each dict has the keys ``displayName``, ``rankingDisplayRank``,
        ``state``, ``city``, ``zip`` and ``description``. The description is
        the item's blurb with a wrapping ``<p>...</p>`` removed.
        """
        dirname = self._DATA_DIR
        self.__check_directory(dirname)
        item_keys = ("displayName", "rankingDisplayRank", "state", "city", "zip")
        complete_list = []
        for _, apifile in self.__cached_pages(dirname):
            file_data = self.__read_json(apifile)["data"]
            for item in file_data["items"]:
                detail = item["institution"]
                out = {key: detail[key] for key in item_keys}
                desc = item["blurb"]
                if isinstance(desc, str) and desc.startswith("<p>"):
                    desc = desc[3:]
                    # Only drop the closing tag when it is really there.
                    if desc.endswith("</p>"):
                        desc = desc[:-4]
                out["description"] = desc
                complete_list.append(out)
        return complete_list

    def get_names(self):
        """Return the display names of all cached universities, in order."""
        return [uni["displayName"] for uni in self.get_usa()]

    def get_top_names(self, num):
        """Return the first *num* display names."""
        return self.get_names()[:num]

    def __select(self, column, values):
        """Return the rows whose *column* value is one of *values*."""
        df = pd.DataFrame(self.get_usa())
        if df.empty:
            # No rows means no columns either; indexing would raise KeyError.
            return df
        return df[df[column].isin(values)]

    def select_by_state(self, state_list):
        """Return a DataFrame of universities whose state is in *state_list*."""
        return self.__select("state", state_list)

    def select_by_city(self, city_list):
        """Return a DataFrame of universities whose city is in *city_list*."""
        return self.__select("city", city_list)

    def save_json(self, inplist, filename):
        """Write the list *inplist* to *filename* as indented JSON.

        Nothing is written when *inplist* is empty.
        """
        if not inplist:
            return
        with open(filename, "w") as fh:
            json.dump(inplist, fh, indent=4)

    def save_csv(self, inplist, filename):
        """Write *inplist* to *filename* as CSV without an index column.

        *inplist* may be a list of dicts or a DataFrame (such as the result
        of ``select_by_state``). Nothing is written when it holds no data.
        """
        df = pd.DataFrame(inplist)
        if df.empty:
            return
        df.to_csv(filename, index=False)

In [29]:
# Build the ranking helper; constructing it only stores the API URL.
rank = Ranking()

# Load every university from the cached API pages in "apidata", ordered by
# page number (the API query itself is sorted by rank, ascending).
# get_usa() downloads the pages first when that folder is missing.
usa = rank.get_usa()

In [32]:
# Save the list as usa_list.csv: one row per university, no index column.
rank.save_csv(usa, "usa_list.csv")

Now I'll use Excel to manually enter the dates on which each university moved online and began requiring a vaccine. As there's no good centralized database, I'll look through each of the top 100 universities' websites myself.