In [6]:
import requests
import json
import pandas as pd
import time
import base64

def get_umass_professors(school_id="U2Nob29zLTE1MTM="):  # Base64 encoded "School-1513"
    headers = {
        'Authorization': 'Basic dGVzdDp0ZXN0',
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    query = {
        "query": """
        query NewSearch($query: TeacherSearchQuery!) {
          newSearch {
            teachers(query: $query, first: 1000) {
              edges {
                node {
                  id
                  firstName
                  lastName
                  department
                }
              }
              pageInfo {
                hasNextPage
                endCursor
              }
            }
          }
        }
        """,
        "variables": {
            "query": {
                "text": "",
                "schoolID": school_id,
                "fallback": False,
                "departmentID": None
            }
        }
    }
    
    response = requests.post(
        "https://www.ratemyprofessors.com/graphql",
        json=query,
        headers=headers
    )
    
    if response.status_code == 200:
        data = response.json()
        professors = []
        
        if 'data' in data and 'newSearch' in data['data']:
            try: 
                for edge in data['data']['newSearch']['teachers']['edges']:
                    prof = edge['node']
                    # Check if 'id' is in the expected format (Base64 encoded) and decode it
                    if 'id' in prof:
                        try:
                            decoded_id = base64.b64decode(prof['id']).decode('utf-8')
                            professors.append({
                                'id': decoded_id.split('-')[1],  # Remove the "Teacher-" prefix
                                'name': f"{prof['firstName']} {prof['lastName']}",
                                'department': prof['department']
                            })
                        except Exception as e:
                            print(f"Error decoding ID for professor: {prof}, Error: {e}")
                    else:
                        print(f"Unexpected ID format for professor: {prof}")
            except Exception as e:
                print(f"Error processing data: {e}")

            print(f"Found {len(professors)} professors")
            return professors
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

# Get all UMass professors
professors = get_umass_professors()

# Print first few professors to verify
if professors:
    print("\nFirst 5 professors:")
    for prof in professors[:5]:
        print(f"ID: {prof['id']}, Name: {prof['name']}, Department: {prof['department']}")
    
    # Save to CSV for future use
    df = pd.DataFrame(professors)

professor_dict = {prof['id']: prof for prof in professors}




Found 1000 professors

First 5 professors:
ID: 203815, Name: John Bickford, Department: Psychology
ID: 77120, Name: Randall Phillis, Department: Biology
ID: 1621419, Name: Laura Francis, Department: Biology
ID: 192549, Name: Joanna Jeneralczuk, Department: Mathematics
ID: 1617241, Name: Chris McDaniel, Department: Chemistry


In [7]:
#now lets try and get ALL professors using pagination
def get_university_professors(school_id):  # Base64 encoded "School-1513"
    headers = {
        'Authorization': 'Basic dGVzdDp0ZXN0',
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    query = {
        "query": """
        query NewSearch($query: TeacherSearchQuery!, $after: String) {
          newSearch {
            teachers(query: $query, after: $after, first: 1000) {
              edges {
                node {
                  id
                  firstName
                  lastName
                  department
                }
              }
              pageInfo {
                startCursor
                hasNextPage
                endCursor
              }
            }
          }
        }
        """,
    }
    
    variables = {
        "query": {
            "text": "",
            "schoolID": school_id,
            "fallback": False,
            "departmentID": None
        },
        "after": None
    }
    
    professors = []
    hasNextPage = True
    
    while hasNextPage:
        response = requests.post(
            "https://www.ratemyprofessors.com/graphql",
            json={'query': query['query'], 'variables': variables},
            headers=headers
        )
        
        if response.status_code == 200:
            data = response.json()
            print(f"Page data: {data}")
            
            if 'data' in data and 'newSearch' in data['data']:
                teacher_data = data['data']['newSearch']['teachers']
                
                # Process current page of professors
                for edge in teacher_data['edges']:
                    prof = edge['node']
                    if 'id' in prof:
                        try:
                            decoded_id = base64.b64decode(prof['id']).decode('utf-8')
                            professors.append({
                                'id': decoded_id.split('-')[1],
                                'name': f"{prof['firstName']} {prof['lastName']}",
                                'department': prof['department']
                            })
                        except Exception as e:
                            print(f"Error decoding ID for professor: {prof}, Error: {e}")
                
                # Update pagination info
                hasNextPage = teacher_data['pageInfo']['hasNextPage']
                if hasNextPage:
                    variables['after'] = teacher_data['pageInfo']['endCursor']
            else:
                print("Unexpected data format received")
                break
        else:
            print(f"Error: {response.status_code}")
            print(response.text)
            break
            
        print(f"Found {len(professors)} professors so far...")
    
    return professors

# Get all UMass professors
professors = get_university_professors(Universities["University of Massachusetts--Amherst"]["base64"])

# Print first few professors to verify
if professors:
    print("\nFirst 5 professors:")
    for prof in professors[:5]:
        print(f"ID: {prof['id']}, Name: {prof['name']}, Department: {prof['department']}")
    
    # Save to CSV for future use
    df = pd.DataFrame(professors)

professor_dict = {prof['id']: prof for prof in professors}


Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Psychology', 'firstName': 'John', 'id': 'VGVhY2hlci0yMDM4MTU=', 'lastName': 'Bickford'}}, {'node': {'department': 'Biology', 'firstName': 'Randall', 'id': 'VGVhY2hlci03NzEyMA==', 'lastName': 'Phillis'}}, {'node': {'department': 'Biology', 'firstName': 'Laura', 'id': 'VGVhY2hlci0xNjIxNDE5', 'lastName': 'Francis'}}, {'node': {'department': 'Mathematics', 'firstName': 'Joanna', 'id': 'VGVhY2hlci0xOTI1NDk=', 'lastName': 'Jeneralczuk'}}, {'node': {'department': 'Chemistry', 'firstName': 'Chris', 'id': 'VGVhY2hlci0xNjE3MjQx', 'lastName': 'McDaniel'}}, {'node': {'department': 'Communication', 'firstName': 'Sut', 'id': 'VGVhY2hlci00NzcwNA==', 'lastName': 'Jhally'}}, {'node': {'department': 'Mathematics', 'firstName': 'Catherine', 'id': 'VGVhY2hlci04ODc1MjY=', 'lastName': 'Benincasa'}}, {'node': {'department': 'Accounting', 'firstName': 'Catherine', 'id': 'VGVhY2hlci04MzcwNzA=', 'lastName': 'Lowry'}}, {'node': {'

In [8]:
#and lets bring our other function as well
import requests
import json
import pandas as pd
import time

import base64

def get_all_reviews(professor_id):
    headers = {
        'Authorization': 'Basic dGVzdDp0ZXN0',
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    # Encode the professor_id in base64
    encoded_professor_id = base64.b64encode(f"Teacher-{professor_id}".encode()).decode()
    
    payload = {
        "query": "query GetTeacherRatings($id: ID!) { node(id: $id) { ... on Teacher { id firstName lastName school { name } ratings(first: 1000) { edges { node { comment class difficultyRating qualityRating attendanceMandatory wouldTakeAgain isForCredit textbookUse date grade ratingTags} } pageInfo { hasNextPage endCursor } } } } }",
        "variables": {
            "id": encoded_professor_id
        }
    }
    
    response = requests.post(
        "https://www.ratemyprofessors.com/graphql",
        json=payload,
        headers=headers
    )
    
    if response.status_code != 200:
        print(f"Error: Status code {response.status_code}")
        print(response.text)
        return None
        
    data = response.json()
    
    # Check for errors in the response
    if 'errors' in data:
        print("GraphQL Errors:", data['errors'])
        return None
        
    node_list = data['data']['node']['ratings']['edges']

    rev_list = []
    for node in node_list:
        node['node']['tid'] = professor_id
        rev_list.append(node['node'])
    return rev_list

# Test the function
professor_id = "2936075"  # Mark Wilson's ID
result = get_all_reviews(professor_id)
print(json.dumps(result, indent=2))

[
  {
    "attendanceMandatory": "",
    "class": "CS575",
    "comment": "Didn't offer a make-up exam for the midterm, which made the final exam 50% of my grade. The final exam was extremely hard, he didn't offer any regrade requests for the final, and closed the Piazza almost immediately after the final so we couldn't dispute anything. Allowing him to teach a CS course again was a huge mistake for the CS department.",
    "date": "2025-01-15 21:34:45 +0000 UTC",
    "difficultyRating": 5,
    "grade": "D+",
    "isForCredit": true,
    "qualityRating": 1,
    "ratingTags": "Tough grader--Graded by few things",
    "textbookUse": null,
    "wouldTakeAgain": null,
    "tid": "2936075"
  },
  {
    "attendanceMandatory": "mandatory",
    "class": "CS575",
    "comment": "If you can avoid him, please do. As said before, he is extremely condescending and lacking in patience when answering questions. He will call a new examinable concept simple or powerful or both, then not explain it. He 

In [9]:
#and lets bring our other function as well
import requests
import json
import pandas as pd
import time
import base64

def get_all_reviews(professor_id):
    headers = {
        'Authorization': 'Basic dGVzdDp0ZXN0',
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    # Encode the professor_id in base64
    encoded_professor_id = base64.b64encode(f"Teacher-{professor_id}".encode()).decode()
    
    rev_list = []
    has_next_page = True
    end_cursor = None
    
    while has_next_page:
        # Add the after parameter if we have an end cursor
        ratings_params = "first: 1000"
        if end_cursor:
            ratings_params += f', after: "{end_cursor}"'
            
        payload = {
            "query": f"""query GetTeacherRatings($id: ID!) {{
                node(id: $id) {{
                    ... on Teacher {{
                        id firstName lastName
                        school {{ name }}
                        ratings({ratings_params}) {{
                            edges {{
                                node {{
                                    comment class difficultyRating qualityRating
                                    createdByUser attendanceMandatory wouldTakeAgain
                                    isForCredit textbookUse date grade ratingTags clarityRating
                                    helpfulRating isForOnlineClass thumbsUpTotal thumbsDownTotal
                                }}
                            }}
                            pageInfo {{
                                hasNextPage
                                endCursor
                            }}
                        }}
                    }}
                }}
            }}""",
            "variables": {
                "id": encoded_professor_id
            }
        }
        
        response = requests.post(
            "https://www.ratemyprofessors.com/graphql",
            json=payload,
            headers=headers
        )
        if response.status_code == 429:
            print("Rate limit exceeded, waiting 10 seconds before retrying...")
            time.sleep(10)
            continue

        if response.status_code == 503:
            print("Rate limit exceeded, waiting 10 seconds before retrying...")
            time.sleep(10)
            continue
        
        if response.status_code != 200:
            print(f"Error: Status code {response.status_code}")
            print(response.text)
            return None
            
        data = response.json()
        
        # Check for errors in the response
        if 'errors' in data:
            print("GraphQL Errors:", data['errors'])
            return None
            
        node_list = data['data']['node']['ratings']['edges']
        page_info = data['data']['node']['ratings']['pageInfo']
        
        # Add professor_id to each review and append to results
        for node in node_list:
            node['node']['pid'] = professor_id
            rev_list.append(node['node'])
            
        # Update pagination info
        has_next_page = page_info['hasNextPage']
        end_cursor = page_info['endCursor']
        
        # Add a small delay between requests
        if has_next_page:
            time.sleep(0.5)
            
    return rev_list

# Test the function
professor_id = "2936075"  # Mark Wilson's ID
result = get_all_reviews(professor_id)
print(json.dumps(result, indent=2))

[
  {
    "attendanceMandatory": "",
    "clarityRating": 1,
    "class": "CS575",
    "comment": "Didn't offer a make-up exam for the midterm, which made the final exam 50% of my grade. The final exam was extremely hard, he didn't offer any regrade requests for the final, and closed the Piazza almost immediately after the final so we couldn't dispute anything. Allowing him to teach a CS course again was a huge mistake for the CS department.",
    "createdByUser": false,
    "date": "2025-01-15 21:34:45 +0000 UTC",
    "difficultyRating": 5,
    "grade": "D+",
    "helpfulRating": 1,
    "isForCredit": true,
    "isForOnlineClass": false,
    "qualityRating": 1,
    "ratingTags": "Tough grader--Graded by few things",
    "textbookUse": null,
    "thumbsDownTotal": 0,
    "thumbsUpTotal": 0,
    "wouldTakeAgain": null,
    "pid": "2936075"
  },
  {
    "attendanceMandatory": "mandatory",
    "clarityRating": 1,
    "class": "CS575",
    "comment": "If you can avoid him, please do. As sa

In [None]:
#now lets try and fetch all reviews for umass professors, just so we can get an idea of how the edge cases work for each professor
professors = get_all_professors()
reviews = []
from tqdm import tqdm
for prof in tqdm(professors, desc="Fetching professor reviews"):
    result = get_all_reviews(prof['id'])
    reviews.append(result)
    time.sleep(0.5)


#and lets now try and flatten this data, and view the size
data = []
for review_list in reviews:
    for review in review_list:
        data.append(review)

df = pd.DataFrame(data)

print(df.shape)



Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Psychology', 'firstName': 'John', 'id': 'VGVhY2hlci0yMDM4MTU=', 'lastName': 'Bickford'}}, {'node': {'department': 'Biology', 'firstName': 'Randall', 'id': 'VGVhY2hlci03NzEyMA==', 'lastName': 'Phillis'}}, {'node': {'department': 'Biology', 'firstName': 'Laura', 'id': 'VGVhY2hlci0xNjIxNDE5', 'lastName': 'Francis'}}, {'node': {'department': 'Mathematics', 'firstName': 'Joanna', 'id': 'VGVhY2hlci0xOTI1NDk=', 'lastName': 'Jeneralczuk'}}, {'node': {'department': 'Chemistry', 'firstName': 'Chris', 'id': 'VGVhY2hlci0xNjE3MjQx', 'lastName': 'McDaniel'}}, {'node': {'department': 'Communication', 'firstName': 'Sut', 'id': 'VGVhY2hlci00NzcwNA==', 'lastName': 'Jhally'}}, {'node': {'department': 'Mathematics', 'firstName': 'Catherine', 'id': 'VGVhY2hlci04ODc1MjY=', 'lastName': 'Benincasa'}}, {'node': {'department': 'Accounting', 'firstName': 'Catherine', 'id': 'VGVhY2hlci04MzcwNzA=', 'lastName': 'Lowry'}}, {'node': {'

Fetching professor reviews:   1%|▍                                                   | 40/4219 [00:34<58:03,  1.20it/s]

Fetching professor reviews:   1%|▍                                                 | 40/4219 [00:34<1:00:50,  1.14it/s]


KeyboardInterrupt: 

In [17]:
#Lets list out the top 100 colleges now, along with their id's, names, and total enrollment
#List from Jan 2025 - https://www.usnews.com/best-colleges/rankings/national-universities?_sort=rank&_sortDirection=asc
Universities = {
    "Princeton University": {
        "id": 'School-780',
        "base64": 'U2Nob29zLTc4MA==',
        "name": "Princeton University", 
        "undergraduate_enrollment": 5671,
        "location": "Princeton, NJ"
    },
    "Massachusetts Institute of Technology": {
        "id": 'School-580',
        "base64": 'U2Nob29zLTU4MA==',
        "name": "Massachusetts Institute of Technology",
        "undergraduate_enrollment": 4576,
        "location": "Cambridge, MA"
    },
    "Harvard University": {
        "id": 'School-399',
        "base64": 'U2Nob29zLTM5OQ==',
        "name": "Harvard University",
        "undergraduate_enrollment": 7110,
        "location": "Cambridge, MA"
    },
    "Stanford University": {
        "id": 'School-953',
        "base64": 'U2Nob29zLTk1Mw==',
        "name": "Stanford University",
        "undergraduate_enrollment": 8054,
        "location": "Stanford, CA"
    },
    "Yale University": {
        "id": 'School-1222',
        "base64": 'U2Nob29zLTEyMjI=',
        "name": "Yale University",
        "undergraduate_enrollment": 6818,
        "location": "New Haven, CT"
    },
    "California Institute of Technology": {
        "id": 'School-148',
        "base64": 'U2Nob29zLTE0OA==',
        "name": "California Institute of Technology",
        "undergraduate_enrollment": 1023,
        "location": "Pasadena, CA"
    },
    "Duke University": {
        "id": 'School-1350',
        "base64": 'U2Nob29zLTEzNTA=',
        "name": "Duke University",
        "undergraduate_enrollment": 6488,
        "location": "Durham, NC"
    },
    "Johns Hopkins University": {
        "id": 'School-464',
        "base64": 'U2Nob29zLTQ2NA==',
        "name": "Johns Hopkins University",
        "undergraduate_enrollment": 6090,
        "location": "Baltimore, MD"
    },
    "Northwestern University": {
        "id": 'School-709',
        "base64": 'U2Nob29zLTcwOQ==',
        "name": "Northwestern University",
        "undergraduate_enrollment": 8846,
        "location": "Evanston, IL"
    },
    "University of Pennsylvania": {
        "id": 'School-1275',
        "base64": 'U2Nob29zLTEyNzU=',
        "name": "University of Pennsylvania",
        "undergraduate_enrollment": 9995,
        "location": "Philadelphia, PA"
    },
    "Cornell University": {
        "id": 'School-298',
        "base64": 'U2Nob29zLTI5OA==',
        "name": "Cornell University",
        "undergraduate_enrollment": 16071,
        "location": "Ithaca, NY"
    },
    "University of Chicago": {
        "id": 'School-1085',
        "base64": 'U2Nob29zLTEwODU=',
        "name": "University of Chicago",
        "undergraduate_enrollment": 7489,
        "location": "Chicago, IL"
    },
    "Brown University": {
        "id": 'School-137',
        "base64": 'U2Nob29zLTEzNw==',
        "name": "Brown University",
        "undergraduate_enrollment": 7741,
        "location": "Providence, RI"
    },
    "Columbia University": {
        "id": 'School-278',
        "base64": 'U2Nob29zLTI3OA==',
        "name": "Columbia University",
        "undergraduate_enrollment": 8902,
        "location": "New York, NY"
    },
    "Dartmouth College": {
        "id": 'School-1339',
        "base64": 'U2Nob29zLTEzMzk=',
        "name": "Dartmouth College",
        "undergraduate_enrollment": 4447,
        "location": "Hanover, NH"
    },
    "University of California--Los Angeles": {
        "id": 'School-1075',
        "base64": 'U2Nob29zLTEwNzU=',
        "name": "University of California--Los Angeles",
        "undergraduate_enrollment": 33040,
        "location": "Los Angeles, CA"
    },
    "University of California, Berkeley": {
        "id": 'School-1072',
        "base64": 'U2Nob29zLTEwNzI=',
        "name": "University of California, Berkeley",
        "undergraduate_enrollment": 33405,
        "location": "Berkeley, CA"
    },
    "Rice University": {
        "id": 'School-799',
        "base64": 'U2Nob29zLTc5OQ==',
        "name": "Rice University",
        "undergraduate_enrollment": 4574,
        "location": "Houston, TX"
    },
    "University of Notre Dame": {
        "id": 'School-1576',
        "base64": 'U2Nob29zLTE1NzY=',
        "name": "University of Notre Dame",
        "undergraduate_enrollment": 8968,
        "location": "Notre Dame, IN"
    },
    "Vanderbilt University": {
        "id": 'School-4002',
        "base64": 'U2Nob29zLTQwMDI=',
        "name": "Vanderbilt University",
        "undergraduate_enrollment": 7152,
        "location": "Nashville, TN"
    },
    "Carnegie Mellon University": {
        "id": 'School-181',
        "base64": 'U2Nob29zLTE4MQ==',
        "name": "Carnegie Mellon University",
        "undergraduate_enrollment": 7707,
        "location": "Pittsburgh, PA"
    },
    "University of Michigan--Ann Arbor": {
        "id": 'School-1258',
        "base64": 'U2Nob29zLTEyNTg=',
        "name": "University of Michigan--Ann Arbor",
        "undergraduate_enrollment": 33730,
        "location": "Ann Arbor, MI"
    },
    "Washington University in St. Louis": {
        "id": 'School-1147',
        "base64": 'U2Nob29zLTExNDc=',
        "name": "Washington University in St. Louis",
        "undergraduate_enrollment": 8267,
        "location": "St. Louis, MO"
    },
    "Emory University": {
        "id": 'School-340',
        "base64": 'U2Nob29zLTM0MA==',
        "name": "Emory University",
        "undergraduate_enrollment": 7359,
        "location": "Atlanta, GA"
    },
    "Georgetown University": {
        "id": 'School-355',
        "base64": 'U2Nob29zLTM1NQ==',
        "name": "Georgetown University",
        "undergraduate_enrollment": 7968,
        "location": "Washington, DC"
    },
    "University of Virginia": {
        "id": 'School-1277',
        "base64": 'U2Nob29zLTEyNzc=',
        "name": "University of Virginia",
        "undergraduate_enrollment": 17618,
        "location": "Charlottesville, VA"
    },
    "University of North Carolina--Chapel Hill": {
        "id": 'School-1232',
        "base64": 'U2Nob29zLTEyMzI=',
        "name": "University of North Carolina--Chapel Hill",
        "undergraduate_enrollment": 20880,
        "location": "Chapel Hill, NC"
    },
    "University of Southern California": {
        "id": 'School-1381',
        "base64": 'U2Nob29zLTEzODE=',
        "name": "University of Southern California",
        "undergraduate_enrollment": 21023,
        "location": "Los Angeles, CA"
    },
    "University of California, San Diego": {
        "id": 'School-1079',
        "base64": 'U2Nob29zLTEwNzk=',
        "name": "University of California, San Diego",
        "undergraduate_enrollment": 33792,
        "location": "La Jolla, CA"
    },
    "New York University": {
        "id": 'School-675',
        "base64": 'U2Nob29zLTY3NQ==',
        "name": "New York University",
        "undergraduate_enrollment": 29760,
        "location": "New York, NY"
    },
    "University of Florida": {
        "id": 'School-1100',
        "base64": 'U2Nob29zLTExMDA=',
        "name": "University of Florida",
        "undergraduate_enrollment": 34924,
        "location": "Gainesville, FL"
    },
    "University of Texas--Austin": {
        "id": 'School-1255',
        "base64": 'U2Nob29zLTEyNTU=',
        "name": "University of Texas--Austin",
        "undergraduate_enrollment": 42444,
        "location": "Austin, TX"
    },
    "Georgia Institute of Technology": {
        "id": 'School-361',
        "base64": 'U2Nob29zLTM2MQ==',
        "name": "Georgia Institute of Technology",
        "undergraduate_enrollment": 19505,
        "location": "Atlanta, GA"
    },
    "University of California, Davis": {
        "id": 'School-1073',
        "base64": 'U2Nob29zLTEwNzM=',
        "name": "University of California, Davis",
        "undergraduate_enrollment": 31797,
        "location": "Davis, CA"
    },
    "University of California--Irvine": {
        "id": 'School-1074',
        "base64": 'U2Nob29zLTEwNzQ=',
        "name": "University of California--Irvine",
        "undergraduate_enrollment": 29503,
        "location": "Irvine, CA"
    },
    "University of Illinois Urbana-Champaign": {
        "id": 'School-1112',
        "base64": 'U2Nob29zLTExMTI=',
        "name": "University of Illinois Urbana-Champaign",
        "undergraduate_enrollment": 35564,
        "location": "Champaign, IL"
    },
    "Boston College": {
        "id": 'School-122',
        "base64": 'U2Nob29zLTEyMjI=',
        "name": "Boston College",
        "undergraduate_enrollment": 9575,
        "location": "Chestnut Hill, MA"
    },
    "Tufts University": {
        "id": 'School-1040',
        "base64": 'U2Nob29zLTEwNDA=',
        "name": "Tufts University",
        "undergraduate_enrollment": 6877,
        "location": "Medford, MA"
    },
    "University of California, Santa Barbara": {
        "id": 'School-1077',
        "base64": 'U2Nob29zLTEwNzc=',
        "name": "University of California, Santa Barbara",
        "undergraduate_enrollment": 23232,
        "location": "Santa Barbara, CA"
    },
    "University of Wisconsin--Madison": {
        "id": 'School-18418',
        "base64": 'U2Nob29zLTE4NDE4',
        "name": "University of Wisconsin--Madison",
        "undergraduate_enrollment": 37817,
        "location": "Madison, WI"
    },
    "Boston University": {
        "id": 'School-124',
        "base64": 'U2Nob29zLTEyNA==',
        "name": "Boston University",
        "undergraduate_enrollment": 18656,
        "location": "Boston, MA"
    },
    "The Ohio State University": {
        "id": 'School-724',
        "base64": 'U2Nob29zLTcyNA==',
        "name": "The Ohio State University",
        "undergraduate_enrollment": 45728,
        "location": "Columbus, OH"
    },
    "Rutgers University--New Brunswick": {
        "id": 'School-825',
        "base64": 'U2Nob29zLTgyNQ==',
        "name": "Rutgers University--New Brunswick",
        "undergraduate_enrollment": 36588,
        "location": "Piscataway, NJ"
    },
    "University of Maryland, College Park": {
        "id": 'School-1270',
        "base64": 'U2Nob29zLTEyNzA=',
        "name": "University of Maryland, College Park",
        "undergraduate_enrollment": 30608,
        "location": "College Park, MD"
    },
    "University of Rochester": {
        "id": 'School-1331',
        "base64": 'U2Nob29zLTEzMzE=',
        "name": "University of Rochester",
        "undergraduate_enrollment": 6764,
        "location": "Rochester, NY"
    },
    "Lehigh University": {
        "id": 'School-509',
        "base64": 'U2Nob29zLTUwOQ==',
        "name": "Lehigh University",
        "undergraduate_enrollment": 5811,
        "location": "Bethlehem, PA"
    },
    "Purdue University--Main Campus": {
        "id": 'School-783',
        "base64": 'U2Nob29zLTc4Mw==',
        "name": "Purdue University--Main Campus",
        "undergraduate_enrollment": 39170,
        "location": "West Lafayette, IN"
    },
    "University of Georgia": {
        "id": 'School-1101',
        "base64": 'U2Nob29zLTExMDE=',
        "name": "University of Georgia",
        "undergraduate_enrollment": 31514,
        "location": "Athens, GA"
    },
    "University of Washington": {
        "id": 'School-1530',
        "base64": 'U2Nob29zLTE1MzA=',
        "name": "University of Washington",
        "undergraduate_enrollment": 39125,
        "location": "Seattle, WA"
    },
    "Wake Forest University": {
        "id": 'School-1130',
        "base64": 'U2Nob29zLTExMzA=',
        "name": "Wake Forest University",
        "undergraduate_enrollment": 5471,
        "location": "Winston-Salem, NC"
    },
    "Case Western Reserve University": {
        "id": 'School-186',
        "base64": 'U2Nob29zLTE4Ng==',
        "name": "Case Western Reserve University",
        "undergraduate_enrollment": 6186,
        "location": "Cleveland, OH"
    },
    "Texas A&M University": {
        "id": 'School-1003',
        "base64": 'U2Nob29zLTEwMDM=',
        "name": "Texas A&M University",
        "undergraduate_enrollment": 59933,
        "location": "College Station, TX"
    },
    "Virginia Tech": {
        "id": 'School-1349',
        "base64": 'U2Nob29zLTEzNDk=',
        "name": "Virginia Tech",
        "undergraduate_enrollment": 30504,
        "location": "Blacksburg, VA"
    },
    "Florida State University": {
        "id": 'School-1237',
        "base64": 'U2Nob29zLTEyMzc=',
        "name": "Florida State University",
        "undergraduate_enrollment": 32217,
        "location": "Tallahassee, FL"
    },
    "Northeastern University": {
        "id": 'School-696',
        "base64": 'U2Nob29zLTY5Ng==',
        "name": "Northeastern University",
        "undergraduate_enrollment": 15891,
        "location": "Boston, MA"
    },
    "University of Minnesota--Twin Cities": {
        "id": 'School-1257',
        "base64": 'U2Nob29zLTEyNTc=',
        "name": "University of Minnesota--Twin Cities",
        "undergraduate_enrollment": 39556,
        "location": "Minneapolis, MN"
    },
    "William & Mary": {
        "id": 'School-269',
        "base64": 'U2Nob29zLTI2OQ==',
        "name": "William & Mary",
        "undergraduate_enrollment": 6963,
        "location": "Williamsburg, VA"
    },
    "North Carolina State University": {
        "id": 'School-685',
        "base64": 'U2Nob29zLTY4NQ==',
        "name": "North Carolina State University",
        "undergraduate_enrollment": 27323,
        "location": "Raleigh, NC"
    },
    "Stony Brook University--SUNY": {
        "id": 'School-971',
        "base64": 'U2Nob29zLTk3MQ==',
        "name": "Stony Brook University--SUNY",
        "undergraduate_enrollment": 17449,
        "location": "Stony Brook, NY"
    },
    "University of California, Merced": {
        "id": 'School-4767',
        "base64": 'U2Nob29zLTQ3Njc=',
        "name": "University of California, Merced",
        "undergraduate_enrollment": 8373,
        "location": "Merced, CA"
    },
    "University of Massachusetts--Amherst": {
        "id": 'School-1513',
        "base64": 'U2Nob29zLTE1MTM=',
        "name": "University of Massachusetts--Amherst",
        "undergraduate_enrollment": 23936,
        "location": "Amherst, MA"
    },
    "Villanova University": {
        "id": 'School-1236',
        "base64": 'U2Nob29zLTEyMzY=',
        "name": "Villanova University",
        "undergraduate_enrollment": 7065,
        "location": "Villanova, PA"
    },
    "Brandeis University": {
        "id": 'School-129',
        "base64": 'U2Nob29zLTEyOQ==',
        "name": "Brandeis University",
        "undergraduate_enrollment": 3675,
        "location": "Waltham, MA"
    },
    "George Washington University": {
        "id": 'School-353',
        "base64": 'U2Nob29zLTM1Mw==',
        "name": "George Washington University",
        "undergraduate_enrollment": 11387,
        "location": "Washington, DC"
    },
    "Michigan State University": {
        "id": 'School-601',
        "base64": 'U2Nob29zLTYwMQ==',
        "name": "Michigan State University",
        "undergraduate_enrollment": 40483,
        "location": "East Lansing, MI"
    },
    "The Pennsylvania State University--University Park": {
        "id": 'School-758',
        "base64": 'U2Nob29zLTc1OA==',
        "name": "The Pennsylvania State University--University Park",
        "undergraduate_enrollment": 42223,
        "location": "University Park, PA"
    },
    "Santa Clara University": {
        "id": 'School-882',
        "base64": 'U2Nob29zLTg4Mg==',
        "name": "Santa Clara University",
        "undergraduate_enrollment": 6249,
        "location": "Santa Clara, CA"
    },
    "Tulane University": {
        "id": 'School-1041',
        "base64": 'U2Nob29zLTEwNDE=',
        "name": "Tulane University",
        "undergraduate_enrollment": 7295,
        "location": "New Orleans, LA"
    },
    "University of Miami": {
        "id": 'School-1241',
        "base64": 'U2Nob29zLTEyNDE=',
        "name": "University of Miami",
        "undergraduate_enrollment": 12883,
        "location": "Coral Gables, FL"
    },
    "Rensselaer Polytechnic Institute": {
        "id": 'School-795',
        "base64": 'U2Nob29zLTc5NQ==',
        "name": "Rensselaer Polytechnic Institute",
        "undergraduate_enrollment": 5945,
        "location": "Troy, NY"
    },
    "University of Connecticut": {
        "id": 'School-1091',
        "base64": 'U2Nob29zLTEwOTE=',
        "name": "University of Connecticut",
        "undergraduate_enrollment": 19388,
        "location": "Storrs, CT"
    },
    "University of Pittsburgh": {
        "id": 'School-1247',
        "base64": 'U2Nob29zLTEyNDc=',
        "name": "University of Pittsburgh",
        "undergraduate_enrollment": 20220,
        "location": "Pittsburgh, PA"
    },
    "Binghamton University--SUNY": {
        "id": 'School-958',
        "base64": 'U2Nob29zLTk1OA==',
        "name": "Binghamton University--SUNY",
        "undergraduate_enrollment": 14408,
        "location": "Binghamton, NY"
    },
    "Indiana University--Bloomington": {
        "id": 'School-440',
        "base64": 'U2Nob29zLTQ0MA==',
        "name": "Indiana University--Bloomington",
        "undergraduate_enrollment": 36833,
        "location": "Bloomington, IN"
    },
    "Syracuse University": {
        "id": 'School-992',
        "base64": 'U2Nob29zLTk5Mg==',
        "name": "Syracuse University",
        "undergraduate_enrollment": 15739,
        "location": "Syracuse, NY"
    },
    "Colorado School of Mines": {
        "id": 'School-274',
        "base64": 'U2Nob29zLTI3NA==',
        "name": "Colorado School of Mines",
        "undergraduate_enrollment": 5852,
        "location": "Golden, CO"
    },
    "Stevens Institute of Technology": {
        "id": 'School-982',
        "base64": 'U2Nob29zLTk4Mg==',
        "name": "Stevens Institute of Technology",
        "undergraduate_enrollment": 4026,
        "location": "Hoboken, NJ"
    },
    "University at Buffalo--SUNY": {
        "id": 'School-960',
        "base64": 'U2Nob29zLTk2MA==',
        "name": "University at Buffalo--SUNY",
        "undergraduate_enrollment": 20463,
        "location": "Buffalo, NY"
    },
    "University of California, Riverside": {
        "id": 'School-1076',
        "base64": 'U2Nob29zLTEwNzY=',
        "name": "University of California, Riverside",
        "undergraduate_enrollment": 22646,
        "location": "Riverside, CA"
    },
    "Clemson University": {
        "id": 'School-242',
        "base64": 'U2Nob29zLTI0Mg==',
        "name": "Clemson University",
        "undergraduate_enrollment": 22875,
        "location": "Clemson, SC"
    },
    "Pepperdine University": {
        "id": 'School-759',
        "base64": 'U2Nob29zLTc1OQ==',
        "name": "Pepperdine University",
        "undergraduate_enrollment": 3629,
        "location": "Malibu, CA"
    },
    "Rutgers University--Newark": {
        "id": 'School-826',
        "base64": 'U2Nob29zLTgyNg==',
        "name": "Rutgers University--Newark",
        "undergraduate_enrollment": 7417,
        "location": "Newark, NJ"
    },
    "University of Illinois--Chicago": {
        "id": 'School-1111',
        "base64": 'U2Nob29zLTExMTE=',
        "name": "University of Illinois--Chicago",
        "undergraduate_enrollment": 22107,
        "location": "Chicago, IL"
    },
    "New Jersey Institute of Technology": {
        "id": 'School-668',
        "base64": 'U2Nob29zLTY2OA==',
        "name": "New Jersey Institute of Technology",
        "undergraduate_enrollment": 4026,
        "location": "Newark, NJ"
    },
    "University of California, Santa Cruz": {
        "id": 'School-1078',
        "base64": 'U2Nob29zLTEwNzg=',
        "name": "University of California, Santa Cruz",
        "undergraduate_enrollment": 19651,
        "location": "Santa Cruz, CA"
    },
    "Drexel University": {
        "id": 'School-1521',
        "base64": 'U2Nob29zLTE1MjE=',
        "name": "Drexel University",
        "undergraduate_enrollment": 12099,
        "location": "Philadelphia, PA"
    },
    "Howard University": {
        "id": 'School-421',
        "base64": 'U2Nob29zLTQyMQ==',
        "name": "Howard University",
        "undergraduate_enrollment": 10190,
        "location": "Washington, DC"
    },
    "Marquette University": {
        "id": 'School-565',
        "base64": 'U2Nob29zLTU2NQ==',
        "name": "Marquette University",
        "undergraduate_enrollment": 7652,
        "location": "Milwaukee, WI"
    },
    "University of Delaware": {
        "id": 'School-1094',
        "base64": 'U2Nob29zLTEwOTQ=',
        "name": "University of Delaware",
        "undergraduate_enrollment": 19119,
        "location": "Newark, DE"
    },
    "Worcester Polytechnic Institute": {
        "id": 'School-1220',
        "base64": 'U2Nob29zLTEyMjA=',
        "name": "Worcester Polytechnic Institute",
        "undergraduate_enrollment": 5453,
        "location": "Worcester, MA"
    },
    "American University": {
        "id": 'School-32',
        "base64": 'U2Nob29zLTMy',
        "name": "American University",
        "undergraduate_enrollment": 7817,
        "location": "Washington, DC"
    },
    "Baylor University": {
        "id": 'School-90',
        "base64": 'U2Nob29zLTkw',
        "name": "Baylor University",
        "undergraduate_enrollment": 15155,
        "location": "Waco, TX"
    },
    "Fordham University": {
        "id": 'School-1325',
        "base64": 'U2Nob29zLTEzMjU=',
        "name": "Fordham University",
        "undergraduate_enrollment": 10307,
        "location": "New York, NY"
    },
    "Loyola Marymount University": {
        "id": 'School-538',
        "base64": 'U2Nob29zLTUzOA==',
        "name": "Loyola Marymount University",
        "undergraduate_enrollment": 7336,
        "location": "Los Angeles, CA"
    },
    "Rochester Institute of Technology": {
        "id": 'School-807',
        "base64": 'U2Nob29zLTgwNw==',
        "name": "Rochester Institute of Technology",
        "undergraduate_enrollment": 14076,
        "location": "Rochester, NY"
    },
    "Southern Methodist University": {
        "id": 'School-927',
        "base64": 'U2Nob29zLTkyNw==',
        "name": "Southern Methodist University",
        "undergraduate_enrollment": 7115,
        "location": "Dallas, TX"
    },
    "University of South Florida": {
        "id": 'School-1262',
        "base64": 'U2Nob29zLTEyNjI=',
        "name": "University of South Florida",
        "undergraduate_enrollment": 37263,
        "location": "Tampa, FL"
    },
    "Florida International University": {
        "id": 'School-18445',
        "base64": 'U2Nob29zLTE4NDQ1',
        "name": "Florida International University",
        "undergraduate_enrollment": 44045,
        "location": "Miami, FL"
    },
    "Gonzaga University": {
        "id": 'School-370',
        "base64": 'U2Nob29zLTM3MA==',
        "name": "Gonzaga University",
        "undergraduate_enrollment": 5163,
        "location": "Spokane, WA"
    },
    "Rutgers University--Camden": {
        "id": 'School-19100',
        "base64": 'U2Nob29zLTE5MTAw',
        "name": "Rutgers University--Camden",
        "undergraduate_enrollment": 3922,
        "location": "Camden, NJ"
    },
    "Temple University": {
        "id": 'School-999',
        "base64": 'U2Nob29zLTk5OQ==',
        "name": "Temple University",
        "undergraduate_enrollment": 21720,
        "location": "Philadelphia, PA"
    },
    "University of Colorado Boulder": {
        "id": 'School-1087',
        "base64": 'U2Nob29zLTEwODc=',
        "name": "University of Colorado Boulder",
        "undergraduate_enrollment": 32100,
        "location": "Boulder, CO"
    },
    "University of Iowa": {
        "id": 'School-1115',
        "base64": 'U2Nob29zLTExMTU=',
        "name": "University of Iowa",
        "undergraduate_enrollment": 22130,
        "location": "Iowa City, IA"
    },
    "Yeshiva University": {
        "id": 'School-1223',
        "base64": 'U2Nob29zLTEyMjM=',
        "name": "Yeshiva University",
        "undergraduate_enrollment": 2319,
        "location": "New York, NY"
    }
}

In [19]:
def fetch_reviews(university_id):
    #now lets try and fetch all reviews for umass professors, just so we can get an idea of how the edge cases work for each professor
    professors = get_university_professors(university_id)
    reviews = []
    from tqdm import tqdm
    for prof in tqdm(professors, desc="Fetching professor reviews"):
        result = get_all_reviews(prof['id'])
        reviews.append(result)
        time.sleep(0.5)

    #and lets now try and flatten this data, and view the size
    data = []
    for review_list in reviews:
        for review in review_list:
            data.append(review)

    df = pd.DataFrame(data)
    print('Length of df:', len(df))

    professors_df = pd.DataFrame(professors)
    return df, professors_df

#check if the professors_index.csv exists
if os.path.exists('RateMyProfAnalysis/T100Reviews/combined/professors_index.csv'):
    combined_professors = pd.read_csv('RateMyProfAnalysis/T100Reviews/combined/professors_index.csv')
else:
    combined_professors = pd.DataFrame()
    combined_professors['id'] = []
    combined_professors['name'] = []
    combined_professors['University'] = []
    combined_professors['University_ID'] = []
    combined_professors['review_count'] = []
    combined_professors['file_location'] = []
    combined_professors['reviews_count'] = []
    combined_professors['department'] = []

if os.path.exists('RateMyProfAnalysis/T100Reviews/combined/universities_metadata.csv'):
    universities_metadata = pd.read_csv('RateMyProfAnalysis/T100Reviews/combined/universities_metadata.csv')
else:
    universities_metadata = pd.DataFrame()
    universities_metadata['name'] = []
    universities_metadata['id'] = []
    universities_metadata['base64'] = []
    universities_metadata['undergraduate_enrollment'] = []
    universities_metadata['location'] = []
    universities_metadata['total_professors'] = []
    universities_metadata['total_reviews'] = []
    universities_metadata['date_range'] = []
    universities_metadata['file_location'] = []

for university in Universities.values():

    import os

    reviews_csv_path = f'RateMyProfAnalysis/T100Reviews/Universities/{university["name"].replace(" ", "_")}/reviews.csv'
    professors_csv_path = f'RateMyProfAnalysis/T100Reviews/Universities/{university["name"].replace(" ", "_")}/professors.csv'

    if os.path.exists(reviews_csv_path) and os.path.exists(professors_csv_path):
        print(f'{university["name"]}Reviews.csv and {university["name"]}Professors.csv already exist. Skipping fetch.')
    else:
        print(f'fetch from {university["name"]}')
        os.makedirs(f'RateMyProfAnalysis/T100Reviews/Universities/{university["name"].replace(" ", "_")}', exist_ok=True)
        if "base64" in university:
            df, professors_df = fetch_reviews(university["base64"])

            professors_df['University'] = university["name"]
            professors_df['University_ID'] = university["id"]

            # Alternative method to count reviews per professor
            reviews_count = {}
            for _, row in df.iterrows():
                pid = row['pid']
                reviews_count[pid] = reviews_count.get(pid, 0) + 1
            
            professors_df['reviews_count'] = professors_df['id'].map(lambda x: reviews_count.get(x, 0))
            professors_df['file_location'] = f'RateMyProfAnalysis/T100Reviews/Universities/{university["name"].replace(" ", "_")}'


            # Parse date string and handle UTC timezone
            df['date'] = df['date'].str.replace(' +0000 UTC', ' UTC')  # Clean up timezone format
            df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S UTC', errors='raise')
            df['class'] = df['class'].str.upper().str.strip()
            # Convert ratings to float
            rating_cols = ['qualityRating', 'difficultyRating', 'clarityRating', 'helpfulRating']
            df[rating_cols] = df[rating_cols].astype(float)

            # Create date ranges for each professor by grouping reviews by professor ID
            date_ranges = df.groupby('pid')['date'].agg(['min', 'max'])
            # Map the date ranges to professors using their IDs
            professors_df['date_range'] = professors_df['id'].map(lambda x: (date_ranges.loc[x, 'min'], date_ranges.loc[x, 'max']) if x in date_ranges.index else (None, None))

            df.to_csv(f'RateMyProfAnalysis/T100Reviews/Universities/{university["name"].replace(" ", "_")}/reviews.csv', index=False)
            professors_df.to_csv(f'RateMyProfAnalysis/T100Reviews/Universities/{university["name"].replace(" ", "_")}/professors.csv', index=False)
            combined_professors = pd.concat([combined_professors, professors_df], ignore_index=True)
            combined_professors.to_csv('RateMyProfAnalysis/T100Reviews/combined/professors_index.csv', index=False)
            print(f'saved to {university["name"]}Reviews.csv and {university["name"]}-Professors.csv')

            date_range = (df['date'].min(), df['date'].max())
            if university["name"] in universities_metadata['name'].values:
                print(f'{university["name"]} already exists in universities_metadata. Skipping fetch.')
                continue
            else:
                print(f'{university["name"]} does not exist in universities_metadata. Fetching reviews.')
                new_entry = {
                    'name': university["name"], 
                    'id': university["id"], 
                    'base64': university["base64"], 
                    'undergraduate_enrollment': university["undergraduate_enrollment"],
                    'location': university["location"],
                    'total_professors': len(professors_df),
                    'total_reviews': len(df),
                    'date_range': date_range,
                    'file_location': f'RateMyProfAnalysis/T100Reviews/Universities/{university["name"].replace(" ", "_")}'
                }
                universities_metadata = pd.concat([universities_metadata, pd.DataFrame([new_entry])], ignore_index=True)
                universities_metadata.to_csv('RateMyProfAnalysis/T100Reviews/combined/universities_metadata.csv', index=False)

        else:
            print(f'{university["name"]} does not have a base64 value')



Princeton UniversityReviews.csv and Princeton UniversityProfessors.csv already exist. Skipping fetch.
Massachusetts Institute of TechnologyReviews.csv and Massachusetts Institute of TechnologyProfessors.csv already exist. Skipping fetch.
Harvard UniversityReviews.csv and Harvard UniversityProfessors.csv already exist. Skipping fetch.
Stanford UniversityReviews.csv and Stanford UniversityProfessors.csv already exist. Skipping fetch.
Yale UniversityReviews.csv and Yale UniversityProfessors.csv already exist. Skipping fetch.
California Institute of TechnologyReviews.csv and California Institute of TechnologyProfessors.csv already exist. Skipping fetch.
Duke UniversityReviews.csv and Duke UniversityProfessors.csv already exist. Skipping fetch.
Johns Hopkins UniversityReviews.csv and Johns Hopkins UniversityProfessors.csv already exist. Skipping fetch.
Northwestern UniversityReviews.csv and Northwestern UniversityProfessors.csv already exist. Skipping fetch.
University of PennsylvaniaReview

Fetching professor reviews: 100%|████████████████████████████████████████████████████| 972/972 [10:58<00:00,  1.48it/s]


Length of df: 4267
saved to Columbia UniversityReviews.csv and Columbia University-Professors.csv
Columbia University does not exist in universities_metadata. Fetching reviews.
fetch from Dartmouth College
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Writing', 'firstName': 'Karen', 'id': 'VGVhY2hlci0xMTAwMzU5', 'lastName': 'Gocsik'}}, {'node': {'department': 'Computer Science', 'firstName': 'Prasad', 'id': 'VGVhY2hlci0yNjE0ODU=', 'lastName': 'Jayanti'}}, {'node': {'department': 'Economics', 'firstName': 'Meir', 'id': 'VGVhY2hlci03NDE5MTM=', 'lastName': 'Kohn'}}, {'node': {'department': 'Anthropology', 'firstName': 'Deborah', 'id': 'VGVhY2hlci0yODI0MTk=', 'lastName': 'Nichols'}}, {'node': {'department': 'Psychology', 'firstName': 'Yale', 'id': 'VGVhY2hlci0zMTIyMTU=', 'lastName': 'Cohen'}}, {'node': {'department': 'Humanities', 'firstName': 'Don', 'id': 'VGVhY2hlci0yNjQyMjg=', 'lastName': 'Pease'}}, {'node': {'department': 'Computer Science', 'firstN

Fetching professor reviews: 100%|████████████████████████████████████████████████████| 235/235 [02:37<00:00,  1.49it/s]


Length of df: 462
saved to Dartmouth CollegeReviews.csv and Dartmouth College-Professors.csv
Dartmouth College does not exist in universities_metadata. Fetching reviews.
fetch from University of California--Los Angeles
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Economics', 'firstName': 'Stephen', 'id': 'VGVhY2hlci0xNTc2MTAz', 'lastName': 'Ross'}}, {'node': {'department': 'Dental', 'firstName': 'Jay', 'id': 'VGVhY2hlci0yMjI0MDA0', 'lastName': 'Grossman'}}, {'node': {'department': 'Mathematics', 'firstName': 'Esmaail', 'id': 'VGVhY2hlci0yMjY1OTg3', 'lastName': 'Nikjeh'}}, {'node': {'department': 'Law', 'firstName': 'Brennan', 'id': 'VGVhY2hlci0yODk0NDg1', 'lastName': 'Patrick'}}, {'node': {'department': 'Sociology', 'firstName': 'Terri', 'id': 'VGVhY2hlci02NzM1NjA=', 'lastName': 'Anderson'}}, {'node': {'department': 'Physics', 'firstName': 'Jun', 'id': 'VGVhY2hlci0zNDMyOTg=', 'lastName': 'Park'}}, {'node': {'department': 'Accounting', 'firstName': 

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 1432/1432 [16:11<00:00,  1.47it/s]


Length of df: 5151
saved to University of California--Los AngelesReviews.csv and University of California--Los Angeles-Professors.csv
University of California--Los Angeles does not exist in universities_metadata. Fetching reviews.
fetch from University of California, Berkeley
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Physical Ed', 'firstName': 'Toni', 'id': 'VGVhY2hlci0zMTI2MTU=', 'lastName': 'Mar'}}, {'node': {'department': 'Statistics', 'firstName': 'Howard', 'id': 'VGVhY2hlci04ODQ5MTU=', 'lastName': "D'Abrera"}}, {'node': {'department': 'Astronomy', 'firstName': 'Alex', 'id': 'VGVhY2hlci0xNjk4NA==', 'lastName': 'Filippenko'}}, {'node': {'department': 'Mathematics', 'firstName': 'Zvezdelina', 'id': 'VGVhY2hlci0zNzUyNjk=', 'lastName': 'Stankova'}}, {'node': {'department': 'Mathematics', 'firstName': 'Alexander', 'id': 'VGVhY2hlci0yMzM1NDc5', 'lastName': 'Paulin'}}, {'node': {'department': 'Biology', 'firstName': 'David E.', 'id': 'VGVhY2hlci0xM

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 4246/4246 [49:04<00:00,  1.44it/s]


Length of df: 52700
saved to University of California, BerkeleyReviews.csv and University of California, Berkeley-Professors.csv
University of California, Berkeley does not exist in universities_metadata. Fetching reviews.
fetch from Rice University
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Mathematics', 'firstName': 'Zhiyong', 'id': 'VGVhY2hlci0xNDE0MTI=', 'lastName': 'Gao'}}, {'node': {'department': 'Science', 'firstName': 'J', 'id': 'VGVhY2hlci0xODIwMDE=', 'lastName': 'Tour'}}, {'node': {'department': 'Sociology', 'firstName': 'Craig', 'id': 'VGVhY2hlci0yMTExNTEz', 'lastName': 'Considine'}}, {'node': {'department': 'Psychology', 'firstName': 'Mikki', 'id': 'VGVhY2hlci0xMzYyNzM=', 'lastName': 'Hebl'}}, {'node': {'department': 'Biology', 'firstName': 'Mike', 'id': 'VGVhY2hlci00NzY3NzI=', 'lastName': 'Gustin'}}, {'node': {'department': 'Mathematics', 'firstName': 'Frank', 'id': 'VGVhY2hlci0yMjM5NTA=', 'lastName': 'Jones'}}, {'node': {'department

Fetching professor reviews: 100%|████████████████████████████████████████████████████| 275/275 [03:05<00:00,  1.48it/s]


Length of df: 748
saved to Rice UniversityReviews.csv and Rice University-Professors.csv
Rice University does not exist in universities_metadata. Fetching reviews.
fetch from University of Notre Dame
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Economics', 'firstName': 'Forrest', 'id': 'VGVhY2hlci0yMjAzNjY3', 'lastName': 'Spence'}}, {'node': {'department': 'Chemistry', 'firstName': 'Slavi', 'id': 'VGVhY2hlci0yNjQwMjUz', 'lastName': 'Sevov'}}, {'node': {'department': 'Chemistry', 'firstName': 'Seth', 'id': 'VGVhY2hlci03MjQyMTk=', 'lastName': 'Brown'}}, {'node': {'department': 'Chemistry', 'firstName': 'Steven', 'id': 'VGVhY2hlci0yMzE1ODk0', 'lastName': 'Wietstock'}}, {'node': {'department': 'Mathematics', 'firstName': 'Arthur', 'id': 'VGVhY2hlci0yMTIxNDgx', 'lastName': 'Lim'}}, {'node': {'department': 'Theology', 'firstName': 'Kevin', 'id': 'VGVhY2hlci0yNTEyNzg0', 'lastName': 'Grove'}}, {'node': {'department': 'Psychology', 'firstName': 'Anre', 'id'

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 1221/1221 [13:49<00:00,  1.47it/s]


Length of df: 4468
saved to University of Notre DameReviews.csv and University of Notre Dame-Professors.csv
University of Notre Dame does not exist in universities_metadata. Fetching reviews.
fetch from Vanderbilt University
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Economics', 'firstName': 'Stephen', 'id': 'VGVhY2hlci0xMjY3Mjg=', 'lastName': 'Buckles'}}, {'node': {'department': 'Psychology', 'firstName': 'Leslie', 'id': 'VGVhY2hlci0xNDc1MDE=', 'lastName': 'Smith'}}, {'node': {'department': 'Mathematics', 'firstName': 'Pamela', 'id': 'VGVhY2hlci0xNTE1Mjk=', 'lastName': 'Pigg'}}, {'node': {'department': 'Chemistry', 'firstName': 'Shawn', 'id': 'VGVhY2hlci04NTc2NjA=', 'lastName': 'Phillips'}}, {'node': {'department': 'Psychology', 'firstName': 'Elisabeth', 'id': 'VGVhY2hlci01NDM5MDg=', 'lastName': 'Sandberg'}}, {'node': {'department': 'Chemistry', 'firstName': 'Tara', 'id': 'VGVhY2hlci01MTI0MDM=', 'lastName': 'Todd'}}, {'node': {'department': 'Com

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 2329/2329 [27:02<00:00,  1.44it/s]


Length of df: 31508
saved to Vanderbilt UniversityReviews.csv and Vanderbilt University-Professors.csv
Vanderbilt University does not exist in universities_metadata. Fetching reviews.
fetch from Carnegie Mellon University
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Mathematics', 'firstName': 'Tim', 'id': 'VGVhY2hlci00NDIwMw==', 'lastName': 'Flaherty'}}, {'node': {'department': 'Computer Science', 'firstName': 'Iliano', 'id': 'VGVhY2hlci0yMjIwMDU0', 'lastName': 'Cervesato'}}, {'node': {'department': 'Computer Science', 'firstName': 'David', 'id': 'VGVhY2hlci0xMTc5NjI5', 'lastName': 'Kosbie'}}, {'node': {'department': 'Statistics', 'firstName': 'Gordon', 'id': 'VGVhY2hlci0xMzMzNDgy', 'lastName': 'Weinberg'}}, {'node': {'department': 'Mathematics', 'firstName': 'Gregory', 'id': 'VGVhY2hlci0xMzQyNzI1', 'lastName': 'Johnson'}}, {'node': {'department': 'Mathematics', 'firstName': 'Irina', 'id': 'VGVhY2hlci0xMDgyNDk1', 'lastName': 'Gheorghiciuc'}}, {'nod

Fetching professor reviews: 100%|████████████████████████████████████████████████████| 969/969 [11:01<00:00,  1.47it/s]


Length of df: 4810
saved to Carnegie Mellon UniversityReviews.csv and Carnegie Mellon University-Professors.csv
Carnegie Mellon University does not exist in universities_metadata. Fetching reviews.
fetch from University of Michigan--Ann Arbor
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Information Science', 'firstName': 'Charles', 'id': 'VGVhY2hlci0xMTU5Mjgw', 'lastName': 'Severance'}}, {'node': {'department': 'Chemistry', 'firstName': 'Kathleen', 'id': 'VGVhY2hlci0yMjQ0ODQ=', 'lastName': 'Nolta'}}, {'node': {'department': 'American Studies', 'firstName': 'Bruce', 'id': 'VGVhY2hlci02MDIyMTg=', 'lastName': 'Conforth'}}, {'node': {'department': 'Psychology', 'firstName': 'Shelly', 'id': 'VGVhY2hlci0yMjQ0NDU=', 'lastName': 'Schreier'}}, {'node': {'department': 'Psychology', 'firstName': 'Brian', 'id': 'VGVhY2hlci0yODc5ODE=', 'lastName': 'Malley'}}, {'node': {'department': 'Statistics', 'firstName': 'Brenda', 'id': 'VGVhY2hlci0yMDQ3MzI=', 'lastName': 

Fetching professor reviews: 100%|████████████████████████████████████████████████| 5530/5530 [1:03:59<00:00,  1.44it/s]


Length of df: 67498
saved to University of Michigan--Ann ArborReviews.csv and University of Michigan--Ann Arbor-Professors.csv
University of Michigan--Ann Arbor does not exist in universities_metadata. Fetching reviews.
fetch from Washington University in St. Louis
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Economics', 'firstName': 'Sudeshna', 'id': 'VGVhY2hlci0xNjc2MDU=', 'lastName': 'Bandyopadhyay'}}, {'node': {'department': 'Chemistry', 'firstName': 'Richard', 'id': 'VGVhY2hlci00OTcwMDM=', 'lastName': 'Loomis'}}, {'node': {'department': 'Mathematics', 'firstName': 'Karl', 'id': 'VGVhY2hlci0yNjM3NTAz', 'lastName': 'Schaefer'}}, {'node': {'department': 'Economics', 'firstName': 'Dottie', 'id': 'VGVhY2hlci0yNjA1ODk=', 'lastName': 'Petersen'}}, {'node': {'department': 'Mathematics', 'firstName': 'Jack', 'id': 'VGVhY2hlci00NTg2NjU=', 'lastName': 'Shapiro'}}, {'node': {'department': 'Chemistry', 'firstName': 'Bryn', 'id': 'VGVhY2hlci0yNjQ2MjUw', 'la

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 1884/1884 [21:38<00:00,  1.45it/s]


Length of df: 15982
saved to Washington University in St. LouisReviews.csv and Washington University in St. Louis-Professors.csv
Washington University in St. Louis does not exist in universities_metadata. Fetching reviews.
fetch from Emory University
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Psychology', 'firstName': 'Andrew', 'id': 'VGVhY2hlci0xOTc2NDQ4', 'lastName': 'Kazama'}}, {'node': {'department': 'Chemistry', 'firstName': 'Douglas', 'id': 'VGVhY2hlci05NTE2NDQ=', 'lastName': 'Mulford'}}, {'node': {'department': 'Psychology', 'firstName': 'David', 'id': 'VGVhY2hlci01NTA4NQ==', 'lastName': 'Edwards'}}, {'node': {'department': 'Chemistry', 'firstName': 'Matthew', 'id': 'VGVhY2hlci03NDM2NA==', 'lastName': 'Weinschenk'}}, {'node': {'department': 'Business Administration', 'firstName': 'Reshma', 'id': 'VGVhY2hlci0yNTg4ODg0', 'lastName': 'Shah'}}, {'node': {'department': 'Chemistry', 'firstName': 'Tracy', 'id': 'VGVhY2hlci0yNTE4MzA=', 'lastName':

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 1976/1976 [22:48<00:00,  1.44it/s]


Length of df: 20242
saved to Emory UniversityReviews.csv and Emory University-Professors.csv
Emory University does not exist in universities_metadata. Fetching reviews.
fetch from Georgetown University
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Economics', 'firstName': 'Carol', 'id': 'VGVhY2hlci0zNjU5NDA=', 'lastName': 'Rogers'}}, {'node': {'department': 'Economics', 'firstName': 'Arik', 'id': 'VGVhY2hlci0yMTIyOTA=', 'lastName': 'Levinson'}}, {'node': {'department': 'Mathematics', 'firstName': 'Oded', 'id': 'VGVhY2hlci0xNTEzMDI5', 'lastName': 'Meyer'}}, {'node': {'department': 'Government', 'firstName': 'Eric', 'id': 'VGVhY2hlci0xMzMwNDc=', 'lastName': 'Langenbacher'}}, {'node': {'department': 'Economics', 'firstName': 'Marius', 'id': 'VGVhY2hlci00NDM0NA==', 'lastName': 'Schwartz'}}, {'node': {'department': 'Economics', 'firstName': 'David', 'id': 'VGVhY2hlci0yNjI3MDIw', 'lastName': 'Burk'}}, {'node': {'department': 'Theology', 'firstName': 'Lauv

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 2721/2721 [31:38<00:00,  1.43it/s]


Length of df: 33528
saved to Georgetown UniversityReviews.csv and Georgetown University-Professors.csv
Georgetown University does not exist in universities_metadata. Fetching reviews.
fetch from University of Virginia
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Economics', 'firstName': 'Kenneth', 'id': 'VGVhY2hlci0yNzMw', 'lastName': 'Elzinga'}}, {'node': {'department': 'Biology', 'firstName': 'David', 'id': 'VGVhY2hlci04NjA5Njg=', 'lastName': 'Kittlesen'}}, {'node': {'department': 'Economics', 'firstName': 'Lee', 'id': 'VGVhY2hlci0zODM3MTc=', 'lastName': 'Coppock'}}, {'node': {'department': 'Science', 'firstName': 'Dave', 'id': 'VGVhY2hlci0yMDc0OQ==', 'lastName': 'Metcalf'}}, {'node': {'department': 'Chemistry', 'firstName': 'Dean', 'id': 'VGVhY2hlci02MDA0', 'lastName': 'Harman'}}, {'node': {'department': 'Astronomy', 'firstName': 'Charlie', 'id': 'VGVhY2hlci05MzE5MA==', 'lastName': 'Tolbert'}}, {'node': {'department': 'Engineering', 'firstName':

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 2098/2098 [24:09<00:00,  1.45it/s]


Length of df: 17227
saved to University of VirginiaReviews.csv and University of Virginia-Professors.csv
University of Virginia does not exist in universities_metadata. Fetching reviews.
fetch from University of North Carolina--Chapel Hill
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Computer Science', 'firstName': 'Ketan', 'id': 'VGVhY2hlci0xNTkyMDIz', 'lastName': 'Mayer-Patel'}}, {'node': {'department': 'Economics', 'firstName': 'Kalina ', 'id': 'VGVhY2hlci0yMDQ2ODUx', 'lastName': 'Staub'}}, {'node': {'department': 'Computer Science', 'firstName': 'Kristopher', 'id': 'VGVhY2hlci0yMDU3Njc0', 'lastName': 'Jordan'}}, {'node': {'department': 'Mathematics', 'firstName': 'Elizabeth', 'id': 'VGVhY2hlci00ODg5NzU=', 'lastName': 'McLaughlin'}}, {'node': {'department': 'Biology', 'firstName': 'Alaina', 'id': 'VGVhY2hlci0yMjgyNTY5', 'lastName': 'Garland'}}, {'node': {'department': 'Economics', 'firstName': 'Rita', 'id': 'VGVhY2hlci04ODY1ODA=', 'lastName': 'B

Fetching professor reviews: 100%|██████████████████████████████████████████████████| 4382/4382 [50:38<00:00,  1.44it/s]


Length of df: 56251
saved to University of North Carolina--Chapel HillReviews.csv and University of North Carolina--Chapel Hill-Professors.csv
University of North Carolina--Chapel Hill does not exist in universities_metadata. Fetching reviews.
fetch from University of Southern California
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Computer Science', 'firstName': 'Marco', 'id': 'VGVhY2hlci0xMTA0Nzgy', 'lastName': 'Papa'}}, {'node': {'department': 'Chemistry', 'firstName': 'Catherine', 'id': 'VGVhY2hlci0xNzM4NjU5', 'lastName': 'Skibo'}}, {'node': {'department': 'Religion', 'firstName': 'Bruce', 'id': 'VGVhY2hlci02NjYwMjc=', 'lastName': 'Zuckerman'}}, {'node': {'department': 'Engineering', 'firstName': 'Mark', 'id': 'VGVhY2hlci04NjA4ODg=', 'lastName': 'Redekopp'}}, {'node': {'department': 'Film', 'firstName': 'Drew', 'id': 'VGVhY2hlci00OTkz', 'lastName': 'Casper'}}, {'node': {'department': 'Chemistry', 'firstName': 'Thomas', 'id': 'VGVhY2hlci0xNDM2MT

Fetching professor reviews: 100%|████████████████████████████████████████████████| 5134/5134 [9:19:54<00:00,  6.54s/it]


Length of df: 61036
saved to University of Southern CaliforniaReviews.csv and University of Southern California-Professors.csv
University of Southern California does not exist in universities_metadata. Fetching reviews.
fetch from University of California, San Diego
Page data: {'data': {'newSearch': {'teachers': {'edges': [{'node': {'department': 'Chemistry', 'firstName': 'Carl', 'id': 'VGVhY2hlci03ODU1', 'lastName': 'Hoeger'}}, {'node': {'department': 'Mathematics', 'firstName': 'John', 'id': 'VGVhY2hlci0xMDU3ODQ=', 'lastName': 'Eggers'}}, {'node': {'department': 'Chemistry', 'firstName': 'Bob', 'id': 'VGVhY2hlci0zODc4ODk=', 'lastName': 'Ternansky'}}, {'node': {'department': 'Economics', 'firstName': 'Steve', 'id': 'VGVhY2hlci0xNzM5MDI3', 'lastName': 'Levkoff'}}, {'node': {'department': 'Chemistry', 'firstName': 'Christina A.', 'id': 'VGVhY2hlci00ODM2OTI=', 'lastName': 'Johnson'}}, {'node': {'department': 'Chemistry', 'firstName': 'Stacey', 'id': 'VGVhY2hlci0xMTcxMjI4', 'lastName': 'B

Fetching professor reviews:   1%|▍                                                   | 33/3608 [00:30<54:15,  1.10it/s]


KeyboardInterrupt: 

In [None]:
#TODO: drop reviews_count from professors_index.csv and each professors.csv
