In [97]:
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [98]:
# Romanian subject name -> English translation.
# The Romanian keys are the search terms substituted into the URL templates
# (they are what gets passed to scrape_subject); the English values are only
# carried along into the results rows.
# NOTE(review): "matematica" is written without the diacritic, unlike the
# other keys (e.g. "informatică") — confirm this is the intended search term.
SUBJECTS = {
    "matematica": "mathematics",
    "informatică": "computer science",
    "biologie": "biology",
    "chimie": "chemistry",
    "fizică": "physics",
    "geografie": "geography",
    "istorie": "history",
    "limba română": "Romanian language",
    "limba engleză": "English language",
    "limba franceză": "French language",
    "limba germană": "German language",
    "limba spaniolă": "Spanish language",
    "economie": "economics",
    "filosofie": "philosophy",
    "psihologie": "psychology",
    "sociologie": "sociology",
    "educație civică": "civic education",
    "științe politice": "political science",
    "arte vizuale": "visual arts",
    "muzică": "music",
    "educație fizică": "physical education",
    "tehnologia informației": "information technology",
    "religie": "religion",
    "management": "management",
    "contabilitate": "accounting"
}

In [99]:

# URL templates; the single `{}` placeholder is filled with the subject name
# via str.format.
# NOTE(review): the teacher search URL pins page=3. Only the
# 'totalTeachersCount' field of the response is read, so presumably the page
# number does not affect the result — TODO confirm.
URL_TEMPLATE_PROFESSORS = 'https://www.meetnlearn.ro/api/v2/teachers/search/results?lang=ro&page=3&query={}&location=&online=0&identityVerification=0&freeTrialLesson=0&fastResponse=0&minimumRating=0'
URL_TEMPLATE_STUDENTS = 'https://www.meetnlearn.ro/solicitare-de-meditatii?q={}'

In [100]:
def get_professor_count(url : str, timeout : float = 30.0):
    """Fetch a teacher-search API URL and return its reported total count.

    Args:
        url: Fully formatted teacher search URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely and a stalled connection
            hangs the whole scraping loop.

    Returns:
        The value stored under ``data -> totalTeachersCount`` in the JSON
        response (presumably an int; the type is whatever the API sends).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the JSON payload lacks the expected keys.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here, rather than with an obscure JSON/KeyError
    # while parsing an error page.
    response.raise_for_status()
    payload = response.json()
    return payload['data']['totalTeachersCount']

def get_students_count(url : str, timeout : float = 30.0):
    """Count the listing cards on a student tutoring-request page.

    The page is fetched and parsed, the listing column is located with a
    fixed CSS selector, and its direct ``<div>`` children carrying the
    ``bg-white`` class are counted.

    Args:
        url: Fully formatted student request page URL.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the scraping loop.

    Returns:
        Number of direct ``div.bg-white`` children of the listing column.
        NOTE(review): only the fetched page is inspected; if the site
        paginates these listings, this is a per-page count — confirm.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        ValueError: If the listing column cannot be found in the page,
            which most likely means the page layout has changed.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the parsed tree could
    # differ between machines.
    soup = BeautifulSoup(response.text, 'html.parser')
    col = soup.select_one('body > div.container > div.d-lg-flex.gap-3.align-items-start.mb-5.pb-3 > div.col')
    if col is None:
        raise ValueError(f'Listing column not found in page: {url}')

    # class_ matches tags whose class list contains 'bg-white' and simply
    # skips <div>s with no class attribute (indexing x['class'] would raise
    # KeyError on those).
    cards = col.find_all('div', class_='bg-white', recursive=False)
    return len(cards)

In [101]:
def scrape_subject(subject : str):
    """Look up the teacher and student counts for one subject.

    Both URL templates are filled with the subject, each count is fetched,
    and both counts are printed before being returned.

    Args:
        subject: Subject name used as the search term (the Romanian keys of
            SUBJECTS are what the notebook passes in).

    Returns:
        A ``(professors_count, student_count)`` tuple. Note that the order
        is professors first, although the student line is printed first.
    """
    # Percent-encode the term before placing it in the query string, so that
    # spaces, diacritics and reserved characters ('&', '+', '#', ...) cannot
    # alter or truncate the query.
    query = quote(subject, safe='')

    url_professors = URL_TEMPLATE_PROFESSORS.format(query)
    url_students = URL_TEMPLATE_STUDENTS.format(query)

    professors_count = get_professor_count(url_professors)
    student_count = get_students_count(url_students)

    print(f'Student count for {subject} is {student_count}')
    print(f'Professor count for {subject} is {professors_count}')

    return professors_count, student_count

In [102]:
# Scrape every subject in dictionary order, collecting one row per subject:
# (Romanian name, English name, professor count, student count).
# Rows are appended one at a time, so the rows gathered so far remain in
# `results` if the loop is interrupted.
results = []
for subject_ro, subject_eng in SUBJECTS.items():
    professor_total, student_total = scrape_subject(subject_ro)
    row = (subject_ro, subject_eng, professor_total, student_total)
    results.append(row)

Student count for matematica is 12
Professor count for matematica is 562
Student count for informatică is 4
Professor count for informatică is 102
Student count for biologie is 1
Professor count for biologie is 186


KeyboardInterrupt: 

In [95]:
# Turn the collected row tuples into a table. The column names go straight
# to the constructor, so an empty `results` list still yields a (zero-row)
# frame with the expected columns; assigning `.columns` afterwards raises a
# length-mismatch error in that case.
# The name `results` is rebound from the list of rows to the DataFrame, as
# before, because the cell that writes the CSV reads it under this name.
results = pd.DataFrame(
    results,
    columns=['subject_ro', 'subject_eng', 'professor_count', 'student_count'],
)

In [96]:
# Write the results table to 'results.csv' (a relative path, so it resolves
# against the current working directory); index = False leaves the row index
# out of the file.
results.to_csv('results.csv', index = False)