In [1]:
import warnings
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Romanian search term -> English label.
# The Romanian key is what gets substituted into the site URLs; the English
# value is only carried along into the exported table.  Dict insertion order is
# the order in which the subjects are scraped.
SUBJECTS = {
    # Mathematics and computing
    # NOTE(review): 'matematica' is written without the final "ă", unlike the
    # other keys that carry diacritics -- confirm this is the intended query.
    'matematica': 'Mathematics',
    'informatică': 'Computer science',

    # Natural sciences
    'biologie': 'Biology',
    'chimie': 'Chemistry',
    'fizică': 'Physics',

    # Geography and history
    'geografie': 'Geography',
    'istorie': 'History',

    # Languages
    'limba română': 'Romanian language',
    'limba engleză': 'English language',
    'limba franceză': 'French language',
    'limba germană': 'German language',
    'limba spaniolă': 'Spanish language',

    # Social sciences and humanities
    'economie': 'Economics',
    'filosofie': 'Philosophy',
    'psihologie': 'Psychology',
    'sociologie': 'Sociology',
    'educație civică': 'Civic education',
    'științe politice': 'Political science',

    # Arts, sport and other subjects
    'arte vizuale': 'Visual arts',
    'muzică': 'Music',
    'educație fizică': 'Physical education',
    'tehnologia informației': 'Information technology',
    'religie': 'Religion',
    'management': 'Management',
    'contabilitate': 'Accounting',
}

In [3]:

# Teacher-search JSON endpoint; the single `{}` placeholder receives the search
# query.  All filter parameters are left at 0 / empty.
# NOTE(review): `page=3` is hard-coded even though only the total count is read
# from the reply -- confirm the total does not depend on the requested page.
URL_TEMPLATE_PROFESSORS = (
    'https://www.meetnlearn.ro/api/v2/teachers/search/results'
    '?lang=ro&page=3&query={}'
    '&location=&online=0&identityVerification=0'
    '&freeTrialLesson=0&fastResponse=0&minimumRating=0'
)

# HTML page that is scraped for the student count; `{}` receives the search query.
URL_TEMPLATE_STUDENTS = (
    'https://www.meetnlearn.ro/solicitare-de-meditatii'
    '?q={}'
)

In [4]:
def get_professor_count(url : str, timeout : float = 30.0):
    """Fetch a teacher-search API URL and return its total teacher count.

    Parameters
    ----------
    url : str
        Fully formatted search endpoint URL.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout a
        stalled connection would block the notebook indefinitely.

    Returns
    -------
    The value found under ``data -> totalTeachersCount`` in the JSON reply.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the JSON reply does not contain the expected keys.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()
    return payload['data']['totalTeachersCount']

def get_students_count(url : str, timeout : float = 30.0):
    """Download a listing page and count the ``bg-white`` cards in its results column.

    The page is parsed as HTML, the results column is located with a fixed CSS
    selector, and the column's *direct* ``div`` children whose class list
    contains ``bg-white`` are counted (presumably one card per student
    request -- confirm against the live page markup).

    Parameters
    ----------
    url : str
        Fully formatted listing-page URL.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    int
        Number of matching cards; 0 when the results column is not present
        (a ``RuntimeWarning`` is emitted in that case).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Name the parser explicitly: otherwise Beautiful Soup picks whichever one
    # happens to be installed, so the tree can differ between machines.
    soup = BeautifulSoup(response.text, 'html.parser')
    col = soup.select_one('body > div.container > div.d-lg-flex.gap-3.align-items-start.mb-5.pb-3 > div.col')

    if col is None:
        # The selector did not match; report zero but make the situation
        # visible so a layout change is not silently recorded as "no students".
        warnings.warn(
            f'Results column not found at {url}; reporting 0',
            RuntimeWarning,
            stacklevel=2,
        )
        return 0

    # `.get('class')` instead of `['class']`: a div without a class attribute
    # must simply not be counted rather than raise KeyError.
    return sum(
        1
        for child in col.find_all('div', recursive = False)
        if 'bg-white' in (child.get('class') or [])
    )

In [5]:
def scrape_subject(subject : str):
    """Look up the professor and student counts for one subject and print them.

    Parameters
    ----------
    subject : str
        Search term to substitute into both URL templates.

    Returns
    -------
    tuple
        ``(professors_count, student_count)`` as returned by
        ``get_professor_count`` and ``get_students_count``.
    """
    # Percent-encode the term before placing it in the query string so that
    # spaces, diacritics and reserved characters such as '&' or '#' cannot
    # alter the structure of the URL.
    encoded_subject = quote(str(subject), safe='')

    url_professors = URL_TEMPLATE_PROFESSORS.format(encoded_subject)
    url_students = URL_TEMPLATE_STUDENTS.format(encoded_subject)

    professors_count = get_professor_count(url_professors)
    student_count = get_students_count(url_students)

    # Progress messages show the original (unencoded) subject.
    print(f'Student count for {subject} is {student_count}')
    print(f'Professor count for {subject} is {professors_count}')

    return professors_count, student_count

In [6]:
# Scrape every entry of SUBJECTS and collect one
# (subject_ro, subject_eng, professor_count, student_count) tuple per subject.
# There is no error handling here: an exception raised by scrape_subject stops
# the loop and leaves `results` holding only the subjects processed so far.
# NOTE(review): the saved output below lists 14 of the 25 subjects -- confirm
# whether the recorded run actually completed.
results = []
for subject_ro, subject_eng in SUBJECTS.items():
    # scrape_subject returns (professors_count, student_count), in that order.
    pc, sc = scrape_subject(subject_ro)
    results.append((subject_ro, subject_eng, pc, sc))

Student count for matematica is 12
Professor count for matematica is 562
Student count for informatică is 4
Professor count for informatică is 102
Student count for biologie is 1
Professor count for biologie is 186
Student count for chimie is 3
Professor count for chimie is 117
Student count for fizică is 2
Professor count for fizică is 82
Student count for geografie is 0
Professor count for geografie is 72
Student count for istorie is 2
Professor count for istorie is 148
Student count for limba română is 3
Professor count for limba română is 431
Student count for limba engleză is 2
Professor count for limba engleză is 532
Student count for limba franceză is 2
Professor count for limba franceză is 129
Student count for limba germană is 3
Professor count for limba germană is 95
Student count for limba spaniolă is 1
Professor count for limba spaniolă is 7
Student count for economie is 0
Professor count for economie is 23
Student count for filosofie is 0
Professor count for filosofie is 5

In [7]:
# Turn the collected rows into a table.  The column names are passed to the
# constructor rather than assigned afterwards, so an empty `results` yields an
# empty frame with the right columns instead of a length-mismatch ValueError.
results = pd.DataFrame(
    results,
    columns=['subject_ro', 'subject_eng', 'professor_count', 'student_count'],
)

In [8]:
# Write the table to results.csv in the working directory, omitting the row index.
results.to_csv(path_or_buf='results.csv', index=False)