In [1]:
import csv
import time

import requests

In [2]:
class Scraper:
    """Download exam-score records city by city and append them to a CSV file.

    For every entry in ``self.cities`` the scraper builds keywords made of the
    two-digit city code followed by a four-digit running counter, queries the
    score API with each keyword, and appends the returned records to
    ``<file_name>.csv`` until the API answers with an empty result list.

    Args:
        timeout: Seconds to wait for each HTTP request before treating it as
            failed.
        max_attempts: How many consecutive failed requests are tolerated for a
            single keyword before the current city is abandoned.
        retry_delay: Base delay in seconds between retries; the wait grows
            linearly with the number of consecutive failures.
    """

    def __init__(self, timeout=30, max_attempts=5, retry_delay=1.0):
        self.file_name = "score_raw"
        # Numeric code -> display name. The code is used as the first two
        # digits of every search keyword. Note that there is no entry for 20.
        self.cities = {
            1: "Hà Nội", 2: "Hồ Chí Minh", 3: "Hải Phòng", 4: "Đà Nẵng",
            5: "Hà Giang", 6: "Cao Bằng", 7: "Lai Châu", 8: "Lào Cai",
            9: "Tuyên Quang", 10: "Lạng Sơn", 11: "Bắc Kạn", 12: "Thái Nguyên",
            13: "Yên Bái", 14: "Sơn La", 15: "Phú Thọ", 16: "Vĩnh Phúc",
            17: "Quảng Ninh", 18: "Bắc Giang", 19: "Bắc Ninh", 21: "Hải Dương",
            22: "Hưng Yên", 23: "Hòa Bình", 24: "Hà Nam", 25: "Nam Định",
            26: "Thái Bình", 27: "Ninh Bình", 28: "Thanh Hóa", 29: "Nghệ An",
            30: "Hà Tĩnh", 31: "Quảng Bình", 32: "Quảng Trị",
            33: "Thừa Thiên - Huế", 34: "Quảng Nam", 35: "Quảng Ngãi",
            36: "Kon Tum", 37: "Bình Định", 38: "Gia Lai", 39: "Phú Yên",
            40: "Đắk Lắk", 41: "Khánh Hòa", 42: "Lâm Đồng", 43: "Bình Phước",
            44: "Bình Dương", 45: "Ninh Thuận", 46: "Tây Ninh",
            47: "Bình Thuận", 48: "Đồng Nai", 49: "Long An", 50: "Đồng Tháp",
            51: "An Giang", 52: "Bà Rịa", 53: "Tiền Giang", 54: "Kiên Giang",
            55: "Cần Thơ", 56: "Bến Tre", 57: "Vĩnh Long", 58: "Trà Vinh",
            59: "Sóc Trăng", 60: "Bạc Liêu", 61: "Cà Mau", 62: "Điện Biên",
            63: "Đăk Nông", 64: "Hậu Giang",
        }
        self.url = 'https://api.giaoducthoidai.vn/api/diem-thi'
        self.timeout = timeout
        self.max_attempts = max_attempts
        self.retry_delay = retry_delay

    def access_page(self, keyword):
        """Query the score API for ``keyword``.

        The request always asks for exam ``'THPT'`` of year ``'2023'``.

        Args:
            keyword: Search string sent as the ``keyword`` query parameter.

        Returns:
            The value stored under ``['data']['results']`` of the JSON reply,
            or ``None`` when the request fails, the status is not OK, or the
            body does not have the expected shape.
        """
        params = {
            'type': '0',
            'keyword': keyword,
            'kythi': 'THPT',
            'nam': '2023',
            'cumthi': '0',
        }
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(self.url, params=params, timeout=self.timeout)
            if not response.ok:
                return None
            return response.json()['data']['results']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network error, non-JSON body, or unexpected JSON layout: report
            # it through the same "None" channel the caller already handles.
            return None

    def write_to_csv(self, output_data):
        """Append a batch of records to ``<file_name>.csv``.

        Args:
            output_data: Sequence of dicts; the keys of the first dict are
                used as the CSV columns. An empty batch is ignored.
        """
        if not output_data:
            # Nothing to write; avoids IndexError on output_data[0] and
            # avoids creating an empty file.
            return
        with open(f"{self.file_name}.csv", 'a', encoding='utf-8', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=output_data[0].keys())
            # In append mode the position is 0 only when the file is empty,
            # so the header is written exactly once per file.
            if csvfile.tell() == 0:
                writer.writeheader()
            writer.writerows(output_data)

    def run(self):
        """Crawl every configured city and append all results to the CSV.

        For each city the counter starts at 0 and increases by one after each
        successfully stored batch. An empty result ends the city. A failed
        request is retried for the same keyword with a growing delay; after
        ``max_attempts`` consecutive failures the city is abandoned so that a
        permanently failing endpoint cannot cause an endless loop.
        """
        for edu_id, city_name in self.cities.items():
            print(f"Working on city = {city_name}.", end=" ")
            student_order = 0
            failed_attempts = 0
            while True:
                # Access page
                keyword = f"{edu_id:02d}{student_order:04d}"
                json_score = self.access_page(keyword)
                if json_score is None:
                    failed_attempts += 1
                    print(f"Error at keyword {keyword}")
                    if failed_attempts >= self.max_attempts:
                        print(
                            f"Giving up on city = {city_name} after "
                            f"{failed_attempts} failed attempts at keyword {keyword}"
                        )
                        break
                    # Linear back-off before asking for the same keyword again.
                    time.sleep(self.retry_delay * failed_attempts)
                    continue
                failed_attempts = 0
                if not json_score:
                    print(f"Stopped at keyword {keyword}")
                    break

                # Write score to csv
                self.write_to_csv(json_score)

                student_order += 1

In [3]:
# Build a scraper with its default settings and crawl every configured city.
# Progress is printed per city and records are appended to "score_raw.csv";
# because the file is opened in append mode, re-running this cell adds rows
# to an existing file rather than replacing it.
score_scraper = Scraper()
score_scraper.run()

Working on city = Hà Nội. 

Stopped at keyword 011021
Working on city = Hồ Chí Minh. Stopped at keyword 020849
Working on city = Hải Phòng. Stopped at keyword 030228
Working on city = Đà Nẵng. Stopped at keyword 040132
Working on city = Hà Giang. Stopped at keyword 050063
Working on city = Cao Bằng. Stopped at keyword 060051
Working on city = Lai Châu. Stopped at keyword 070039
Working on city = Lào Cai. Stopped at keyword 080079
Working on city = Tuyên Quang. Stopped at keyword 090084
Working on city = Lạng Sơn. Stopped at keyword 100094
Working on city = Bắc Kạn. Stopped at keyword 110030
Working on city = Thái Nguyên. Stopped at keyword 120162
Working on city = Yên Bái. Stopped at keyword 130083
Working on city = Sơn La. Stopped at keyword 140118
Working on city = Phú Thọ. Stopped at keyword 150159
Working on city = Vĩnh Phúc. Stopped at keyword 160141
Working on city = Quảng Ninh. Stopped at keyword 170161
Working on city = Bắc Giang. Stopped at keyword 180211
Working on city = Bắc Ninh. Stopped at keyword 19