# 1. Crawl data

In [11]:
import requests
import csv

In [12]:
# Hotel-list endpoint prefix; the crawl loop appends the page number to it.
# `size=5` is presumably the number of hotels returned per page - TODO confirm.
api_hotel = (
    "https://mixivivu.com/api/hotels/get-list"
    "?size=5&page="
)
# First page number to request.
page = 1

In [13]:
# Crawl every page of the hotel-list API and flatten the JSON into three CSV files:
#   hotel.csv             - one row per hotel
#   hotel_rooms.csv       - one row per room, linked to its hotel through hotel_id
#   hotel_description.csv - one row per paragraph/image block of the long description
# hotel_id, room_id and block_id are sequential ids assigned here in crawl order;
# they are not read from the API response.
with open("../data/hotel.csv", "w", encoding="utf-8", newline="") as hotel_file,\
     open("../data/hotel_rooms.csv", "w", encoding="utf-8", newline="") as room_file,\
     open("../data/hotel_description.csv", "w", encoding="utf-8", newline="") as long_description_file:

    hotel_writer = csv.writer(hotel_file)
    room_writer = csv.writer(room_file)
    long_description_writer = csv.writer(long_description_file)

    hotel_writer.writerow(["hotel_id", "hotel_name", "total_rooms", "admin", "hotel_price", "city", "address", "map_link", "hotel_features", "short_description", "long_description"])
    room_writer.writerow(["room_id", "hotel_id", "room_name", "room_price", "room_features", "size", "max_persons", "bed_type", "view"])
    long_description_writer.writerow(["hotel_id", "block_id", "type", "data"])

    # The files above were just truncated, so always restart from the first page.
    # Otherwise re-running this cell would reuse the `page` value left behind by
    # the previous run (the last, empty page) and leave only the header rows.
    page = 1
    hotel_id = 1
    room_id = 1
    while True:
        try:
            # A timeout keeps a stalled connection from hanging the notebook forever;
            # requests.Timeout is a RequestException, so it is reported below.
            response = requests.get(api_hotel + str(page), timeout=30)
            response.raise_for_status()

            data = response.json()
            # `or` fallbacks also cover keys that are present but null in the JSON.
            hotels = (data.get("result") or {}).get("data") or []

            # An empty page marks the end of the listing.
            if not hotels:
                print(f"No more data on page {page}. Stopping...")
                break

            for hotel in hotels:
                hotel_spec = (hotel.get("spec") or {}).get("hotel") or {}

                hotel_name = hotel.get("title")
                total_rooms = hotel_spec.get("totalRooms")
                admin = hotel_spec.get("admin")
                hotel_price = hotel.get("defaultPrice")
                city = (hotel.get("city") or {}).get("name")
                address = hotel.get("address")
                map_link = hotel.get("mapLink")

                short_description_list = hotel.get("shortDescription") or []
                short_description = " ".join(short_description_list)

                features_list = hotel.get("features") or []
                hotel_features = ", ".join(feature.get("text") or "" for feature in features_list)

                # A hotel without a long description yields no description rows
                # instead of raising TypeError when iterating over None.
                long_description = (hotel.get("longDescription") or {}).get("blocks") or []
                paragraphs = []

                block_id = 1
                for block in long_description:
                    block_type = block["type"]
                    if block_type == "paragraph":
                        block_value = block["data"]["text"]
                        paragraphs.append(block_value)
                    elif block_type == "image":
                        block_value = block["data"]["file"]["url"]
                    else:
                        # Only paragraph and image blocks are exported; skipped
                        # blocks do not consume a block_id.
                        continue

                    long_description_writer.writerow([hotel_id, block_id, block_type, block_value])
                    block_id += 1

                # All paragraph texts of the hotel, space-separated, for hotel.csv.
                long_description_text = " ".join(paragraphs)

                hotel_writer.writerow([hotel_id, hotel_name, total_rooms, admin, hotel_price, city, address, map_link, hotel_features, short_description.strip(), long_description_text.strip()])

                # A hotel without rooms simply contributes no room rows.
                rooms = hotel.get("rooms") or []
                for room in rooms:
                    room_name = room.get("title")
                    room_price = room.get("price")
                    size = room.get("size")
                    max_persons = room.get("maxPersons")

                    room_features_list = room.get("features") or []
                    room_features = ", ".join(feature.get("text") or "" for feature in room_features_list)

                    bed_type = room.get("bedType")
                    view = room.get("view")

                    room_writer.writerow([room_id, hotel_id, room_name, room_price, room_features, size, max_persons, bed_type, view])
                    room_id += 1

                hotel_id += 1

            page += 1
        except requests.RequestException as e:
            # Network/HTTP failure: stop crawling; rows written so far are kept.
            print(f"Error while fetching page {page}: {e}")
            break

print("Data crawling and saving completed!")

No more data on page 45. Stopping...
Data crawling and saving completed!
