In [1]:
import requests
import csv
import pandas as pd

In [2]:
# Paginated ship listing endpoint (5 ships per page); the page number is appended to this URL.
api_ship = "https://mixivivu.com/api/products/get-list?size=5&page="
# Feature catalogue endpoint, requested without an explicit page size.
# NOTE(review): the server's default page size is not visible here — confirm it returns every feature.
api_features = "https://mixivivu.com/api/features/get-list"
# Same feature endpoint, requested with size=30.
api_room_features = "https://mixivivu.com/api/features/get-list?size=30"
# First listing page requested by the crawl loop.
page = 1

In [3]:
# Download the feature catalogue and index it as {feature "_id": display "text"}.
# An explicit timeout is passed because requests otherwise waits indefinitely
# on a stalled connection, which would freeze the notebook kernel.
response = requests.get(api_features, timeout=30)
response.raise_for_status()
# `or` fallbacks cover both a missing key and an explicit JSON null in the envelope.
features_data = (response.json().get("result") or {}).get("data") or []
feature_dict = {feature["_id"]: feature["text"] for feature in features_data}

In [4]:
# Download the feature catalogue (size=30 variant) and index it as
# {feature "_id": display "text"} for resolving room features.
# An explicit timeout is passed because requests otherwise waits indefinitely
# on a stalled connection, which would freeze the notebook kernel.
response = requests.get(api_room_features, timeout=30)
response.raise_for_status()
# `or` fallbacks cover both a missing key and an explicit JSON null in the envelope.
room_features_data = (response.json().get("result") or {}).get("data") or []
room_feature_dict = {feature["_id"]: feature["text"] for feature in room_features_data}

In [5]:
# Load the id translation table and build a lookup from its "default_id"
# column to its "feature_id" column (later rows win on duplicate keys).
df_feature = pd.read_csv("../data/feature.csv")
feature_map = {
    default_id: feature_id
    for default_id, feature_id in zip(df_feature["default_id"], df_feature["feature_id"])
}

In [6]:
# Crawl every ship listing page and fan the results out into six CSV files:
#   ship.csv                    one row per ship (denormalised, with joined text columns)
#   ship_room.csv               one row per room
#   ship_feature.csv            (ship_id, feature_id) pairs
#   ship_room_feature.csv       (room_id, feature_id) pairs
#   ship_short_description.csv  non-empty short-description entries, numbered per ship
#   ship_long_description.csv   paragraph/image blocks of the long description, numbered per ship
# ship_id and room_id are sequential ids assigned here, not ids from the API.
# NOTE(review): ship.csv is written under ../data/ while the other five go to
# ../final_data/ — confirm this split is intentional.

REQUEST_TIMEOUT = 30  # seconds; without it requests can block forever on a stalled connection

with open("../data/ship/ship.csv", "w", encoding="utf-8", newline="") as ship_file, \
     open("../final_data/ship/ship_room.csv", "w", encoding="utf-8", newline="") as room_file,\
     open("../final_data/ship/ship_feature.csv", "w", encoding="utf-8", newline="") as feature_file,\
     open("../final_data/ship/ship_room_feature.csv", "w", encoding="utf-8", newline="") as room_feature_file,\
     open("../final_data/ship/ship_short_description.csv", "w", encoding="utf-8", newline="") as short_description_file,\
     open("../final_data/ship/ship_long_description.csv", "w", encoding="utf-8", newline="") as long_description_file:

    ship_writer = csv.writer(ship_file)
    room_writer = csv.writer(room_file)
    feature_writer = csv.writer(feature_file)
    room_feature_writer = csv.writer(room_feature_file)
    short_description_writer = csv.writer(short_description_file)
    long_description_writer = csv.writer(long_description_file)

    ship_writer.writerow(["ship_id", "ship_name", "launch", "cabin", "shell", "trip", "admin", "ship_price", "address", "map_link", "ship_features", "short_description", "long_description"])
    room_writer.writerow(["room_id", "ship_id", "room_name", "size", "max_persons", "room_price", "room_features"])
    feature_writer.writerow(["ship_id", "feature_id"])
    room_feature_writer.writerow(["room_id", "feature_id"])
    short_description_writer.writerow(["ship_id", "block_id", "data"])
    long_description_writer.writerow(["ship_id", "block_id", "type", "data"])

    ship_id = 1
    room_id = 1
    # Iterate with a local cursor instead of mutating the configured `page`:
    # the files are opened in "w" mode, so re-running this cell with `page`
    # left at the last (empty) page would truncate every CSV to its header.
    current_page = page
    while True:
        # Only the network round-trip can raise RequestException, so only it is guarded.
        try:
            response = requests.get(api_ship + str(current_page), timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            payload = response.json()
        except requests.RequestException as e:
            print(f"Error while fetching page {current_page}: {e}")
            break

        # `or` fallbacks cover both a missing key and an explicit JSON null.
        ships = (payload.get("result") or {}).get("data") or []

        # An empty page marks the end of the listing.
        if not ships:
            print(f"No more data on page {current_page}. Stopping...")
            break

        for ship in ships:
            ship_name = ship.get("title")
            ship_spec = (ship.get("spec") or {}).get("ship") or {}
            launch = ship_spec.get("launch")
            cabin = ship_spec.get("cabin")
            shell = ship_spec.get("shell")
            trip = ship_spec.get("trip")
            admin = ship_spec.get("admin")
            ship_price = ship.get("defaultPrice")
            address = ship.get("address")
            map_link = ship.get("mapLink")

            # Ship features: ids missing from feature_dict are dropped from both
            # the joined text column and the ship/feature link table.
            ship_features_list = ship.get("features") or []
            known_features = [feature for feature in ship_features_list if feature in feature_dict]
            ship_features = ", ".join(feature_dict[feature] for feature in known_features)
            for default_id in known_features:
                # feature_map.get yields None (an empty CSV cell) for ids absent from feature.csv.
                feature_writer.writerow([ship_id, feature_map.get(default_id)])

            # Short description: the ship row gets all entries joined by spaces;
            # the per-block file gets only the non-blank ones, numbered from 1.
            short_description_list = ship.get("shortDescription") or []
            short_description = " ".join(short_description_list)
            block_id = 1
            for description in short_description_list:
                stripped = description.strip()
                if stripped:
                    short_description_writer.writerow([ship_id, block_id, stripped])
                    block_id += 1

            # Long description: keep paragraph text and image URLs, skip every
            # other block type. A missing/null "blocks" list is treated as empty.
            long_description = (ship.get("longDescription") or {}).get("blocks") or []
            paragraph_texts = []
            block_id = 1
            for block in long_description:
                block_type = block["type"]
                if block_type == "paragraph":
                    block_data = block["data"]["text"]
                    paragraph_texts.append(block_data)
                elif block_type == "image":
                    block_data = block["data"]["file"]["url"]
                else:
                    continue

                long_description_writer.writerow([ship_id, block_id, block_type, block_data])
                block_id += 1
            long_description_text = " ".join(paragraph_texts)

            ship_writer.writerow([ship_id, ship_name, launch, cabin, shell, trip, admin, ship_price, address, map_link, ship_features, short_description.strip(), long_description_text.strip()])

            # A ship without a "rooms" list (or with a null one) simply has no room rows.
            rooms = ship.get("rooms") or []
            for room in rooms:
                room_name = room.get("title")
                room_price = room.get("price")
                size = room.get("size")
                max_persons = room.get("maxPersons")

                room_features_list = room.get("features") or []
                # Unlike ship features, unknown ids fall back to the raw id in the text column.
                room_features = ", ".join(room_feature_dict.get(feature, feature) for feature in room_features_list)
                for feature in room_features_list:
                    if feature in room_feature_dict:
                        room_feature_writer.writerow([room_id, feature_map.get(feature)])

                room_writer.writerow([room_id, ship_id, room_name, size, max_persons, room_price, room_features])
                room_id += 1

            ship_id += 1

        current_page += 1

print("Data crawling and saving completed!")

No more data on page 16. Stopping...
Data crawling and saving completed!
