In [17]:
import requests
import csv

In [18]:
# Page counter for the crawl, starting at the first page.
page = 1
# Hotel listing endpoint (the query string asks for size=5). The URL ends in
# "page=" so the page number can be concatenated onto it for each request.
api_hotel = "https://mixivivu.com/api/hotels/get-list?size=5&page="

In [19]:
# Crawl the paginated hotel listing and write one (feature_id, feature_description)
# row for every feature found on each hotel and on each of its rooms. Duplicate
# features are written as-is.
#
# Always start from the first page: this cell truncates the CSV ("w" mode), so
# resuming from a page counter left over from an earlier run would silently
# produce a file that is missing every page crawled before.
page = 1

with open("../data/feature.csv", "w", encoding="utf-8", newline="") as feature_file:
    feature_writer = csv.writer(feature_file)
    feature_writer.writerow(["feature_id", "feature_description"])

    while True:
        try:
            # A timeout keeps a stalled connection from hanging the notebook forever;
            # requests raises a RequestException subclass when it expires.
            response = requests.get(api_hotel + str(page), timeout=30)
            response.raise_for_status()

            data = response.json()
            # The `or` fallbacks also cover keys that are present but null.
            hotels = (data.get("result") or {}).get("data") or []

            if not hotels:
                # An empty page marks the end of the listing.
                print(f"No more data on page {page}. Stopping...")
                break

            for hotel in hotels:
                # Hotel-level features first, then the features of every room,
                # in the order the API returned them.
                feature_groups = [hotel.get("features") or []]
                feature_groups.extend(
                    room.get("features") or [] for room in hotel.get("rooms") or []
                )

                for features_list in feature_groups:
                    for feature in features_list:
                        feature_writer.writerow([feature.get("_id"), feature.get("text")])

            page += 1
        except requests.RequestException as e:
            # Stop on the first failed page; rows written so far stay in the file.
            print(f"Error while fetching page {page}: {e}")
            break

print("Data crawling and saving completed!")

No more data on page 45. Stopping...
Data crawling and saving completed!


In [20]:
# Features listing endpoint (/api/features/get-list); the query string asks for size=30.
api = "https://mixivivu.com/api/features/get-list?size=30"

In [21]:
# Append the entries returned by the features endpoint to the feature CSV as
# (feature_id, feature_description) rows. The file is opened in append mode, so
# existing rows are kept and no header is written here.
with open("../data/feature.csv", "a", encoding="utf-8", newline="") as feature_file:
    feature_writer = csv.writer(feature_file)

    try:
        # A timeout keeps a stalled connection from hanging the notebook forever;
        # requests raises a RequestException subclass when it expires.
        response = requests.get(api, timeout=30)
        response.raise_for_status()

        data = response.json()
        # The `or` fallbacks also cover keys that are present but null.
        features = (data.get("result") or {}).get("data") or []

        feature_writer.writerows(
            [feature.get("_id"), feature.get("text")] for feature in features
        )

    except requests.RequestException as e:
        # Nothing is appended when the request fails; the existing file is untouched.
        print(f"Error while fetching page: {e}")

# Remove duplicates

In [22]:
# Rewrite the feature CSV with exact duplicate rows removed.
#
# Rows are de-duplicated through dict keys rather than a set, so they keep their
# first-seen order. A set iterates in an arbitrary order that can change between
# kernel sessions, which would make the rewritten file (and any ids later
# assigned by row position) differ from run to run.
with open("../data/feature.csv", "r", encoding="utf-8", newline="") as file:
    reader = csv.reader(file)
    # None instead of StopIteration when the file is completely empty.
    header = next(reader, None)
    # `if row` drops blank lines, which csv.reader yields as empty lists.
    unique_rows = list(dict.fromkeys(tuple(row) for row in reader if row))

with open("../data/feature.csv", "w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    if header is not None:
        writer.writerow(header)
    writer.writerows(unique_rows)

In [23]:
import pandas as pd

In [24]:
# Load the feature CSV into a DataFrame (pandas defaults: first row is the header).
df = pd.read_csv(filepath_or_buffer="../data/feature.csv")

In [25]:
# Move the identifier read from the CSV to "default_id", freeing the
# "feature_id" column name.
column_renames = {"feature_id": "default_id"}
df.rename(columns=column_renames, inplace=True)

In [26]:
# Add a 1-based sequential "feature_id" as the first column, numbered by row position.
sequential_ids = range(1, len(df) + 1)
df.insert(loc=0, column="feature_id", value=sequential_ids)

In [27]:
# Write the frame back to the feature CSV as UTF-8, without the pandas index column.
df.to_csv(path_or_buf="../data/feature.csv", encoding="utf-8", index=False)