In [1]:
import numpy as np
import pandas as pd
from database import Database

In [2]:
# Load the raw bookings export that the following cells clean up.
bookings = pd.read_csv("bookings.csv")

# Fetch the lookup tables from the project database as DataFrames.
db = Database()
get_hotel_mapping = db.get_hotel_mapping_as_df()
get_room_mapping = db.get_room_mapping_as_df()
get_meal_mapping = db.get_meal_mapping_as_df()
get_operator_mapping = db.get_operator_mapping_as_df()

# external hotel code -> hotel_id
hotel_mapping = {
    code: hotel_id
    for code, hotel_id in zip(
        get_hotel_mapping["external_code"], get_hotel_mapping["hotel_id"]
    )
}

# (external room code, hotel_id) -> room id; the pair is the key, so the same
# room code can resolve to different ids under different hotels.
room_mapping = {
    (code, hotel_id): room_id
    for code, hotel_id, room_id in zip(
        get_room_mapping["external_code"],
        get_room_mapping["hotel_id"],
        get_room_mapping["id"],
    )
}

# meal code -> meal id
meal_mapping = {
    code: meal_id
    for code, meal_id in zip(get_meal_mapping["code"], get_meal_mapping["id"])
}

# external operator code -> operator_id
operator_mapping = {
    code: operator_id
    for code, operator_id in zip(
        get_operator_mapping["external_code"], get_operator_mapping["operator_id"]
    )
}

  self.mapping_hotel = Table(


In [3]:
# Operator Code
# Rows without an operator code fall back to the row's hotel_id; every value
# is then converted to its string form.
operator_code_filled = bookings["operator_code"].fillna(bookings["hotel_id"])
bookings["operator_code"] = operator_code_filled.map(str)

In [4]:
# Text Case
# Each entry pairs a group of columns with the case transform applied to them.
text_case_rules = [
    (["guest_name", "room_code", "meal"], lambda values: values.str.upper()),
    (["status", "status4", "status5"], lambda values: values.str.capitalize()),
]

for column_names, change_case in text_case_rules:
    for column_name in column_names:
        bookings[column_name] = change_case(bookings[column_name])

In [5]:
# Cancellation Date

# Turn the "1900-01-01" value into pd.NA
# (presumably a sentinel for "no date" -- TODO confirm with the data owner).
bookings["cancellation_date"] = bookings["cancellation_date"].replace(
    to_replace="1900-01-01", value=pd.NA
)

# "status" is not modified until the last statement, so this mask stays valid
# for every step below.
is_cancelled = bookings["status"] == "Can"

# Cancelled bookings without a cancellation date take "last_modified_date"
# (the right-hand side is aligned on the index, so each row gets its own value).
missing_date = is_cancelled & bookings["cancellation_date"].isna()
bookings.loc[missing_date, "cancellation_date"] = bookings["last_modified_date"]

# Bookings that are not cancelled must not carry a cancellation date.
stray_date = ~is_cancelled & bookings["cancellation_date"].notna()
bookings.loc[stray_date, "cancellation_date"] = pd.NA

# Update Status: anything other than "Can" collapses to "Ok".
bookings["status"] = bookings["status"].where(is_cancelled, "Ok")

In [6]:
# Price Info
# Any price below 1 is reset to 0; missing prices are left untouched here
# because a comparison with NaN is False.
for price_column in ("purchase_price", "sales_price"):
    below_one = bookings[price_column] < 1
    bookings.loc[below_one, price_column] = 0

In [7]:
# Fill the rest of blank fields with 0 if numerical


def _fill_numeric_blanks(column):
    """Return ``column`` with missing values set to 0 when its dtype is numeric.

    A column counts as numeric when its dtype kind is one of ``b`` (bool),
    ``i``/``u`` (signed/unsigned integer), ``f`` (float) or ``c`` (complex).
    Columns of any other kind are returned unchanged.
    """
    if column.dtype.kind in "biufc":
        return column.fillna(0)
    return column


bookings = bookings.apply(_fill_numeric_blanks)

In [8]:
# Hotel ID
# Keep the incoming value as the external code, then overwrite "hotel_id"
# with the id found in hotel_mapping. Codes without a match become <NA> in the
# nullable Int64 column.
bookings["external_code_hotel"] = bookings["hotel_id"]
mapped_hotel_ids = bookings["external_code_hotel"].map(hotel_mapping)
bookings["hotel_id"] = pd.to_numeric(mapped_hotel_ids, errors="coerce").astype(
    "Int64"
)

In [9]:
# Room ID
# Keep the room code as the external code, then resolve "room_id" through
# room_mapping, whose keys are (external room code, hotel_id) pairs.
# NOTE(review): this uses the already-mapped "hotel_id" column -- confirm the
# room mapping table stores the same kind of hotel id.
bookings["external_code_room"] = bookings["room_code"]

# Pair the two key columns directly instead of a row-wise DataFrame.apply:
# no per-row Series is built, and an empty frame simply yields an empty list.
room_keys = zip(bookings["external_code_room"], bookings["hotel_id"])
bookings["room_id"] = [room_mapping.get(room_key) for room_key in room_keys]

# Unmatched keys (None) become <NA> in the nullable Int64 column.
bookings["room_id"] = pd.to_numeric(bookings["room_id"], errors="coerce").astype(
    "Int64"
)

In [10]:
# Meal ID
# Keep the meal code as the external code and look up "meal_id" in
# meal_mapping. Codes without a match become <NA> in the nullable Int64 column.
bookings["external_code_meal"] = bookings["meal"]
mapped_meal_ids = bookings["external_code_meal"].map(meal_mapping)
bookings["meal_id"] = pd.to_numeric(mapped_meal_ids, errors="coerce").astype("Int64")

In [11]:
# Operator ID
# Keep the incoming value as the external code, then overwrite "operator_id"
# with the id found in operator_mapping. Codes without a match become <NA> in
# the nullable Int64 column.
# NOTE(review): the "operator_code" column normalised earlier in this notebook
# is not used for this lookup -- confirm "operator_id" is the intended source.
bookings["external_code_operator"] = bookings["operator_id"]
mapped_operator_ids = bookings["external_code_operator"].map(operator_mapping)
bookings["operator_id"] = pd.to_numeric(mapped_operator_ids, errors="coerce").astype(
    "Int64"
)

In [12]:
# Fill the rest of blank fields with None for database insertion
# np.nan is spelled in lowercase on purpose: the np.NaN alias was removed in
# NumPy 2.0 and raises AttributeError there, while np.nan works on every version.
bookings.replace({pd.NaT: None, pd.NA: None, np.nan: None}, inplace=True)

In [13]:
# Drop fields
# "room_code" and "meal" were copied into "external_code_room" and
# "external_code_meal" by earlier cells, so their values are kept under those names.
unused_columns = ["room_type", "room_code", "meal", "main_season"]
bookings = bookings.drop(columns=unused_columns)

In [14]:
# Write the cleaned bookings to disk. The index argument is left at its
# default, so the row index is written as the first column of the file.
output_path = "output.csv"
bookings.to_csv(output_path)