In [1]:
import numpy as np
import pandas as pd
from database import Database

In [2]:
# Load the raw bookings export that the following cells clean up.
bookings = pd.read_csv("bookings.csv")

db = Database()

# Mapping tables fetched from the database as DataFrames.
# NOTE: despite the `get_` prefix these names hold DataFrames, not callables;
# the names are left unchanged in case other cells reference them.
get_hotel_mapping = db.get_hotel_mapping_as_df()
get_room_mapping = db.get_room_mapping_as_df()
get_meal_mapping = db.get_meal_mapping_as_df()
get_operator_mapping = db.get_operator_mapping_as_df()

# Lookup dictionaries built from the mapping frames.

# external_code -> hotel_id
hotel_mapping = dict(
    zip(get_hotel_mapping["external_code"], get_hotel_mapping["hotel_id"])
)

# (external_code, hotel_id) -> id
# The composite keys are built by zipping the two columns directly instead of
# a row-wise `apply(axis=1)`, which constructs a Series object for every row.
room_mapping = dict(
    zip(
        zip(get_room_mapping["external_code"], get_room_mapping["hotel_id"]),
        get_room_mapping["id"],
    )
)

# code -> id
meal_mapping = dict(zip(get_meal_mapping["code"], get_meal_mapping["id"]))

# external_code -> operator_id
operator_mapping = dict(
    zip(get_operator_mapping["external_code"], get_operator_mapping["operator_id"])
)

  self.mapping_hotel = Table(


In [3]:
# Operator Code

# Rows without an operator code take the value of "hotel_id" from the same
# row; every value is then converted with str().
operator_code_filled = bookings["operator_code"].fillna(bookings["hotel_id"])
bookings["operator_code"] = operator_code_filled.apply(str)

In [4]:
# Text Case

# String-accessor method name -> columns it is applied to.
# Upper-casing runs first, then capitalisation, column by column.
case_rules = {
    "upper": ("guest_name", "room_code", "meal"),
    "capitalize": ("status", "status4", "status5"),
}

for method_name, columns in case_rules.items():
    for column in columns:
        # Equivalent to bookings[column].str.upper() / .str.capitalize().
        bookings[column] = getattr(bookings[column].str, method_name)()

In [5]:
# Cancellation Date

# Replace the literal "1900-01-01" with pd.NA so it counts as missing.
bookings["cancellation_date"] = bookings["cancellation_date"].replace(
    "1900-01-01", pd.NA
)

# Rows with status "Can" that have no cancellation date take the value of
# "last_modified_date" (the assignment aligns on the row index).
cancelled_without_date = bookings["cancellation_date"].isna() & (
    bookings["status"] == "Can"
)
bookings.loc[cancelled_without_date, "cancellation_date"] = bookings[
    "last_modified_date"
]

# Rows whose status is not "Can" must not carry a cancellation date.
dated_but_not_cancelled = bookings["cancellation_date"].notna() & (
    bookings["status"] != "Can"
)
bookings.loc[dated_but_not_cancelled, "cancellation_date"] = pd.NA

# Update Status: keep "Can" as is, every other value becomes "Ok".
bookings["status"] = bookings["status"].where(bookings["status"] == "Can", "Ok")

In [6]:
# Price Info

# Any price strictly below 1 (including negative values) is set to 0.
# Missing prices compare as False and are left untouched here.
for price_column in ("purchase_price", "sales_price"):
    below_one = bookings[price_column] < 1
    bookings.loc[below_one, price_column] = 0

In [7]:
# Fill the rest of blank fields with 0 if numerical


def _zero_fill_if_numeric(column):
    """Return ``column`` with missing values set to 0 when it is numeric.

    A column is treated as numeric when its dtype kind is one of
    b (bool), i (signed int), u (unsigned int), f (float) or c (complex).
    Any other column is returned unchanged.
    """
    if column.dtype.kind in "biufc":
        return column.fillna(0)
    return column


bookings = bookings.apply(_zero_fill_if_numeric)

In [8]:
# Room Code


def _blank_long_room_code(code):
    """Return "" when ``str(code)`` has more than 3 characters, else ``code``.

    A float NaN stringifies to "nan" (exactly 3 characters), so NaN values
    pass through unchanged.
    """
    if len(str(code)) > 3:
        return ""
    return code


bookings["room_code"] = bookings["room_code"].apply(_blank_long_room_code)

In [9]:
# Fill the rest of blank fields with None for database insertion

# `np.nan` is used instead of the `np.NaN` alias: NumPy 2.0 removed the alias
# (accessing it raises AttributeError), while `np.nan` exists in every version.
bookings.replace({pd.NaT: None, pd.NA: None, np.nan: None}, inplace=True)

In [10]:
# Drop fields

# Remove the "main_season" column from the frame in place.
# Raises KeyError if the column is absent (e.g. when the cell is re-run).
del bookings["main_season"]

In [11]:
# Write the cleaned frame to disk. `index=True` is the pandas default, spelled
# out here to make it visible that the row index is written as the first column.
bookings.to_csv(path_or_buf="output.csv", index=True)

In [12]:
# Number of missing values per column, shown as the cell output.
missing_per_column = bookings.isna().sum()
missing_per_column

ref_id                        0
res_id                        0
hotel_id                      0
operator_id                   0
operator_code                 0
bkg_ref                       0
guest_name                    0
sales_date                    0
in_date                       0
out_date                      0
room_type                     2
room_code                     3
meal                          0
days                          0
adult                         0
child                         0
purchase_price                0
purchase_currency            10
sales_price                   0
sales_currency               10
purchase_price_indicator      1
sales_price_indicator         1
create_date                   0
last_modified_date            0
cancellation_date           595
status                        0
status4                       0
status5                       0
purchase_contract_id          0
purchase_spo_id               0
sales_contract_id             0
sales_sp