In [21]:
import warnings

import numpy as np
import pandas as pd

# Notebook display settings: show every column (None = no column limit) and
# allow rendered output to be up to 200 characters wide.
display_settings = (
    ('display.max_columns', None),
    ('display.width', 200),
)
for option_name, option_value in display_settings:
    pd.set_option(option_name, option_value)

In [22]:
# Load the cleaned luxury-housing CSV (path is relative to the notebook's
# working directory) into `df`, the frame the following cells add columns to.
file_path = "../data/luxury_housing_cleaned.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [23]:
# Derive 'Booking_Status' from 'Transaction_Type': rows whose transaction type
# contains "Sale" or "Booked" (case-insensitive regex; missing values count as
# "no match") are labelled 'Booked', everything else 'Not Booked'.
booked_mask = df['Transaction_Type'].str.contains('Sale|Booked', case=False, na=False)
df['Booking_Status'] = np.where(booked_mask, 'Booked', 'Not Booked')

# NOTE(review): the preview of this column further down the notebook shows
# 'Transaction_Type' values such as 'Primary' / 'Secondary', which this pattern
# never matches, so those rows all come out as 'Not Booked'. The right source
# column / pattern is not visible from this notebook -- confirm against the
# data dictionary. Until then, surface the problem instead of silently
# producing an all-'Not Booked' feature.
if not booked_mask.any():
    warnings.warn(
        "No 'Transaction_Type' value matched 'Sale|Booked': every row is "
        "labelled 'Not Booked', so any conversion rate derived from it will "
        "be 0. Verify the source column and pattern.",
        UserWarning,
    )

In [24]:
# Numeric twin of 'Booking_Status' (1 where the status is 'Booked', 0 for
# anything else) so conversion rates can be computed arithmetically.
df['Booking_Flag'] = df['Booking_Status'].eq('Booked').astype(int)

In [25]:
# Price per square foot: ticket price scaled by 1e7, then divided by unit area.
# The 1e7 factor presumably converts crore to base currency units
# (1 crore = 1e7) -- TODO confirm the unit of 'Ticket_Price_Cr'.
scaled_price = df['Ticket_Price_Cr'].mul(1e7)
df['Price_per_Sqft'] = scaled_price.div(df['Unit_Size_Sqft'])

In [26]:
# Parse 'Purchase_Quarter' into real timestamps. Surrounding whitespace is
# stripped first; values that cannot be parsed become NaT (errors='coerce')
# rather than raising.
quarter_text = df['Purchase_Quarter'].str.strip()
df['Purchase_Date'] = pd.to_datetime(quarter_text, errors='coerce')

In [27]:
# Split the parsed purchase date into its quarter number and year.
purchase_dt = df['Purchase_Date'].dt
df['Quarter_Number'] = purchase_dt.quarter
df['Purchase_Year'] = purchase_dt.year

In [28]:
# Spot-check the date parsing: raw quarter value next to the derived fields.
date_columns = ['Purchase_Quarter', 'Purchase_Date', 'Quarter_Number', 'Purchase_Year']
df.loc[:, date_columns].head()

Unnamed: 0,Purchase_Quarter,Purchase_Date,Quarter_Number,Purchase_Year
0,2025-03-31,2025-03-31,1,2025
1,2024-06-30,2024-06-30,2,2024
2,2023-12-31,2023-12-31,4,2023
3,2024-03-31,2024-03-31,1,2024
4,2024-12-31,2024-12-31,4,2024


In [29]:
# Spot-check the booking features against the column they were derived from.
booking_columns = ['Transaction_Type', 'Booking_Status', 'Booking_Flag']
df.loc[:, booking_columns].head()

Unnamed: 0,Transaction_Type,Booking_Status,Booking_Flag
0,Primary,Not Booked,0
1,Primary,Not Booked,0
2,Primary,Not Booked,0
3,Primary,Not Booked,0
4,Secondary,Not Booked,0


In [30]:
# Summary statistics for the price-per-sqft feature and its two inputs.
price_columns = ['Ticket_Price_Cr', 'Unit_Size_Sqft', 'Price_per_Sqft']
df.loc[:, price_columns].describe()

Unnamed: 0,Ticket_Price_Cr,Unit_Size_Sqft,Price_per_Sqft
count,101000.0,101000.0,101000.0
mean,12.53044,6003.992812,22704.505063
std,6.520855,1638.52963,14185.257289
min,0.776524,3000.0,1181.384845
25%,10.844681,4683.0,15802.017101
50%,12.034831,5990.0,20091.53805
75%,13.233249,7332.0,26388.414214
max,100.0,8999.0,324359.390204


In [31]:
# First few distinct (quarter value, quarter number, year) combinations.
quarter_columns = ['Purchase_Quarter', 'Quarter_Number', 'Purchase_Year']
df.loc[:, quarter_columns].drop_duplicates().head()

Unnamed: 0,Purchase_Quarter,Quarter_Number,Purchase_Year
0,2025-03-31,1,2025
1,2024-06-30,2,2024
2,2023-12-31,4,2023
3,2024-03-31,1,2024
4,2024-12-31,4,2024


In [32]:
# Write the frame, including the columns derived above, to a new CSV;
# index=False keeps the row index out of the file.
features_path = "../data/luxury_housing_features.csv"
df.to_csv(features_path, index=False)