# Imports

In [None]:
from IPython.display import display, HTML

def displayHorizontalDataframes(dfs, titles):
    """
    Render several pandas DataFrames side by side, each under its own heading.

    Parameters:
    dfs (list): DataFrames to render, in left-to-right order.
    titles (list): One heading per DataFrame; a falsy title is replaced
        by a blank placeholder.

    Raises:
    ValueError: If dfs and titles do not have the same length.
    """
    if len(dfs) != len(titles):
        raise ValueError("The number of dataframes and titles must be the same.")

    # One HTML fragment per dataframe; they are joined once at the end.
    cells = []
    for frame, heading in zip(dfs, titles):
        if not heading:
            heading = " "
        cells.append(
            "<div style='margin-left: 20px; margin-right: 20px; text-align: center;'>"
            f"<h3>{heading}</h3>"
            f"{frame.to_html(classes='dataframe', border=1)}"
            "</div>"
        )

    # A flex container lays the fragments out in a centered horizontal row.
    container_open = (
        "<div style='display: flex; justify-content: center; "
        "align-items: flex-start;'>"
    )
    markup = container_open + "".join(cells) + "</div>"
    display(HTML(markup))

In [None]:
import pandas as pd
# import plotly.express as px
# import plotly.graph_objects as go
# from ydata_profiling import ProfileReport
import numpy as np

# Column name -> dtype handed to pd.read_csv. Pandas extension dtypes
# (StringDtype, Int64Dtype, Float64Dtype, BooleanDtype) are used instead of
# the NumPy defaults; the aliases below only keep the table compact.
_STR = pd.StringDtype()
_INT = pd.Int64Dtype()
_FLOAT = pd.Float64Dtype()
_BOOL = pd.BooleanDtype()
_CAT = 'category'

dtype_mapping = {
    'propertyId': _STR,
    'localityName': _CAT,
    'landMarks': _STR,
    'locality': _STR,
    'price': _INT,
    'nameOfSociety': _STR,
    'projectName': _STR,
    'carpetArea': _INT,
    'coveredArea': _INT,
    'carpetAreaSqft': _INT,
    'possessionStatus': _STR,
    'developerName': _STR,
    'flooringType': _STR,
    'floorNumber': _INT,
    'unitCountonFloor': _INT,
    'totalFloorNumber': _INT,
    'electricityStatus': _STR,
    'waterStatus': _STR,
    'longitude': _FLOAT,
    'latitude': _FLOAT,
    'transactionType': _CAT,
    'facing': _STR,
    'ownershipType': _STR,
    'carParking': _STR,
    'furnished': _CAT,
    'bedrooms': _INT,
    'bathrooms': _INT,
    'numberOfBalconied': _INT,
    'propertyType': _CAT,
    'additionalRooms': _STR,
    'bookingAmountExact': _INT,
    'maintenanceChargesFrequency': _CAT,
    'maintenanceCharges': _INT,
    'ageofcons': _CAT,
    'isVerified': _CAT,
    'listingTypeDesc': _CAT,
    'premiumProperty': _BOOL,
    'noOfLifts': _INT,
    'propertyAmenities': _STR,
    'facilitiesDesc': _STR,
    'uuid': _STR,
    # One-hot style indicator columns.
    'flooringType_Vitrified': _BOOL,
    'flooringType_CeramicTiles': _BOOL,
    'flooringType_Marble': _BOOL,
    'flooringType_NormalTilesKotahStone': _BOOL,
    'flooringType_Granite': _BOOL,
    'flooringType_Wooden': _BOOL,
    'flooringType_Mosaic': _BOOL,
    'flooringType_Marbonite': _BOOL,
    'additionalRoom_PujaRoom': _BOOL,
    'additionalRoom_Study': _BOOL,
    'additionalRoom_Store': _BOOL,
    'additionalRoom_ServantRoom': _BOOL,
    'carParking_Open': _INT,
    'carParking_Covered': _INT,
    'ReservedParking': _BOOL,
}

# Columns that are read from the CSV and then removed from the working frame.
COLUMNS_TO_DROP = ['coveredArea', 'ReservedParking']

# Load the training data with explicit per-column dtypes; the first CSV
# column becomes the index.
df = pd.read_csv(
    '../Data/train_imputed.csv',
    index_col=0,
    dtype=dtype_mapping,
)

# Fill remaining missing entries with pd.NA, modifying the frame in place.
df.fillna(value=pd.NA, inplace=True)

df = df.drop(columns=COLUMNS_TO_DROP)

# Feature Encoding

## Linear

## Tree (numeric only)

## Tree (numeric and categorical)

# Models

## Linear Regression

## K-Nearest Neighbors

## Decision Tree 

## XGBoost Regressor

## Random Forest

## XGBoost Random Forest

## LightGBM

## CatBoost