In [75]:
import pandas as pd 
import numpy as np 
import torch 
from sklearn.pipeline import Pipeline 
from sklearn.impute import SimpleImputer 
from sklearn.compose import ColumnTransformer 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder 
from sklearn.preprocessing import OneHotEncoder, StandardScaler 
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, root_mean_squared_error 

In [76]:
df = pd.read_parquet('data.parquet')
df['total_floor'] = df['total_floor'].fillna('-999') 
df['building_type'] = df['building_type'].fillna('missing')

In [77]:
def train_test_split_features(X, y, flavour="numpy"):
    if flavour == "torch":
        X = torch.tensor(X, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.1, random_state=1
        )
    else:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.1, random_state=1
        )

    return X_train, X_test, y_train, y_test

In [78]:
X = df.drop(columns=['request_day_within_7d', '7d_class', 'request_day_within_3d', '3d_class'])
y_set = df[['request_day_within_7d', '7d_class', 'request_day_within_3d', '3d_class']]
y_regression_7d = df[['request_day_within_7d']]
y_regression_3d = df[['request_day_within_3d']]


encoder_7d = LabelEncoder()
y_classification_7d = encoder_7d.fit_transform(df['7d_class'])


encoder_3d = LabelEncoder()
y_classification_3d = encoder_3d.fit_transform(df['3d_class'])

In [79]:
numerical_features = [
    "bathroom",
    "floor",
    "total_floor",
    "gym",
    "latitude",
    "longitude",
    "lift",
    "property_age",
    "property_size",
    "swimming_pool",
    "rent",
    "deposit",
    "photo_count",
]

categorical_features = ['type','furnishing','lease_type','parking','building_type']

In [80]:
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numerical_transformer, numerical_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

pipeline = Pipeline([
    ('preprocessor',preprocessor)
])

In [81]:
# cat_features = list(preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features))

In [82]:
X_train, X_test, y_train, y_test = train_test_split(X,y_regression_3d, flavour = 'torch')

TypeError: got an unexpected keyword argument 'flavour'

In [None]:
X_train.shape

(21666, 18)

Unnamed: 0,bathroom,floor,total_floor,gym,latitude,longitude,lift,property_age,property_size,swimming_pool,...,lease_type_COMPANY,lease_type_FAMILY,parking_BOTH,parking_FOUR_WHEELER,parking_NONE,parking_TWO_WHEELER,building_type_AP,building_type_IF,building_type_IH,building_type_missing
0,-1.022095,0.513752,0.050868,2.241844,-0.372148,-0.009354,1.607520,-0.319159,-0.187832,2.452214,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.262159,0.937171,0.231302,2.241844,0.276015,0.079192,1.607520,-0.453229,0.430529,2.452214,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.262159,-0.756505,0.050868,2.241844,0.078420,0.065884,1.607520,-0.587299,0.377250,-0.407795,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0.262159,0.513752,0.050868,-0.446061,-0.317502,-0.015097,-0.622076,-0.453229,0.538702,-0.407795,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,-1.022095,-0.333086,-0.000684,-0.446061,0.147536,0.117022,-0.622076,-0.051019,-0.833639,-0.407795,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28883,-1.022095,-0.756505,-0.000684,-0.446061,-0.069462,0.018136,-0.622076,1.423752,0.377250,-0.407795,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
28884,0.262159,0.513752,0.025092,-0.446061,-0.234755,-0.014546,1.607520,0.485262,0.554847,-0.407795,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
28885,0.262159,1.784009,0.308631,2.241844,-0.243724,-0.085043,1.607520,-0.587299,0.013983,2.452214,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
28886,-1.022095,-0.333086,-0.000684,-0.446061,-0.090443,0.025963,-0.622076,0.083051,-0.591461,-0.407795,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
