In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.pipeline import Pipeline 
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor

from xgboost import XGBRegressor

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

from sklearn.decomposition import PCA

In [2]:
df = pd.read_csv("Final4_gurgaon_properties_cleaned(Feature_selection2).csv")
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,0.0,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,0.0,Low,Mid Floor
2,flat,sohna road,0.32,2.0,2.0,1,New Property,1000.0,0.0,0.0,0.0,Low,High Floor
3,flat,sector 92,1.6,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,1.0,High,Mid Floor
4,flat,sector 102,0.48,2.0,2.0,1,Relatively New,582.0,0.0,1.0,0.0,High,Mid Floor


In [3]:
df.shape

(3554, 13)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3554 entries, 0 to 3553
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   property_type    3554 non-null   object 
 1   sector           3554 non-null   object 
 2   price            3554 non-null   float64
 3   bedRoom          3554 non-null   float64
 4   bathroom         3554 non-null   float64
 5   balcony          3554 non-null   object 
 6   agePossession    3554 non-null   object 
 7   built_up_area    3554 non-null   float64
 8   servant room     3554 non-null   float64
 9   store room       3554 non-null   float64
 10  furnishing_type  3554 non-null   float64
 11  luxury_category  3554 non-null   object 
 12  floor_category   3554 non-null   object 
dtypes: float64(7), object(6)
memory usage: 361.1+ KB


In [5]:
df['furnishing_type'].value_counts()

furnishing_type
0.0    2349
1.0    1018
2.0     187
Name: count, dtype: int64

In [6]:
# Swap the numeric furnishing codes for readable labels:
#   0 -> unfurnished, 1 -> semifurnished, 2 -> furnished
furnishing_labels = {
    0.0 : 'unfurnished',
    1.0 : 'semifurnished',
    2.0 : 'furnished',
}

df['furnishing_type'] = df['furnishing_type'].replace(furnishing_labels)

In [7]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,unfurnished,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,unfurnished,Low,Mid Floor
2,flat,sohna road,0.32,2.0,2.0,1,New Property,1000.0,0.0,0.0,unfurnished,Low,High Floor
3,flat,sector 92,1.6,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,semifurnished,High,Mid Floor
4,flat,sector 102,0.48,2.0,2.0,1,Relatively New,582.0,0.0,1.0,unfurnished,High,Mid Floor


In [8]:
# Separate the regression target (`price`) from the feature columns.
y = df['price']
X = df.drop('price', axis = 1)

In [9]:
y_transformed = np.log1p(y)

# ordinal encoding 

In [10]:
columns_to_encode = ['property_type', 'sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

In [11]:
# Preprocessing: standardise the numeric features and integer-code every
# categorical column listed in `columns_to_encode`.
preprocessor = ColumnTransformer(
    [
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
    ],
    remainder = 'passthrough',
)

In [12]:
# Chain the preprocessing and a plain linear model into a single estimator.
pipeline = Pipeline(steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])

In [13]:
# K-fold cross-validation 
# Shuffled 10-fold split with a fixed seed; `scores` holds one R2 value per
# fold, computed on the log1p-transformed target (`y_transformed`).
# NOTE: later cells (including the `scorer` helpers) pass the capitalised
# global `Kfold` as `cv`, so this name must stay defined under this spelling.
Kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
scores = cross_val_score(pipeline, X, y_transformed, cv = Kfold, scoring  = 'r2')

In [14]:
print('r2_score:-',scores.mean())

r2_score:- 0.7363096633436826


In [15]:
print('std:-', scores.std())

std:- 0.03238005754429934


In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size = 0.2, random_state = 42)

In [17]:
pipeline.fit(X_train, y_train)

In [18]:
y_pred = pipeline.predict(X_test)

In [19]:
y_pred

array([1.15700081, 0.62935155, 0.78923869, 1.55827521, 1.37806207,
       1.53155016, 2.13474919, 1.23304871, 1.1204441 , 1.11267576,
       1.41235811, 0.71257546, 1.17963125, 0.94687144, 2.82323315,
       0.53201954, 0.61372621, 0.47943071, 0.75822332, 1.45440647,
       0.91377109, 0.80868333, 0.5414655 , 0.57317462, 1.01128127,
       1.23224246, 0.73995281, 0.4814698 , 0.91215684, 1.80808657,
       0.34147627, 0.68461037, 1.31543477, 0.47491893, 0.62086681,
       1.88091792, 0.77551852, 0.55149482, 0.5482935 , 0.38064681,
       0.67804879, 0.82079697, 0.45183075, 1.19277196, 1.19992839,
       0.38720351, 1.74781288, 0.91663109, 0.97387759, 1.1551884 ,
       0.99269947, 0.60040363, 1.54234043, 0.95755836, 0.94013957,
       0.34856039, 2.70521147, 1.67161834, 1.66305621, 1.00936094,
       1.1297326 , 1.2844578 , 0.5637419 , 0.77588884, 0.3702542 ,
       1.03630085, 0.8251405 , 1.62536322, 0.60463535, 1.0252044 ,
       1.13906142, 0.59046895, 0.93467746, 0.9488749 , 1.09670

In [20]:
# Recompute from the fitted pipeline instead of overwriting `y_pred` with a
# function of itself, so re-running this cell cannot apply expm1 twice.
y_pred = np.expm1(pipeline.predict(X_test))

In [21]:
print(mean_absolute_error(np.expm1(y_test), y_pred))

0.9463822160089356


## Converting into functions

In [22]:
def scorer(model_name, model):
    """Evaluate one regressor behind the current global ``preprocessor``.

    Parameters
    ----------
    model_name : str
        Label stored as the first element of the returned row.
    model : estimator
        Regressor used as the final ``'regressor'`` step of the pipeline.

    Returns
    -------
    list
        ``[model_name, mean_cv_r2, holdout_mae]``. The R2 is the mean over a
        shuffled 10-fold cross-validation on ``y_transformed``; the MAE is
        computed on a 20% hold-out split after undoing the log1p transform
        with ``np.expm1``.

    Notes
    -----
    Reads the notebook globals ``preprocessor``, ``X`` and ``y_transformed``.
    """
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor' , model)
    ])

    # Kfold cross-validation
    # Pass the splitter created here (previously the notebook-level `Kfold`
    # was used and this local was dead), so the function no longer depends on
    # a variable defined in another cell.
    kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
    scores = cross_val_score(pipeline, X, y_transformed, cv = kfold, scoring  = 'r2')

    X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size = 0.2, random_state = 42)

    pipeline.fit(X_train, y_train)

    # Invert log1p on both predictions and truth so the MAE is on the
    # original `price` scale.
    y_pred = np.expm1(pipeline.predict(X_test))
    mae = mean_absolute_error(np.expm1(y_test), y_pred)

    return [model_name, scores.mean(), mae]

In [23]:
# Candidate regressors to compare. Every estimator that draws random numbers
# gets a fixed seed so the comparison table can be reproduced on a re-run.
models = {
    'linear_reg' : LinearRegression(),
    'svr' : SVR(),
    'ridge' : Ridge(),
    'lasso' : Lasso(),
    'decision tree' : DecisionTreeRegressor(random_state = 42),
    'random forest' : RandomForestRegressor(random_state = 42),
    'extra trees' : ExtraTreesRegressor(random_state = 42),
    'gradient boosting' : GradientBoostingRegressor(random_state = 42),
    'adaboost' : AdaBoostRegressor(random_state = 42),
    'mlp' :  MLPRegressor(random_state = 42),
    'xgboost' : XGBRegressor(random_state = 42)
}

In [24]:
# One [name, R2, MAE] row per candidate model.
model_output = [
    scorer(name, estimator)
    for name, estimator in models.items()
]

In [25]:
model_output

[['linear_reg', np.float64(0.7363096633436826), 0.9463822160089356],
 ['svr', np.float64(0.7642012011196353), 0.8472636473483934],
 ['ridge', np.float64(0.7363125343993555), 0.9463387741853383],
 ['lasso', np.float64(0.05943378064493573), 1.528905986892753],
 ['decision tree', np.float64(0.77877982241408), 0.7440976965356393],
 ['random forest', np.float64(0.8816502817179745), 0.5316594544644183],
 ['extra trees', np.float64(0.8688541180641467), 0.5470144154405266],
 ['gradient boosting', np.float64(0.8726388814940907), 0.5756337799207787],
 ['adaboost', np.float64(0.7580136761059298), 0.8120374488420476],
 ['mlp', np.float64(0.8149066437395256), 0.6672747555777752],
 ['xgboost', np.float64(0.8894876835260124), 0.5040475141482346]]

In [26]:
model_df = pd.DataFrame(model_output, columns = ['Name', 'R2_score', 'MAE'])

In [27]:
model_df.sort_values(['MAE'])

Unnamed: 0,Name,R2_score,MAE
10,xgboost,0.889488,0.504048
5,random forest,0.88165,0.531659
6,extra trees,0.868854,0.547014
7,gradient boosting,0.872639,0.575634
9,mlp,0.814907,0.667275
4,decision tree,0.77878,0.744098
8,adaboost,0.758014,0.812037
1,svr,0.764201,0.847264
2,ridge,0.736313,0.946339
0,linear_reg,0.73631,0.946382


#### Observations:- 
- Of all the models tried, `xgboost` and `random forest` perform best on this data
- `random forest` = R2_score (0.88) and MAE (0.53)
- `xgboost` = R2_score (0.89) and MAE (0.50)

# OneHotEncoding

In [28]:
# Creating a column transformer for preprocessing 
# Numeric columns are standardised, every column in `columns_to_encode` is
# ordinal-encoded, and three of the categorical columns additionally get
# one-hot dummies (first level dropped).
# NOTE(review): 'sector', 'agePossession' and 'furnishing_type' are also in
# `columns_to_encode`, so each of them reaches the model twice (ordinal code
# plus one-hot dummies) — confirm this double encoding is intended.
preprocessor = ColumnTransformer(
    transformers = [
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop = 'first'),['sector', 'agePossession', 'furnishing_type'])
    ],
    remainder = 'passthrough'
)

In [29]:
# Linear baseline on top of the one-hot preprocessing.
pipeline = Pipeline(steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])

In [30]:
# Kfold cross-validation 
# Use the splitter defined on the line above; the cell previously passed the
# differently-cased global `Kfold` left over from an earlier cell.
kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
scores = cross_val_score(pipeline, X, y_transformed, cv = kfold, scoring  = 'r2')

In [31]:
print(scores.mean())

0.8546098138146467


In [32]:
print(scores.std())

0.016002496624190985


In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size = 0.2, random_state = 42)

In [34]:
pipeline.fit(X_train, y_train)

In [35]:
y_pred = pipeline.predict(X_test)

In [36]:
y_pred

array([0.9594161 , 0.44528554, 0.7801262 , 1.33060588, 1.29961832,
       1.39137176, 2.1914202 , 1.03930225, 1.02135563, 1.00048243,
       1.34342936, 0.73905555, 1.35086058, 0.92079237, 2.91801052,
       0.5123525 , 1.1648734 , 0.31029843, 0.63484822, 1.332026  ,
       0.81984361, 0.83092692, 0.58487103, 0.73795598, 1.03892331,
       1.50909402, 0.54346046, 0.57349933, 0.72627889, 2.18000269,
       0.34704025, 0.46693103, 1.61388375, 0.33067798, 0.67033673,
       1.62818881, 0.96356376, 0.50892081, 0.43094053, 0.32720079,
       0.5108209 , 1.11120725, 0.42946732, 1.03561131, 1.00050079,
       0.44747276, 1.84140042, 1.0032755 , 1.14770734, 0.97871026,
       0.97162498, 0.50737721, 1.79850815, 1.15307672, 0.73014935,
       1.00948443, 2.50605956, 1.41924835, 1.77340513, 0.98914465,
       0.95620449, 1.41794861, 0.49275934, 1.46276283, 0.28872682,
       1.03926035, 1.37968487, 1.32171297, 0.43518529, 0.8039986 ,
       1.35271645, 0.58522092, 1.0196685 , 1.11284932, 1.42135

In [37]:
# Recompute from the fitted pipeline instead of overwriting `y_pred` with a
# function of itself, so re-running this cell cannot apply expm1 twice.
y_pred = np.expm1(pipeline.predict(X_test))

In [38]:
mean_absolute_error(np.expm1(y_test), y_pred)

0.6497148092826402

In [39]:
def scorer(model_name, model):
    """Evaluate one regressor behind the current global ``preprocessor``.

    Parameters
    ----------
    model_name : str
        Label stored as the first element of the returned row.
    model : estimator
        Regressor used as the final ``'regressor'`` step of the pipeline.

    Returns
    -------
    list
        ``[model_name, mean_cv_r2, holdout_mae]``. The R2 is the mean over a
        shuffled 10-fold cross-validation on ``y_transformed``; the MAE is
        computed on a 20% hold-out split after undoing the log1p transform
        with ``np.expm1``.

    Notes
    -----
    Reads the notebook globals ``preprocessor``, ``X`` and ``y_transformed``.
    """
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor' , model)
    ])

    # Kfold cross-validation
    # Pass the splitter created here (previously the notebook-level `Kfold`
    # was used and this local was dead), so the function no longer depends on
    # a variable defined in another cell.
    kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
    scores = cross_val_score(pipeline, X, y_transformed, cv = kfold, scoring  = 'r2')

    X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size = 0.2, random_state = 42)

    pipeline.fit(X_train, y_train)

    # Invert log1p on both predictions and truth so the MAE is on the
    # original `price` scale.
    y_pred = np.expm1(pipeline.predict(X_test))
    mae = mean_absolute_error(np.expm1(y_test), y_pred)

    return [model_name, scores.mean(), mae]

In [40]:
# Candidate regressors to compare. Every estimator that draws random numbers
# gets a fixed seed so the comparison table can be reproduced on a re-run.
models = {
    'linear_reg' : LinearRegression(),
    'svr' : SVR(),
    'ridge' : Ridge(),
    'lasso' : Lasso(),
    'decision tree' : DecisionTreeRegressor(random_state = 42),
    'random forest' : RandomForestRegressor(random_state = 42),
    'extra trees' : ExtraTreesRegressor(random_state = 42),
    'gradient boosting' : GradientBoostingRegressor(random_state = 42),
    'adaboost' : AdaBoostRegressor(random_state = 42),
    'mlp' :  MLPRegressor(random_state = 42),
    'xgboost' : XGBRegressor(random_state = 42)
}

In [41]:
# One [name, R2, MAE] row per candidate model.
model_output = [
    scorer(name, estimator)
    for name, estimator in models.items()
]

In [42]:
model_df1 = pd.DataFrame(model_output, columns = ['Name', 'R2_score', 'MAE'])

In [43]:
model_df1.sort_values(['MAE'])

Unnamed: 0,Name,R2_score,MAE
6,extra trees,0.895138,0.469985
10,xgboost,0.89585,0.493456
5,random forest,0.891004,0.497927
9,mlp,0.873575,0.557385
7,gradient boosting,0.876905,0.569836
0,linear_reg,0.85461,0.649715
2,ridge,0.854685,0.652931
4,decision tree,0.807235,0.698251
8,adaboost,0.750281,0.821068
1,svr,0.769741,0.834124


# OneHotEncoding With PCA

In [44]:
# Preprocessing for the PCA experiment: scaled numerics, ordinal codes for
# every column in `columns_to_encode`, and dense one-hot dummies for
# 'sector' and 'agePossession'.
preprocessor = ColumnTransformer(
    [
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1', OneHotEncoder(drop = 'first', sparse_output = False), ['sector', 'agePossession']),
    ],
    remainder = 'passthrough',
)

In [45]:
# Preprocess, reduce with PCA, then fit a linear model.
# `n_components = 0.95` asks PCA for as many components as are needed to
# reach 95% of the explained variance.
# NOTE(review): the ordinal codes produced by the preprocessor are not
# standardised, so their larger numeric range probably dominates the variance
# PCA sees and very few components survive; this may be why the R2 printed
# below is so low. Consider scaling all features before PCA — TODO confirm.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('pca' , PCA(n_components = 0.95)),
    ('regressor', LinearRegression())    
])

In [46]:
# Kfold cross-validation 
# Use the splitter defined on the line above; the cell previously passed the
# differently-cased global `Kfold` left over from an earlier cell.
kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
scores = cross_val_score(pipeline, X, y_transformed, cv = kfold, scoring  = 'r2')

In [47]:
print(scores.mean())

0.06225201431451133


In [48]:
print(scores.std())

0.01986059407164017


In [49]:
def scorer(model_name, model):
    """Evaluate one regressor behind the global ``preprocessor`` plus PCA.

    This variant inserts ``PCA(n_components = 0.95)`` between the
    preprocessing step and the regressor.

    Parameters
    ----------
    model_name : str
        Label stored as the first element of the returned row.
    model : estimator
        Regressor used as the final ``'regressor'`` step of the pipeline.

    Returns
    -------
    list
        ``[model_name, mean_cv_r2, holdout_mae]``. The R2 is the mean over a
        shuffled 10-fold cross-validation on ``y_transformed``; the MAE is
        computed on a 20% hold-out split after undoing the log1p transform
        with ``np.expm1``.

    Notes
    -----
    Reads the notebook globals ``preprocessor``, ``X`` and ``y_transformed``.
    """
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('pca', PCA(n_components = 0.95)),
        ('regressor' , model)
    ])

    # Kfold cross-validation
    # Pass the splitter created here (previously the notebook-level `Kfold`
    # was used and this local was dead), so the function no longer depends on
    # a variable defined in another cell.
    kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
    scores = cross_val_score(pipeline, X, y_transformed, cv = kfold, scoring  = 'r2')

    X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size = 0.2, random_state = 42)

    pipeline.fit(X_train, y_train)

    # Invert log1p on both predictions and truth so the MAE is on the
    # original `price` scale.
    y_pred = np.expm1(pipeline.predict(X_test))
    mae = mean_absolute_error(np.expm1(y_test), y_pred)

    return [model_name, scores.mean(), mae]

In [50]:
# Candidate regressors to compare. Every estimator that draws random numbers
# gets a fixed seed so the comparison table can be reproduced on a re-run.
models = {
    'linear_reg' : LinearRegression(),
    'svr' : SVR(),
    'ridge' : Ridge(),
    'lasso' : Lasso(),
    'decision tree' : DecisionTreeRegressor(random_state = 42),
    'random forest' : RandomForestRegressor(random_state = 42),
    'extra trees' : ExtraTreesRegressor(random_state = 42),
    'gradient boosting' : GradientBoostingRegressor(random_state = 42),
    'adaboost' : AdaBoostRegressor(random_state = 42),
    'mlp' :  MLPRegressor(random_state = 42),
    'xgboost' : XGBRegressor(random_state = 42)
}

In [51]:
# One [name, R2, MAE] row per candidate model.
model_output = [
    scorer(name, estimator)
    for name, estimator in models.items()
]

In [52]:
model_df2 = pd.DataFrame(model_output, columns = ['Name', 'R2_score', 'MAE'])

In [53]:
model_df2.sort_values(['MAE'])

Unnamed: 0,Name,R2_score,MAE
5,random forest,0.762542,0.65271
6,extra trees,0.739356,0.706622
4,decision tree,0.696442,0.761509
10,xgboost,0.622205,0.967581
7,gradient boosting,0.610623,0.987906
1,svr,0.218073,1.361163
8,adaboost,0.294255,1.392914
9,mlp,0.217636,1.400412
2,ridge,0.062252,1.526707
0,linear_reg,0.062252,1.526707


# Target encoding 
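- Of the encodings tried here, this one gives the best results
- Be careful: target encoding can leak the target into the features, so the encoder must be fit on the training data only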

In [54]:
import category_encoders as ce

In [55]:
columns_to_encode = ['property_type', 'sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

In [56]:
# Creating a column transformer for preprocessing 
preprocessor = ColumnTransformer(
    transformers = [
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop = 'first', sparse_output = False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ],
    remainder = 'passthrough'
)

In [57]:
# Linear baseline on top of the target-encoding preprocessing.
pipeline = Pipeline(steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])

In [58]:
# Kfold cross-validation 
# Use the splitter defined on the line above; the cell previously passed the
# differently-cased global `Kfold` left over from an earlier cell.
kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
scores = cross_val_score(pipeline, X, y_transformed, cv = kfold, scoring  = 'r2')

In [59]:
print(scores.mean())

0.829521918225536


In [60]:
print(scores.std())

0.01838446337912286


In [61]:
def scorer(model_name, model):
    """Evaluate one regressor behind the current global ``preprocessor``.

    Parameters
    ----------
    model_name : str
        Label stored as the first element of the returned row.
    model : estimator
        Regressor used as the final ``'regressor'`` step of the pipeline.

    Returns
    -------
    list
        ``[model_name, mean_cv_r2, holdout_mae]``. The R2 is the mean over a
        shuffled 10-fold cross-validation on ``y_transformed``; the MAE is
        computed on a 20% hold-out split after undoing the log1p transform
        with ``np.expm1``.

    Notes
    -----
    Reads the notebook globals ``preprocessor``, ``X`` and ``y_transformed``.
    """
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor' , model)
    ])

    # Kfold cross-validation
    # Pass the splitter created here (previously the notebook-level `Kfold`
    # was used and this local was dead), so the function no longer depends on
    # a variable defined in another cell.
    kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
    scores = cross_val_score(pipeline, X, y_transformed, cv = kfold, scoring  = 'r2')

    X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, test_size = 0.2, random_state = 42)

    pipeline.fit(X_train, y_train)

    # Invert log1p on both predictions and truth so the MAE is on the
    # original `price` scale.
    y_pred = np.expm1(pipeline.predict(X_test))
    mae = mean_absolute_error(np.expm1(y_test), y_pred)

    return [model_name, scores.mean(), mae]

In [62]:
# Candidate regressors to compare. Every estimator that draws random numbers
# gets a fixed seed so the comparison table can be reproduced on a re-run.
models = {
    'linear_reg' : LinearRegression(),
    'svr' : SVR(),
    'ridge' : Ridge(),
    'lasso' : Lasso(),
    'decision tree' : DecisionTreeRegressor(random_state = 42),
    'random forest' : RandomForestRegressor(random_state = 42),
    'extra trees' : ExtraTreesRegressor(random_state = 42),
    'gradient boosting' : GradientBoostingRegressor(random_state = 42),
    'adaboost' : AdaBoostRegressor(random_state = 42),
    'mlp' :  MLPRegressor(random_state = 42),
    'xgboost' : XGBRegressor(random_state = 42)
}

In [63]:
# One [name, R2, MAE] row per candidate model.
model_output = [
    scorer(name, estimator)
    for name, estimator in models.items()
]

In [64]:
model_df3 = pd.DataFrame(model_output, columns = ['Name', 'R2_score', 'MAE'])

In [65]:
model_df3.sort_values(['MAE'])

Unnamed: 0,Name,R2_score,MAE
10,xgboost,0.904798,0.447518
6,extra trees,0.903085,0.452407
5,random forest,0.900772,0.454213
7,gradient boosting,0.889079,0.508402
4,decision tree,0.826701,0.554455
9,mlp,0.850178,0.602735
8,adaboost,0.817116,0.682166
0,linear_reg,0.829522,0.713011
2,ridge,0.829536,0.713523
1,svr,0.782917,0.818851


# Hyperparameter tuning

In [66]:
from sklearn.model_selection import GridSearchCV

In [67]:
# Search space for the grid search. The `regressor__` prefix routes each
# setting to the pipeline step named 'regressor'.
# 3 * 4 * 3 * 3 * 3 = 324 parameter combinations in total.
param_grid = {
    'regressor__n_estimators' : [100, 200, 300],
    'regressor__max_depth' : [3, 5, 7, 10],
    'regressor__learning_rate' : [0.01, 0.1, 0.2],
    'regressor__subsample' : [0.6, 0.8, 1.0],
    'regressor__colsample_bytree' : [0.6, 0.8, 1.0]
}

In [68]:
columns_to_encode = ['property_type', 'sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

In [69]:
# Preprocessing used during tuning: scaled numerics, ordinal codes for every
# column in `columns_to_encode`, dense one-hot dummies for 'agePossession'
# and a target encoding of 'sector'.
preprocessor = ColumnTransformer(
    [
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1', OneHotEncoder(drop = 'first', sparse_output = False), ['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector']),
    ],
    remainder = 'passthrough',
)

In [70]:
# Estimator handed to the grid search: preprocessing followed by XGBoost.
xgb_regressor = XGBRegressor(objective = 'reg:squarederror', random_state = 42)
pipeline = Pipeline(steps = [
    ('preprocessor', preprocessor),
    ('regressor', xgb_regressor),
])

In [71]:
Kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)

In [72]:
search = GridSearchCV(pipeline, param_grid, cv = Kfold, scoring = 'r2', n_jobs = -1, verbose = 4)

In [73]:
search.fit(X, y_transformed)

Fitting 10 folds for each of 324 candidates, totalling 3240 fits


In [74]:
final_pipe = search.best_estimator_

In [75]:
search.best_params_

{'regressor__colsample_bytree': 0.8,
 'regressor__learning_rate': 0.1,
 'regressor__max_depth': 5,
 'regressor__n_estimators': 300,
 'regressor__subsample': 0.8}

In [76]:
print(search.best_score_)

0.9078854784206809


# Exporting the model 

In [77]:
# Preprocessing for the pipeline that is fitted on the full data below:
# scaled numerics, ordinal codes for every column in `columns_to_encode`, and
# dense one-hot dummies for 'sector' and 'agePossession'.
# NOTE(review): this is not the target-encoding preprocessor that the grid
# search above was run with — confirm the switch back to one-hot is intended.
preprocessor = ColumnTransformer(
    transformers = [
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop = 'first', sparse_output = False),['sector', 'agePossession'])
    ],
    remainder = 'passthrough'
)

In [78]:
# Final pipeline: preprocessing followed by XGBoost.
# NOTE(review): the regressor is built with only `objective` and
# `random_state` set; neither `search.best_params_` nor `final_pipe` from the
# tuning section is used in the cells shown here — confirm whether the tuned
# settings should be applied before exporting.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor',  XGBRegressor(objective='reg:squarederror', random_state=42))    
])

In [79]:
pipeline.fit(X, y_transformed)

In [80]:
# import pickle

# with open('pipeline.pkl', 'wb') as file:
#     pickle.dump(pipeline, file)

In [81]:
# with open('df.pkl', 'wb') as file:
#     pickle.dump(X, file)

# Trying out predictions

In [82]:
X.columns

Index(['property_type', 'sector', 'bedRoom', 'bathroom', 'balcony',
       'agePossession', 'built_up_area', 'servant room', 'store room',
       'furnishing_type', 'luxury_category', 'floor_category'],
      dtype='object')

In [84]:
print(X.iloc[0].values)

['flat' 'sector 36' np.float64(3.0) np.float64(2.0) '2' 'New Property'
 np.float64(850.0) np.float64(0.0) np.float64(0.0) 'unfurnished' 'Low'
 'Low Floor']


In [105]:
# A single hand-written listing, keyed by the feature names in `X.columns`.
sample_listing = {
    'property_type': 'house',
    'sector': 'sector 102',
    'bedRoom': 2,
    'bathroom': 3,
    'balcony': '3+',
    'agePossession': 'Relatively New',
    'built_up_area': 1350,
    'servant room': 0,
    'store room': 1,
    'furnishing_type': 'furnished',
    'luxury_category': 'Medium',
    'floor_category': 'Low Floor',
}

columns = list(sample_listing)
data = [list(sample_listing.values())]

one_df = pd.DataFrame(data, columns = columns)

one_df

Unnamed: 0,property_type,sector,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,house,sector 102,2,3,3+,Relatively New,1350,0,1,furnished,Medium,Low Floor


In [106]:
print('Price:-',np.expm1(pipeline.predict(one_df)))

Price:- [2.2984738]
