# This notebook will serve for the analysis and feature engineering of the Room Features for the TFW project

In [668]:
# Import necassary libraries
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt

ROOT = os.environ.get('PWD')


In [669]:

# read in dataset

df_room_features = pd.read_csv('../data/rooms_features_20210713.csv', low_memory=False)


In [670]:
df_room_features.nunique()

listing_id       30269
description         29
area               345
description.1      156
dtype: int64

# prepare file for the pivot, delete columns with little usable data

In [671]:
del df_room_features['area']

In [672]:
del df_room_features['description.1']

In [673]:
df_room_features['quantity'] = 1

In [674]:
df = df_room_features.reset_index

In [675]:
p = pd.pivot_table(df_room_features, index='listing_id', columns='description', values='quantity', fill_value=0)

In [676]:
p

description,1 room apartment,Library,balcony,bathroom,bedroom,category,children`s room,corridor,dining room,en suite bathroom,...,living room,lounge,roof-deck,separate WC,storage room,terrace,veranda,washroom,wellness,winter garden
listing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00005de7-6ed8-541b-be83-2573a46cd307,0,0,1,1,1,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
0002653c-7e59-59a2-89f2-f6574ba14a0a,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0003f153-450a-5837-8a46-bd5db5b536b1,1,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00058d0d-579c-5b84-814b-8920fb5f1a27,0,0,1,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
000cd267-f826-50f4-aa55-cec9aaa079ae,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ffedb526-cde3-54d6-9db9-f865ed0a0f13,0,0,1,1,1,0,0,0,0,0,...,0,0,0,0,1,1,0,0,1,0
fff6b775-e07f-578e-b4f6-a792426ce3aa,0,0,0,1,1,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
fffed690-31dd-5409-bc1e-81121ec35345,0,0,0,1,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
ffffa8d1-d94a-5475-8e0d-2057495f4734,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [677]:
p.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30269 entries, 00005de7-6ed8-541b-be83-2573a46cd307 to ffffafae-e827-5f6a-9691-97b4e788d498
Data columns (total 29 columns):
 #   Column                Non-Null Count  Dtype
---  ------                --------------  -----
 0   1 room apartment      30269 non-null  int64
 1   Library               30269 non-null  int64
 2   balcony               30269 non-null  int64
 3   bathroom              30269 non-null  int64
 4   bedroom               30269 non-null  int64
 5   category              30269 non-null  int64
 6   children`s room       30269 non-null  int64
 7   corridor              30269 non-null  int64
 8   dining room           30269 non-null  int64
 9   en suite bathroom     30269 non-null  int64
 10  gallery               30269 non-null  int64
 11  garden                30269 non-null  int64
 12  general               30269 non-null  int64
 13  heating room          30269 non-null  int64
 14  kitchen               30269 non-null  int

In [678]:
del p['garden']

In [679]:
del p['general']

In [680]:
del p['bathroom']

In [681]:
del p['bedroom']

In [682]:
del p['balcony']

In [683]:
del p['category']

In [684]:
del p['terrace']

In [685]:
p.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30269 entries, 00005de7-6ed8-541b-be83-2573a46cd307 to ffffafae-e827-5f6a-9691-97b4e788d498
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype
---  ------                --------------  -----
 0   1 room apartment      30269 non-null  int64
 1   Library               30269 non-null  int64
 2   children`s room       30269 non-null  int64
 3   corridor              30269 non-null  int64
 4   dining room           30269 non-null  int64
 5   en suite bathroom     30269 non-null  int64
 6   gallery               30269 non-null  int64
 7   heating room          30269 non-null  int64
 8   kitchen               30269 non-null  int64
 9   kitchen living        30269 non-null  int64
 10  kitchenette           30269 non-null  int64
 11  living / bedroom      30269 non-null  int64
 12  living / dining room  30269 non-null  int64
 13  living room           30269 non-null  int64
 14  lounge                30269 non-null  int

In [686]:
# drop additonal unnecessary columns

p.loc[p['kitchenette'] == 1, 'kitchen'] = 1

In [687]:
p.drop(['kitchenette'],axis=1,inplace=True)

In [688]:
p.head()

description,1 room apartment,Library,children`s room,corridor,dining room,en suite bathroom,gallery,heating room,kitchen,kitchen living,...,living / dining room,living room,lounge,roof-deck,separate WC,storage room,veranda,washroom,wellness,winter garden
listing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00005de7-6ed8-541b-be83-2573a46cd307,0,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,1,0,0,0,0
0002653c-7e59-59a2-89f2-f6574ba14a0a,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0003f153-450a-5837-8a46-bd5db5b536b1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00058d0d-579c-5b84-814b-8920fb5f1a27,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
000cd267-f826-50f4-aa55-cec9aaa079ae,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [729]:
list(p.columns.values)

['1 room apartment',
 'Library',
 'children`s room',
 'corridor',
 'dining room',
 'en suite bathroom',
 'gallery',
 'heating room',
 'kitchen',
 'kitchen living',
 'living / bedroom',
 'living / dining room',
 'living room',
 'lounge',
 'roof-deck',
 'separate WC',
 'storage room',
 'veranda',
 'washroom',
 'wellness',
 'winter garden']

In [689]:
# save to temp file

p.to_csv('../data/tempo_p.csv')

In [730]:
# read in dataset again

df_room_features = pd.read_csv('../data/rooms_features_20210713.csv', low_memory=False)


In [731]:
del df_room_features['area']

In [732]:
del df_room_features['description']

In [733]:
# add column with quantities

df_room_features['quantity'] = 1

In [734]:
df_room_features

Unnamed: 0,listing_id,description.1,quantity
0,fbd74bce-23dd-53e7-b988-8beb85160d6f,garden furniture,1
1,fbd74bce-23dd-53e7-b988-8beb85160d6f,mirror,1
2,fbd74bce-23dd-53e7-b988-8beb85160d6f,shower,1
3,fbd74bce-23dd-53e7-b988-8beb85160d6f,toilet,1
4,fbd74bce-23dd-53e7-b988-8beb85160d6f,wash basin,1
...,...,...,...
1417755,ab6fd3ee-a49a-5dc5-bc67-784515bccde7,toilet,1
1417756,ab6fd3ee-a49a-5dc5-bc67-784515bccde7,wash basin,1
1417757,ab6fd3ee-a49a-5dc5-bc67-784515bccde7,windbreak,1
1417758,ab6fd3ee-a49a-5dc5-bc67-784515bccde7,drying rack,1


In [735]:
p1 = pd.pivot_table(df_room_features, index='listing_id', columns='description.1', values='quantity', fill_value=0)

In [736]:
p1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30227 entries, 00005de7-6ed8-541b-be83-2573a46cd307 to ffffafae-e827-5f6a-9691-97b4e788d498
Columns: 156 entries, Blu-ray player to window
dtypes: int64(156)
memory usage: 36.2+ MB


In [697]:
list(p1.columns.values)

['Blu-ray player',
 'CD player',
 'CDs/ DVDs',
 'DVD-player',
 'Internet (DSL)',
 'Internet (ISDN)',
 'Internet connection',
 'VCR',
 'WiFi access',
 'additional bed',
 'air conditioning ',
 'alarm system',
 'armchair',
 'awning ',
 'baby cot',
 'baby monitor',
 'balcony',
 'barbecue area',
 'bath towels',
 'bathrobe',
 'bathtub',
 'beach chair',
 'bed linen',
 'bicycles',
 'bidet',
 'boat',
 'boat dock',
 'books',
 'bottle warmer',
 'bread service',
 'bunk bed',
 'cable/sat TV',
 'carport',
 'central heating',
 'ceramic hob',
 'changing mat',
 'charcoal grill',
 'chest of drawers',
 "child's bed",
 'children toilet seat',
 'cleaning supplies',
 'coffee machine',
 'colouring book / pencils',
 'computer',
 'cooker (2)',
 'cooker (3)',
 'cooker (4)',
 'crockery',
 'darts',
 'desk',
 'digital TV',
 'dining table',
 'dishwasher',
 'double bed',
 'double wash basin',
 'dryer',
 'drying rack',
 'egg cooker',
 'electric grill',
 'electric kitchen stove',
 'exercise room',
 'fan',
 'fax',
 'fi

In [698]:
# delete features already in the data from other datasets

p1.drop(['cable/sat TV','digital TV', 'standard TV', 'washing machine', 'Internet (DSL)','Internet (ISDN)','Internet connection','WiFi access', 'sauna','grill','gas grill','charcoal grill', 'balcony', 'barbecue area','window','dryer','terrace','dishwasher'],axis=1, inplace=True)

In [699]:
list(p1.columns.values)

['Blu-ray player',
 'CD player',
 'CDs/ DVDs',
 'DVD-player',
 'VCR',
 'additional bed',
 'air conditioning ',
 'alarm system',
 'armchair',
 'awning ',
 'baby cot',
 'baby monitor',
 'bath towels',
 'bathrobe',
 'bathtub',
 'beach chair',
 'bed linen',
 'bicycles',
 'bidet',
 'boat',
 'boat dock',
 'books',
 'bottle warmer',
 'bread service',
 'bunk bed',
 'carport',
 'central heating',
 'ceramic hob',
 'changing mat',
 'chest of drawers',
 "child's bed",
 'children toilet seat',
 'cleaning supplies',
 'coffee machine',
 'colouring book / pencils',
 'computer',
 'cooker (2)',
 'cooker (3)',
 'cooker (4)',
 'crockery',
 'darts',
 'desk',
 'dining table',
 'double bed',
 'double wash basin',
 'drying rack',
 'egg cooker',
 'electric grill',
 'electric kitchen stove',
 'exercise room',
 'fan',
 'fax',
 'fire alarm',
 'fireplace',
 'first-aid kit',
 'flat iron',
 'fly screen',
 'folding bed',
 'food processor',
 'freezer',
 'freezer compartment',
 'game console',
 'games',
 'garage',
 'ga

In [700]:
p1

description.1,Blu-ray player,CD player,CDs/ DVDs,DVD-player,VCR,additional bed,air conditioning,alarm system,armchair,awning,...,toys,trampoline,underfloor heating,vacuum cleaner,walk-in shower,wall bed,wardrobe,wash basin,water bed,windbreak
listing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00005de7-6ed8-541b-be83-2573a46cd307,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,1,0,0
0002653c-7e59-59a2-89f2-f6574ba14a0a,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
0003f153-450a-5837-8a46-bd5db5b536b1,0,1,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
00058d0d-579c-5b84-814b-8920fb5f1a27,1,1,0,1,0,0,0,0,1,0,...,0,0,0,1,0,0,1,1,0,0
000cd267-f826-50f4-aa55-cec9aaa079ae,0,1,0,1,0,0,0,0,0,0,...,0,0,1,1,1,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ffedb526-cde3-54d6-9db9-f865ed0a0f13,1,1,0,1,0,0,0,0,1,0,...,1,0,0,1,0,0,1,1,0,1
fff6b775-e07f-578e-b4f6-a792426ce3aa,0,1,0,0,0,0,0,0,1,0,...,1,0,0,1,0,0,1,1,0,0
fffed690-31dd-5409-bc1e-81121ec35345,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
ffffa8d1-d94a-5475-8e0d-2057495f4734,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,1,0,0


In [701]:
# consolidate features for the purposes of modelling
p1.loc[p1['freezer compartment'] == 1, 'freezer'] = 1


In [702]:
p1['cooking'] = p1['stove']

In [703]:
p1.loc[p1['induction hob'] == 1, 'cooking'] = 1

In [704]:

p1.loc[p1['gas stove'] == 1, 'cooking'] = 1

In [705]:
p1.loc[p1['electric kitchen stove'] == 1, 'cooking'] = 1

In [706]:
p1.loc[p1['ceramic hob'] == 1, 'cooking'] = 1

In [707]:
p1.loc[p1['oven'] == 1, 'cooking'] = 1

In [708]:
p1.loc[p1['cooker (2)'] == 1, 'cooking'] = 1

In [709]:
p1.loc[p1['cooker (3)'] == 1, 'cooking'] = 1

In [710]:
p1.loc[p1['cooker (4)'] == 1, 'cooking'] = 1

In [711]:
p1.loc[p1['laptop'] == 1, 'computer'] = 1

In [712]:
p1.loc[p1['CD player'] == 1, 'stereo system'] = 1

In [713]:
list(p1.columns.values)

['Blu-ray player',
 'CD player',
 'CDs/ DVDs',
 'DVD-player',
 'VCR',
 'additional bed',
 'air conditioning ',
 'alarm system',
 'armchair',
 'awning ',
 'baby cot',
 'baby monitor',
 'bath towels',
 'bathrobe',
 'bathtub',
 'beach chair',
 'bed linen',
 'bicycles',
 'bidet',
 'boat',
 'boat dock',
 'books',
 'bottle warmer',
 'bread service',
 'bunk bed',
 'carport',
 'central heating',
 'ceramic hob',
 'changing mat',
 'chest of drawers',
 "child's bed",
 'children toilet seat',
 'cleaning supplies',
 'coffee machine',
 'colouring book / pencils',
 'computer',
 'cooker (2)',
 'cooker (3)',
 'cooker (4)',
 'crockery',
 'darts',
 'desk',
 'dining table',
 'double bed',
 'double wash basin',
 'drying rack',
 'egg cooker',
 'electric grill',
 'electric kitchen stove',
 'exercise room',
 'fan',
 'fax',
 'fire alarm',
 'fireplace',
 'first-aid kit',
 'flat iron',
 'fly screen',
 'folding bed',
 'food processor',
 'freezer',
 'freezer compartment',
 'game console',
 'games',
 'garage',
 'ga

In [737]:
# drop consolidatred categories 

p1.drop(['kitchenette','freezer compartment','gas stove','induction hob', 'stove', 'ceramic hob','oven', 'electric kitchen stove','cooker (2)','cooker (3)','cooker (4)','laptop','CD player'],axis=1,inplace=True)

In [738]:
list(p1.columns.values)

['Blu-ray player',
 'CDs/ DVDs',
 'DVD-player',
 'Internet (DSL)',
 'Internet (ISDN)',
 'Internet connection',
 'VCR',
 'WiFi access',
 'additional bed',
 'air conditioning ',
 'alarm system',
 'armchair',
 'awning ',
 'baby cot',
 'baby monitor',
 'balcony',
 'barbecue area',
 'bath towels',
 'bathrobe',
 'bathtub',
 'beach chair',
 'bed linen',
 'bicycles',
 'bidet',
 'boat',
 'boat dock',
 'books',
 'bottle warmer',
 'bread service',
 'bunk bed',
 'cable/sat TV',
 'carport',
 'central heating',
 'changing mat',
 'charcoal grill',
 'chest of drawers',
 "child's bed",
 'children toilet seat',
 'cleaning supplies',
 'coffee machine',
 'colouring book / pencils',
 'computer',
 'crockery',
 'darts',
 'desk',
 'digital TV',
 'dining table',
 'dishwasher',
 'double bed',
 'double wash basin',
 'dryer',
 'drying rack',
 'egg cooker',
 'electric grill',
 'exercise room',
 'fan',
 'fax',
 'fire alarm',
 'fireplace',
 'first-aid kit',
 'flat iron',
 'fly screen',
 'folding bed',
 'food process

In [739]:
# drop additonal unnecessary columns

p1.drop(['air conditioning ','baby cot'],axis=1,inplace=True)

In [717]:
list(p1.columns.values)

['Blu-ray player',
 'CDs/ DVDs',
 'DVD-player',
 'VCR',
 'additional bed',
 'alarm system',
 'armchair',
 'awning ',
 'baby monitor',
 'bath towels',
 'bathrobe',
 'bathtub',
 'beach chair',
 'bed linen',
 'bicycles',
 'bidet',
 'boat',
 'boat dock',
 'books',
 'bottle warmer',
 'bread service',
 'bunk bed',
 'carport',
 'central heating',
 'changing mat',
 'chest of drawers',
 "child's bed",
 'children toilet seat',
 'cleaning supplies',
 'coffee machine',
 'colouring book / pencils',
 'computer',
 'crockery',
 'darts',
 'desk',
 'dining table',
 'double bed',
 'double wash basin',
 'drying rack',
 'egg cooker',
 'electric grill',
 'exercise room',
 'fan',
 'fax',
 'fire alarm',
 'fireplace',
 'first-aid kit',
 'flat iron',
 'fly screen',
 'folding bed',
 'food processor',
 'freezer',
 'game console',
 'games',
 'garage',
 'garden furniture',
 'garden shed',
 'guest toilet',
 'hair dryer',
 'high chair',
 'hot tub',
 'hot water',
 'infrared cabin',
 'ironing board',
 'kettle',
 'kings

In [718]:
p1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30227 entries, 00005de7-6ed8-541b-be83-2573a46cd307 to ffffafae-e827-5f6a-9691-97b4e788d498
Columns: 125 entries, Blu-ray player to cooking
dtypes: int64(125)
memory usage: 29.1+ MB


In [740]:
# save intermediate set

p1.to_csv('../data/pivot2_room_features.csv')

# now merge the two pivot tables into one big reshaped dataset

In [741]:
df_flat_room_features = pd.merge(p, p1, left_on='listing_id', right_on='listing_id', how='right')

In [721]:
df_flat_room_features.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30227 entries, 00005de7-6ed8-541b-be83-2573a46cd307 to ffffafae-e827-5f6a-9691-97b4e788d498
Columns: 146 entries, 1 room apartment to cooking
dtypes: int64(146)
memory usage: 33.9+ MB


In [742]:
list(df_flat_room_features.columns.values)

['1 room apartment',
 'Library',
 'children`s room',
 'corridor',
 'dining room',
 'en suite bathroom',
 'gallery',
 'heating room',
 'kitchen',
 'kitchen living',
 'living / bedroom',
 'living / dining room',
 'living room',
 'lounge',
 'roof-deck',
 'separate WC',
 'storage room',
 'veranda',
 'washroom',
 'wellness',
 'winter garden',
 'Blu-ray player',
 'CDs/ DVDs',
 'DVD-player',
 'Internet (DSL)',
 'Internet (ISDN)',
 'Internet connection',
 'VCR',
 'WiFi access',
 'additional bed',
 'alarm system',
 'armchair',
 'awning ',
 'baby monitor',
 'balcony',
 'barbecue area',
 'bath towels',
 'bathrobe',
 'bathtub',
 'beach chair',
 'bed linen',
 'bicycles',
 'bidet',
 'boat',
 'boat dock',
 'books',
 'bottle warmer',
 'bread service',
 'bunk bed',
 'cable/sat TV',
 'carport',
 'central heating',
 'changing mat',
 'charcoal grill',
 'chest of drawers',
 "child's bed",
 'children toilet seat',
 'cleaning supplies',
 'coffee machine',
 'colouring book / pencils',
 'computer',
 'crockery'

In [743]:
# removal of columns closer inspection has revealed to still be in the dataset, but that should not be

df_flat_room_features.drop(['electric grill'],axis=1,inplace=True)

In [744]:
# change spaces in column names and different cases etc to underscores and lower case

df_flat_room_features.rename(columns={'Library': 'library', 'colouring book / pencils':'colouring book pencils', 'living / bedroom': 'living bedroom','living / dining': 'living bedroom', 'CDs/ DVDs':'CDs DVDs'},inplace=True)

In [745]:
df_flat_room_features.columns = df_flat_room_features.columns.str.replace(' ','_')

In [746]:
df_flat_room_features

Unnamed: 0_level_0,1_room_apartment,library,children`s_room,corridor,dining_room,en_suite_bathroom,gallery,heating_room,kitchen,kitchen_living,...,underfloor_heating,vacuum_cleaner,walk-in_shower,wall_bed,wardrobe,wash_basin,washing_machine,water_bed,windbreak,window
listing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00005de7-6ed8-541b-be83-2573a46cd307,0,0,0,0,0,0,0,0,1,0,...,0,1,0,0,1,1,1,0,0,0
0002653c-7e59-59a2-89f2-f6574ba14a0a,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
0003f153-450a-5837-8a46-bd5db5b536b1,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
00058d0d-579c-5b84-814b-8920fb5f1a27,0,0,0,0,0,0,0,0,1,0,...,0,1,0,0,1,1,1,0,0,0
000cd267-f826-50f4-aa55-cec9aaa079ae,0,0,0,0,0,0,0,0,0,1,...,1,1,1,0,1,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ffedb526-cde3-54d6-9db9-f865ed0a0f13,0,0,0,0,0,0,0,0,1,0,...,0,1,0,0,1,1,0,0,1,0
fff6b775-e07f-578e-b4f6-a792426ce3aa,0,0,0,1,0,0,0,0,1,0,...,0,1,0,0,1,1,1,0,0,0
fffed690-31dd-5409-bc1e-81121ec35345,0,0,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,1,0,0,0
ffffa8d1-d94a-5475-8e0d-2057495f4734,0,0,0,0,0,0,0,0,0,1,...,0,1,0,0,1,1,0,0,0,0


In [727]:
list(df_flat_room_features.columns.values)

['1_room_apartment',
 'library',
 'children`s_room',
 'corridor',
 'dining_room',
 'en_suite_bathroom',
 'gallery',
 'heating_room',
 'kitchen',
 'kitchen_living',
 'living_bedroom',
 'living_/_dining_room',
 'living_room',
 'lounge',
 'roof-deck',
 'separate_WC',
 'storage_room',
 'veranda',
 'washroom',
 'wellness',
 'winter_garden',
 'Blu-ray_player',
 'CDs_DVDs',
 'DVD-player',
 'VCR',
 'additional_bed',
 'alarm_system',
 'armchair',
 'awning_',
 'baby_monitor',
 'bath_towels',
 'bathrobe',
 'bathtub',
 'beach_chair',
 'bed_linen',
 'bicycles',
 'bidet',
 'boat',
 'boat_dock',
 'books',
 'bottle_warmer',
 'bread_service',
 'bunk_bed',
 'carport',
 'central_heating',
 'changing_mat',
 'chest_of_drawers',
 "child's_bed",
 'children_toilet_seat',
 'cleaning_supplies',
 'coffee_machine',
 'colouring_book_pencils',
 'computer',
 'crockery',
 'darts',
 'desk',
 'dining_table',
 'double_bed',
 'double_wash_basin',
 'drying_rack',
 'egg_cooker',
 'exercise_room',
 'fan',
 'fax',
 'fire_ala

In [747]:
# save the final dataset to csv

df_flat_room_features.to_csv('../data/room_features20210719.csv')