In [1]:
import pandas as pd 
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the apartment listings from 'apartments.csv' (path relative to the working
# directory) and display the untouched frame.
raw = pd.read_csv('apartments.csv')
raw

Unnamed: 0,beds,baths,area,hoa,parking,year,floor,exposure,elevator,price
0,1,1,700,230.0,0,1895,2,W,No,199000.0
1,1,1,750,546.0,1,1951,4,W,Yes,185000.0
2,1,1,985,424.0,1,1892,3,W,Yes,210000.0
3,2,2,1200,973.0,1,1965,8,E,Yes,209900.0
4,3,2,1600,150.0,0,2020,3,E,No,279000.0


In [3]:
# Print the raw table as compact Markdown. Note: .replace(' ', '') removes every space
# in the rendered text, i.e. the column padding and any space inside a cell value alike.
print(raw.to_markdown().replace(' ', ''))

||beds|baths|area|hoa|parking|year|floor|exposure|elevator|price|
|---:|-------:|--------:|-------:|------:|----------:|-------:|--------:|:-----------|:-----------|--------:|
|0|1|1|700|230|0|1895|2|W|No|199000|
|1|1|1|750|546|1|1951|4|W|Yes|185000|
|2|1|1|985|424|1|1892|3|W|Yes|210000|
|3|2|2|1200|973|1|1965|8|E|Yes|209900|
|4|3|2|1600|150|0|2020|3|E|No|279000|


In [4]:
# Work on a copy so that `raw` keeps the original values for the final ranking tables.
df = raw.copy()

In [5]:
# Encode exposure as a 0/1 flag (1 for 'E', 0 for anything else).
# Vectorised comparison instead of a row-wise apply, and read from the untouched `raw`
# frame so that re-running this cell cannot turn the whole column into zeros.
df['exposure'] = (raw['exposure'] == 'E').astype(int)

In [6]:
# Encode elevator as a 0/1 flag (1 for 'Yes', 0 for anything else).
# Vectorised comparison instead of a row-wise apply, and read from the untouched `raw`
# frame so that re-running this cell cannot turn the whole column into zeros.
df['elevator'] = (raw['elevator'] == 'Yes').astype(int)

In [7]:
# Display the working frame after 'exposure' and 'elevator' were recoded to 0/1.
df

Unnamed: 0,beds,baths,area,hoa,parking,year,floor,exposure,elevator,price
0,1,1,700,230.0,0,1895,2,0,0,199000.0
1,1,1,750,546.0,1,1951,4,0,1,185000.0
2,1,1,985,424.0,1,1892,3,0,1,210000.0
3,2,2,1200,973.0,1,1965,8,1,1,209900.0
4,3,2,1600,150.0,0,2020,3,1,0,279000.0


In [8]:
# Print the encoded table as compact Markdown (all spaces are stripped from the text).
print(df.to_markdown().replace(' ', ''))

||beds|baths|area|hoa|parking|year|floor|exposure|elevator|price|
|---:|-------:|--------:|-------:|------:|----------:|-------:|--------:|-----------:|-----------:|--------:|
|0|1|1|700|230|0|1895|2|0|0|199000|
|1|1|1|750|546|1|1951|4|0|1|185000|
|2|1|1|985|424|1|1892|3|0|1|210000|
|3|2|2|1200|973|1|1965|8|1|1|209900|
|4|3|2|1600|150|0|2020|3|1|0|279000|


In [9]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the
# all-numeric frame, shown before any rescaling is applied.
df.describe()

Unnamed: 0,beds,baths,area,hoa,parking,year,floor,exposure,elevator,price
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,1.6,1.4,1047.0,464.6,0.6,1944.6,4.0,0.4,0.6,216580.0
std,0.894427,0.547723,367.994565,324.328229,0.547723,53.313225,2.345208,0.547723,0.547723,36368.007919
min,1.0,1.0,700.0,150.0,0.0,1892.0,2.0,0.0,0.0,185000.0
25%,1.0,1.0,750.0,230.0,0.0,1895.0,3.0,0.0,0.0,199000.0
50%,1.0,1.0,985.0,424.0,1.0,1951.0,3.0,0.0,1.0,209900.0
75%,2.0,2.0,1200.0,546.0,1.0,1965.0,4.0,1.0,1.0,210000.0
max,3.0,2.0,1600.0,973.0,1.0,2020.0,8.0,1.0,1.0,279000.0


In [10]:
# Min-max scaler mapping each column onto [10, 100].
# NOTE(review): the lower bound is presumably kept above 0 on purpose - the scores below
# multiply values raised to positive and negative exponents, so a scaled 0 would zero
# the product or divide by zero. Confirm the choice of 10 and 100.
mms = MinMaxScaler(feature_range=(10, 100))

In [11]:
# Rescale every column (including 'price') to the scaler's feature range.
# fit_transform returns a bare array, so the column names AND the row index are restored
# explicitly; the row labels matter because the scores are later merged back onto `raw`
# by index.
df = pd.DataFrame(mms.fit_transform(df), columns=df.columns, index=df.index)
df

Unnamed: 0,beds,baths,area,hoa,parking,year,floor,exposure,elevator,price
0,10.0,10.0,10.0,18.748481,10.0,12.109375,10.0,10.0,10.0,23.404255
1,10.0,10.0,15.0,53.304982,100.0,51.484375,40.0,10.0,100.0,10.0
2,10.0,10.0,38.5,39.963548,100.0,10.0,25.0,10.0,100.0,33.93617
3,55.0,100.0,60.0,100.0,100.0,61.328125,100.0,100.0,100.0,33.840426
4,100.0,100.0,100.0,10.0,10.0,100.0,25.0,100.0,10.0,100.0


In [12]:
# Print the scaled table as compact Markdown (all spaces are stripped from the text).
print(df.to_markdown().replace(' ', ''))

||beds|baths|area|hoa|parking|year|floor|exposure|elevator|price|
|---:|-------:|--------:|-------:|---------:|----------:|---------:|--------:|-----------:|-----------:|---------:|
|0|10|10|10|18.7485|10|12.1094|10|10|10|23.4043|
|1|10|10|15|53.305|100|51.4844|40|10|100|10|
|2|10|10|38.5|39.9635|100|10|25|10|100|33.9362|
|3|55|100|60|100|100|61.3281|100|100|100|33.8404|
|4|100|100|100|10|10|100|25|100|10|100|


In [13]:
# Baseline exponents for the weighted product: every criterion has magnitude 1.
# A positive exponent rewards a larger scaled value, a negative one penalises it.
weights = dict(
    beds=1.0,
    baths=1.0,
    area=1.0,
    hoa=-1.0,
    parking=1.0,
    year=1.0,
    floor=-1.0,
    price=-1.0,
    exposure=1.0,
    elevator=1.0,
)

In [14]:
# Hand-computed weighted product for row 0 of the scaled frame, one factor per column in
# frame order: beds, baths, area, hoa, parking, year, floor, exposure, elevator, price.
# The operands are the displayed (rounded) values, so the result can differ from the
# value computed on the full-precision frame in the trailing digits.
(10        **  1) * \
(10        **  1) * \
(10        **  1) * \
(18.748481 ** -1) * \
(10        **  1) * \
(12.109375 **  1) * \
(10        ** -1) * \
(10        **  1) * \
(10        **  1) * \
(23.404255 ** -1)

2759.6933065525714

In [15]:
def wpm(option, weights):
    """Score one option with the weighted product model (WPM).

    Multiplies together ``option[column] ** weights[column]`` for every column
    of ``option`` that has an entry in ``weights``.

    Parameters
    ----------
    option : mapping-like (e.g. dict or pandas Series)
        Criterion values keyed by column name; must provide ``.keys()``.
    weights : mapping
        Exponent per column name. A positive exponent rewards a larger value,
        a negative exponent penalises it.

    Returns
    -------
    number
        The product of the weighted criteria, or ``1`` when no column of
        ``option`` has a weight.
    """
    value = 1
    # Follow the option's own column order so the multiplication order stays stable.
    for column in option.keys():
        # Columns without a weight (e.g. a previously stored score) are skipped
        # explicitly rather than by swallowing a KeyError, so unrelated lookup
        # errors are no longer hidden.
        if column not in weights:
            continue
        value *= option[column] ** weights[column]
    return value

In [16]:
# Sanity check: score row 0 with wpm(); it should agree with the hand-computed product
# for the same row up to the rounding of the typed-in operands.
wpm(df.loc[0], weights)

2759.6932444176073

In [17]:
# Score every row with the current weights (wpm receives the row first, then `weights`)
# and list the options from best to worst.
df['wpm'] = df.apply(wpm, axis=1, args=(weights,))
df.sort_values('wpm', ascending=False)

Unnamed: 0,beds,baths,area,hoa,parking,year,floor,exposure,elevator,price,wpm
3,55.0,100.0,60.0,100.0,100.0,61.328125,100.0,100.0,100.0,33.840426,59805040.0
4,100.0,100.0,100.0,10.0,10.0,100.0,25.0,100.0,10.0,100.0,40000000.0
1,10.0,10.0,15.0,53.304982,100.0,51.484375,40.0,10.0,100.0,10.0,362192.1
2,10.0,10.0,38.5,39.963548,100.0,10.0,25.0,10.0,100.0,33.93617,113551.8
0,10.0,10.0,10.0,18.748481,10.0,12.109375,10.0,10.0,10.0,23.404255,2759.693


In [18]:
# Attach the scores to the original, human-readable listings (matched on the row index)
# and order them from best to worst.
dff = (
    raw.merge(df['wpm'], left_index=True, right_index=True)
       .sort_values(by='wpm', ascending=False)
)
dff

Unnamed: 0,beds,baths,area,hoa,parking,year,floor,exposure,elevator,price,wpm
3,2,2,1200,973.0,1,1965,8,E,Yes,209900.0,59805040.0
4,3,2,1600,150.0,0,2020,3,E,No,279000.0,40000000.0
1,1,1,750,546.0,1,1951,4,W,Yes,185000.0,362192.1
2,1,1,985,424.0,1,1892,3,W,Yes,210000.0,113551.8
0,1,1,700,230.0,0,1895,2,W,No,199000.0,2759.693


In [19]:
# Print the ranked listings as compact Markdown (all spaces are stripped from the text).
print(dff.to_markdown().replace(' ', ''))

||beds|baths|area|hoa|parking|year|floor|exposure|elevator|price|wpm|
|---:|-------:|--------:|-------:|------:|----------:|-------:|--------:|:-----------|:-----------|--------:|----------------:|
|3|2|2|1200|973|1|1965|8|E|Yes|209900|5.9805e+07|
|4|3|2|1600|150|0|2020|3|E|No|279000|4e+07|
|1|1|1|750|546|1|1951|4|W|Yes|185000|362192|
|2|1|1|985|424|1|1892|3|W|Yes|210000|113552|
|0|1|1|700|230|0|1895|2|W|No|199000|2759.69|


In [20]:
# Second scenario: unequal exponents (magnitudes 1, 3 and 5). This rebinds `weights`,
# replacing the equal-weight baseline defined earlier.
weights = dict(
    beds=1.0,
    baths=1.0,
    area=3.0,
    hoa=-5.0,
    parking=5.0,
    year=1.0,
    floor=-5.0,
    price=-3.0,
    exposure=1.0,
    elevator=5.0,
)

In [21]:
# Re-score every row with the updated weights and list the options from best to worst.
# `df` already carries a 'wpm' column from the first scoring; it has no entry in
# `weights`, so wpm() leaves it out of the product and this assignment overwrites it.
df['wpm'] = df.apply(wpm, axis=1, args=(weights,))
df.sort_values('wpm', ascending=False)

Unnamed: 0,beds,baths,area,hoa,parking,year,floor,exposure,elevator,price,wpm
2,10.0,10.0,38.5,39.963548,100.0,10.0,25.0,10.0,100.0,33.93617,1466809000.0
1,10.0,10.0,15.0,53.304982,100.0,51.484375,40.0,10.0,100.0,10.0,394285000.0
3,55.0,100.0,60.0,100.0,100.0,61.328125,100.0,100.0,100.0,33.840426,188004700.0
4,100.0,100.0,100.0,10.0,10.0,100.0,25.0,100.0,10.0,100.0,1024000.0
0,10.0,10.0,10.0,18.748481,10.0,12.109375,10.0,10.0,10.0,23.404255,40.7762


In [22]:
# Attach the re-computed scores to the original listings (matched on the row index)
# and order them from best to worst.
dff = (
    raw.merge(df['wpm'], left_index=True, right_index=True)
       .sort_values(by='wpm', ascending=False)
)
dff

Unnamed: 0,beds,baths,area,hoa,parking,year,floor,exposure,elevator,price,wpm
2,1,1,985,424.0,1,1892,3,W,Yes,210000.0,1466809000.0
1,1,1,750,546.0,1,1951,4,W,Yes,185000.0,394285000.0
3,2,2,1200,973.0,1,1965,8,E,Yes,209900.0,188004700.0
4,3,2,1600,150.0,0,2020,3,E,No,279000.0,1024000.0
0,1,1,700,230.0,0,1895,2,W,No,199000.0,40.7762


In [23]:
# Print the re-ranked listings as compact Markdown (all spaces are stripped from the text).
print(dff.to_markdown().replace(' ', ''))

||beds|baths|area|hoa|parking|year|floor|exposure|elevator|price|wpm|
|---:|-------:|--------:|-------:|------:|----------:|-------:|--------:|:-----------|:-----------|--------:|-------------:|
|2|1|1|985|424|1|1892|3|W|Yes|210000|1.46681e+09|
|1|1|1|750|546|1|1951|4|W|Yes|185000|3.94285e+08|
|3|2|2|1200|973|1|1965|8|E|Yes|209900|1.88005e+08|
|4|3|2|1600|150|0|2020|3|E|No|279000|1.024e+06|
|0|1|1|700|230|0|1895|2|W|No|199000|40.7762|
