In [250]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [251]:
df_data = pd.read_csv('data/Housing.csv')
df_data.drop(columns=['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus'], inplace=True)
df_data 

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,13300000,7420,4,2,3,2
1,12250000,8960,4,4,4,3
2,12250000,9960,3,2,2,2
3,12215000,7500,4,2,2,3
4,11410000,7420,4,1,2,2
...,...,...,...,...,...,...
540,1820000,3000,2,1,1,2
541,1767150,2400,3,1,1,0
542,1750000,3620,2,1,1,0
543,1750000,2910,3,1,1,0


In [252]:
x_train, x_test, y_train, y_test = train_test_split(df_data.drop('price', axis=1), df_data['price'], test_size=0.2, random_state=42)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((436, 5), (436,), (109, 5), (109,))

In [253]:
df_rule = pd.DataFrame()
df_rule['price'] = y_train.apply(lambda x: 'low' if x <= 5125000 else ('medium' if 5125000 < x <= 9835000 else 'high'))
df_rule['area'] = x_train['area'].apply(lambda x: 'low' if x <= 6015 else ('medium' if 6015 < x <= 10380 else 'high'))
df_rule['bedrooms'] = x_train['bedrooms'].apply(lambda x: 'low' if x <= 2 else ('medium' if 2 < x <= 4 else 'high'))
df_rule['bathrooms'] = x_train['bathrooms'].apply(lambda x: 'low' if x <= 1 else ('medium' if 2 < x <= 3 else 'high'))
df_rule['stories'] = x_train['stories'].apply(lambda x: 'low' if x <= 1 else ('medium' if 2 < x <= 3 else 'high'))
df_rule['parking'] = x_train['parking'].apply(lambda x: 'low' if x <= 1 else ('medium' if x == 2 else 'high'))



In [254]:
df_rule.drop_duplicates(inplace=True)
df_rule.reset_index(drop=True,inplace=True)

In [255]:
df_rule

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,medium,low,medium,high,high,low
1,medium,medium,medium,high,low,high
2,low,low,low,low,low,medium
3,low,low,medium,low,high,low
4,low,low,medium,low,low,low
...,...,...,...,...,...,...
76,low,low,medium,low,medium,medium
77,low,high,medium,low,high,low
78,medium,low,medium,low,low,low
79,medium,low,medium,low,low,medium


In [256]:
dict_rule_to_index = {
    'low': 0,
    'medium': 1,
    'high': 2,
}

In [257]:
df_rule = df_rule.applymap(lambda x: dict_rule_to_index[x] if x in dict_rule_to_index else x)

  df_rule = df_rule.applymap(lambda x: dict_rule_to_index[x] if x in dict_rule_to_index else x)


In [258]:
df_rule

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,1,0,1,2,2,0
1,1,1,1,2,0,2
2,0,0,0,0,0,1
3,0,0,1,0,2,0
4,0,0,1,0,0,0
...,...,...,...,...,...,...
76,0,0,1,0,1,1
77,0,2,1,0,2,0
78,1,0,1,0,0,0
79,1,0,1,0,0,1


In [259]:
def down_function(x, a, b):
    result = (b - x) / (b - a)
    result = round(result, 2)
    return max(result, 0)

def up_function(x, a, b):
    result = (x - a) / (b - a)
    result = round(result, 2)
    return max(result, 0)

def triangle_function(x, a, b, c):
    if x <= a or x >= c:
        return 0
    if a < x <= b:
        return up_function(x, a, b)
    if b < x < c:
        return down_function(x, b, c)

In [260]:
def area_membership(x):
    low = down_function(x, 0, 8100)
    medium = triangle_function(x, 0, 8100, 16200)
    high = up_function(x, 8100, 16200)
    return low, medium, high

def bedrooms_membership(x):
    low = down_function(x, 0, 3)
    medium = triangle_function(x, 0, 3, 6)
    high = up_function(x, 3, 6)
    return low, medium, high

def bathrooms_membership(x):
    low = down_function(x, 0, 2)
    medium = triangle_function(x, 0, 2, 4)
    high = up_function(x, 2, 4)
    return low, medium, high

def stories_membership(x):
    low = down_function(x, 0, 2)
    medium = triangle_function(x, 0, 2, 4)
    high = up_function(x, 2, 4)
    return low, medium, high

def parking_membership(x):
    low = down_function(x, 0, 2)
    medium = triangle_function(x, 0, 2, 3)
    high = up_function(x, 2, 3)
    return low, medium, high

In [261]:
def calculate_membership(df_data):
    membership_functions = {
        'area': area_membership,
        'bedrooms': bedrooms_membership,
        'bathrooms': bathrooms_membership,
        'stories': stories_membership,
        'parking': parking_membership,
    }
    df_result = pd.DataFrame()
    for column, membership_function in membership_functions.items():
        df_result[column] = df_data[column].apply(lambda x: membership_function(x))
    return df_result

In [262]:
df_membership = calculate_membership(x_test)

In [263]:
df_membership.reset_index(drop=True, inplace=True)
df_membership

Unnamed: 0,area,bedrooms,bathrooms,stories,parking
0,"(0.27, 0.73, 0)","(0, 0.67, 0.33)","(0.0, 1.0, 0.0)","(0.0, 1.0, 0.0)","(0.5, 0.5, 0)"
1,"(0.2, 0.8, 0)","(0.0, 1.0, 0.0)","(0.0, 1.0, 0.0)","(0, 0.5, 0.5)","(1.0, 0, 0)"
2,"(0.5, 0.5, 0)","(0.33, 0.67, 0)","(0.5, 0.5, 0)","(0.5, 0.5, 0)","(1.0, 0, 0)"
3,"(0.38, 0.62, 0)","(0.0, 1.0, 0.0)","(0.5, 0.5, 0)","(0.0, 1.0, 0.0)","(1.0, 0, 0)"
4,"(0.51, 0.49, 0)","(0.0, 1.0, 0.0)","(0.5, 0.5, 0)","(0.5, 0.5, 0)","(1.0, 0, 0)"
...,...,...,...,...,...
104,"(0.26, 0.74, 0)","(0, 0.67, 0.33)","(0.5, 0.5, 0)","(0.0, 1.0, 0.0)","(0.0, 1.0, 0.0)"
105,"(0.14, 0.86, 0)","(0, 0.67, 0.33)","(0.5, 0.5, 0)","(0.0, 1.0, 0.0)","(0.5, 0.5, 0)"
106,"(0.26, 0.74, 0)","(0, 0.67, 0.33)","(0.0, 1.0, 0.0)","(0, 0, 1.0)","(0.5, 0.5, 0)"
107,"(0.26, 0.74, 0)","(0.0, 1.0, 0.0)","(0.0, 1.0, 0.0)","(0.0, 1.0, 0.0)","(0.5, 0.5, 0)"


In [264]:
def aggregate(df_membership, df_rule):
    alpha_predicate = np.zeros((df_membership.shape[0], df_rule.shape[0]))
    for i, row_member in df_membership.iterrows():
        for j, row_rule in df_rule.iterrows():
            temp = []
            for cell_member, cell_rule in zip(row_member, row_rule):
                temp.append(cell_member[cell_rule])
            alpha_predicate[i, j] = min(temp)
    return alpha_predicate

In [265]:
df_aggregated = aggregate(df_membership, df_rule)
df_aggregated

array([[0.  , 0.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 0.5 , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.33, ..., 0.33, 0.33, 0.  ],
       ...,
       [0.  , 0.5 , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , ..., 0.  , 0.  , 0.  ]])

In [266]:
def inv_up_function(alpha, a, b):
    return alpha*(b-a) + a

def inv_down_function(alpha, a, b):
    return b - alpha*(b-a)

In [267]:
def calculate_price(df_aggregated, prices):
    list_price = []
    for i in range(df_aggregated.shape[0]):
        dict_price = {
            'a_pred': [],
            'z': []
         }
        for j, price in enumerate(prices):
            if price == 0:
                dict_price['a_pred'].append(df_aggregated[i, j])
                dict_price['z'].append(inv_down_function(df_aggregated[i, j], 1750000, 6650000))
            elif price == 1:
                dict_price['a_pred'].append(df_aggregated[i, j])
                dict_price['z'].append(inv_up_function(df_aggregated[i, j], 1750000, 6650000))
                dict_price['a_pred'].append(df_aggregated[i, j])
                dict_price['z'].append(inv_down_function(df_aggregated[i, j], 6650000, 13300000))
            elif price == 2:
                dict_price['a_pred'].append(df_aggregated[i, j])
                dict_price['z'].append(inv_up_function(df_aggregated[i, j], 6650000, 13300000))
        list_price.append(dict_price)
    return list_price

In [268]:
levels = calculate_price(df_aggregated, df_rule['price']) 
levels[0]['a_pred'][:10], levels[0]['z'][:10]

([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [1750000.0,
  13300000.0,
  1750000.0,
  13300000.0,
  6650000.0,
  6650000.0,
  6650000.0,
  6650000.0,
  6650000.0,
  6650000.0])

In [269]:
def defuzzification(levels):
    result = []
    for level in levels:
        total_weighted_z = 0
        total_weight = 0
        for a, z in zip(level['a_pred'], level['z']):
            total_weighted_z += a * z
            total_weight += a
        centroid = total_weighted_z / total_weight if total_weight else 0
        result.append(centroid)
    return result

In [270]:
predicted_level = defuzzification(levels)
predicted_level

[0,
 6920735.294117646,
 5987993.664202745,
 0,
 5968426.791277258,
 6864835.051546392,
 7087500.0,
 0,
 6116825.280414148,
 5759474.708171206,
 7087500.0,
 5982279.0202342905,
 5963617.486338798,
 0,
 0,
 0,
 0,
 0,
 0,
 6550435.606060606,
 6622707.446808511,
 6883916.666666667,
 5956868.798235942,
 0,
 7085483.443708609,
 0,
 5901468.860164513,
 0,
 0,
 0,
 0,
 0,
 6834664.772727273,
 0,
 0,
 0,
 0,
 0,
 5907693.602693603,
 0,
 7087500.0,
 5956868.798235942,
 6907403.50877193,
 0,
 6807799.145299146,
 6605370.320855616,
 6720084.677419355,
 0,
 7114750.922509225,
 6698412.499999999,
 6720000.0,
 6093281.834372218,
 7086370.567375886,
 0,
 0,
 5811301.115241636,
 6852144.104803493,
 0,
 0,
 5970097.508125678,
 6702901.041666666,
 0,
 6669992.088607593,
 6216070.6278026905,
 0,
 0,
 6887555.83756345,
 0,
 7143895.833333333,
 7087500.0,
 0,
 6907403.50877193,
 7123368.453865337,
 7196162.790697674,
 6056702.790697673,
 0,
 0,
 6177422.394678492,
 6782712.328767124,
 6432391.304347826,
 

In [271]:
error = []
for y, y_hat in zip(y_test, predicted_level):
    if y_hat != 0:
        error.append(abs(y - y_hat))

print(f'Mean relative error: {np.mean(error)}')

Mean relative error: 2148673.7882806933
