In [911]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from IPython.display import display

In [912]:
df_data = pd.read_csv('data/Housing.csv')
df_data.drop(columns=['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus'], inplace=True)
df_data.sort_values(by=['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking'], inplace=True, ascending=True)
df_data.reset_index(drop=True, inplace=True)
df_data

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,1750000,2910,3,1,1,0
1,1750000,3620,2,1,1,0
2,1750000,3850,3,1,2,0
3,1767150,2400,3,1,1,0
4,1820000,3000,2,1,1,2
...,...,...,...,...,...,...
540,11410000,7420,4,1,2,2
541,12215000,7500,4,2,2,3
542,12250000,8960,4,4,4,3
543,12250000,9960,3,2,2,2


In [913]:
df_rule = pd.DataFrame()
df_rule['price'] = df_data['price'].apply(lambda x: 'low_price' if x <= 5125000 else ('mid_price' if 5125000 < x <= 9835000 else 'high_price'))
df_rule['area'] = df_data['area'].apply(lambda x: 'low' if x <= df_data['area'].max()//2 else 'high')
df_rule['bedrooms'] = df_data['bedrooms'].apply(lambda x: 'low' if x <= df_data['bedrooms'].max()//2 else 'high')
df_rule['bathrooms'] = df_data['bathrooms'].apply(lambda x: 'low' if x <= df_data['bathrooms'].max()//2 else 'high')
df_rule['stories'] = df_data['stories'].apply(lambda x: 'low' if x <= df_data['stories'].max()//2 else 'high')
df_rule['parking'] = df_data['parking'].apply(lambda x: 'low' if x <= df_data['parking'].max()//2 else 'high')



In [914]:
df_rule.drop_duplicates(inplace=True, subset=['area', 'bedrooms', 'bathrooms', 'stories', 'parking'])
df_rule.reset_index(drop=True,inplace=True)

In [915]:
df_rule

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,low_price,low,low,low,low,low
1,low_price,low,low,low,low,high
2,low_price,low,high,low,low,low
3,low_price,low,low,high,low,low
4,low_price,low,high,low,high,low
5,low_price,low,high,low,low,high
6,low_price,high,low,low,low,high
7,low_price,low,low,low,high,low
8,low_price,high,low,low,low,low
9,low_price,low,high,low,high,high


In [916]:
dict_rule_to_index = {
    'low': 0,
    'high': 1,
    'low_price': 0,
    'mid_price': 1,
    'high_price': 2

}

In [917]:
df_rule = df_rule.applymap(lambda x: dict_rule_to_index[x] if x in dict_rule_to_index else x)

  df_rule = df_rule.applymap(lambda x: dict_rule_to_index[x] if x in dict_rule_to_index else x)


In [918]:
df_rule

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,0,0,0,0,0,0
1,0,0,0,0,0,1
2,0,0,1,0,0,0
3,0,0,0,1,0,0
4,0,0,1,0,1,0
5,0,0,1,0,0,1
6,0,1,0,0,0,1
7,0,0,0,0,1,0
8,0,1,0,0,0,0
9,0,0,1,0,1,1


In [919]:
def down_function(x, a, b):
    result = (b - x) / (b - a)
    result = round(result, 2)
    return max(result, 0)

def up_function(x, a, b):
    result = (x - a) / (b - a)
    result = round(result, 2)
    return max(result, 0)

def triangle_function(x, a, b, c):
    if x <= a or x >= c:
        return 0
    if a < x <= b:
        return up_function(x, a, b)
    if b < x < c:
        return down_function(x, b, c)

In [920]:
def area_membership(x):
    low = down_function(x, df_data['area'].min(), df_data['area'].max())
    high = up_function(x, df_data['area'].min(), df_data['area'].max())
    return low, high

def bedrooms_membership(x):
    low = down_function(x, df_data['bedrooms'].min(), df_data['bedrooms'].max())
    high = up_function(x, df_data['bedrooms'].min(), df_data['bedrooms'].max())
    return low, high

def bathrooms_membership(x):
    low = down_function(x, df_data['bathrooms'].min(), df_data['bathrooms'].max())
    high = up_function(x, df_data['bathrooms'].min(), df_data['bathrooms'].max())
    return low, high

def stories_membership(x):
    low = down_function(x, df_data['stories'].min(), df_data['stories'].max())
    high = up_function(x, df_data['stories'].min(), df_data['stories'].max())
    return low, high

def parking_membership(x):
    low = down_function(x, df_data['parking'].min(), df_data['parking'].max())
    high = up_function(x, df_data['parking'].min(), df_data['parking'].max())
    return low, high



In [921]:
def calculate_membership(df_data):
    membership_functions = {
        'area': area_membership,
        'bedrooms': bedrooms_membership,
        'bathrooms': bathrooms_membership,
        'stories': stories_membership,
        'parking': parking_membership,
    }
    df_result = pd.DataFrame()
    for column, membership_function in membership_functions.items():
        df_result[column] = df_data[column].apply(lambda x: membership_function(x))
    return df_result

In [922]:
df_membership = calculate_membership(df_data.drop(columns=['price']))

In [923]:
df_membership.reset_index(drop=True, inplace=True)

In [924]:
display(df_membership)
display(df_rule.drop(columns=['price']))

Unnamed: 0,area,bedrooms,bathrooms,stories,parking
0,"(0.91, 0.09)","(0.6, 0.4)","(1.0, 0.0)","(1.0, 0.0)","(1.0, 0.0)"
1,"(0.86, 0.14)","(0.8, 0.2)","(1.0, 0.0)","(1.0, 0.0)","(1.0, 0.0)"
2,"(0.85, 0.15)","(0.6, 0.4)","(1.0, 0.0)","(0.67, 0.33)","(1.0, 0.0)"
3,"(0.95, 0.05)","(0.6, 0.4)","(1.0, 0.0)","(1.0, 0.0)","(1.0, 0.0)"
4,"(0.91, 0.09)","(0.8, 0.2)","(1.0, 0.0)","(1.0, 0.0)","(0.33, 0.67)"
...,...,...,...,...,...
540,"(0.6, 0.4)","(0.4, 0.6)","(1.0, 0.0)","(0.67, 0.33)","(0.33, 0.67)"
541,"(0.6, 0.4)","(0.4, 0.6)","(0.67, 0.33)","(0.67, 0.33)","(0.0, 1.0)"
542,"(0.5, 0.5)","(0.4, 0.6)","(0.0, 1.0)","(0.0, 1.0)","(0.0, 1.0)"
543,"(0.43, 0.57)","(0.6, 0.4)","(0.67, 0.33)","(0.67, 0.33)","(0.33, 0.67)"


Unnamed: 0,area,bedrooms,bathrooms,stories,parking
0,0,0,0,0,0
1,0,0,0,0,1
2,0,1,0,0,0
3,0,0,1,0,0
4,0,1,0,1,0
5,0,1,0,0,1
6,1,0,0,0,1
7,0,0,0,1,0
8,1,0,0,0,0
9,0,1,0,1,1


In [925]:
def aggregate(df_membership, df_rule):
    alpha_predicate = np.zeros((df_membership.shape[0], df_rule.shape[0]))
    for i, row_member in df_membership.iterrows():
        for j, row_rule in df_rule.iterrows():
            temp = []
            for cell_member, cell_rule in zip(row_member, row_rule):
                temp.append(cell_member[cell_rule])
            alpha_predicate[i, j] = min(temp)
    return alpha_predicate

In [926]:
df_aggregated = aggregate(df_membership, df_rule.drop(columns=['price']))
df_aggregated.shape

(545, 23)

In [927]:
def inv_up_function(alpha, a, b):
    return alpha*(b-a) + a

def inv_down_function(alpha, a, b):
    return b - alpha*(b-a)

def calculate_price(df_aggregated, prices):
    list_price = []
    for i in range(df_aggregated.shape[0]):
        dict_price = {
            'a_pred': [],
            'z': []
         }
        for j, price in enumerate(prices):
            if price == 0:
                dict_price['a_pred'].append(df_aggregated[i, j])
                dict_price['z'].append(inv_down_function(df_aggregated[i, j], df_data['price'].min(), df_data['price'].max()//2))   
            elif price == 1:
                dict_price['a_pred'].append(df_aggregated[i, j])
                dict_price['z'].append(inv_up_function(df_aggregated[i, j], df_data['price'].min(), df_data['price'].max()//2)) 
                dict_price['a_pred'].append(df_aggregated[i, j])
                dict_price['z'].append(inv_down_function(df_aggregated[i, j], df_data['price'].max()//2, df_data['price'].max()))
            elif price == 2:
                dict_price['a_pred'].append(df_aggregated[i, j])
                dict_price['z'].append(inv_up_function(df_aggregated[i, j], df_data['price'].max()//2, df_data['price'].max()))
        list_price.append(dict_price)
    return list_price

In [928]:
levels = calculate_price(df_aggregated, df_rule['price']) 

In [929]:
def defuzzification(levels):
    result = []
    for level in levels:
        total_weighted_z = 0
        total_weight = 0
        for a, z in zip(level['a_pred'], level['z']):
            total_weighted_z += a * z
            total_weight += a
        centroid = total_weighted_z / total_weight if total_weight else 0
        result.append(centroid)
    return result

In [930]:
predicted_level = defuzzification(levels)

In [931]:
error = []
for y, y_hat in zip(df_data['price'], predicted_level):
    error.append(abs(y - y_hat))

print(f'Mean relative error: {np.mean(error)}')

Mean relative error: 1304618.276539483
