In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from scipy import stats

In [2]:
# Sample house-listing data: one list per column, six listings in total.
data = {
    "price": [50_000, 100_000, 75_000, 200_000, 150_000, 300_000],
    "price_per_sq_meter": [500, 1_000, 750, 2_000, 1_500, 3_000],
    "Category": ["A", "B", "A", "C", "B", "C"],
    "title_deed": ["Yes", "No", "Yes", "No", "Yes", "No"],
    "repair": ["Good", "Poor", "Good", "Excellent", "Poor", "Excellent"],
    "mortgage": ["Yes", "No", "Yes", "No", "Yes", "No"],
    "rooms": [2, 3, 2, 4, 3, 4],
    "building_type": ["Apartment", "House", "Apartment", "House", "House", "Apartment"],
}

df = pd.DataFrame(data)

# Min-max normalise the house price and the price per square metre.
# The scaled values overwrite the original columns, so from here on
# `df['price']` and `df['price_per_sq_meter']` hold values in [0, 1].
price_columns = ["price", "price_per_sq_meter"]
scaler = MinMaxScaler()
scaled_prices = scaler.fit_transform(df[price_columns])
df[price_columns] = scaled_prices

print("Data setelah normalisasi:", df, sep="\n")

Data setelah normalisasi:
   price  price_per_sq_meter Category title_deed     repair mortgage  rooms  \
0    0.0                 0.0        A        Yes       Good      Yes      2   
1    0.2                 0.2        B         No       Poor       No      3   
2    0.1                 0.1        A        Yes       Good      Yes      2   
3    0.6                 0.6        C         No  Excellent       No      4   
4    0.4                 0.4        B        Yes       Poor      Yes      3   
5    1.0                 1.0        C         No  Excellent       No      4   

  building_type  
0     Apartment  
1         House  
2     Apartment  
3         House  
4         House  
5     Apartment  


In [3]:
# One-hot encode the categorical columns listed below.
# `building_type` is not in the list, so it stays as a plain string column.
one_hot_columns = ["Category", "title_deed", "repair", "mortgage"]
df_encoded = pd.get_dummies(df, columns=one_hot_columns)

print("\nData setelah one-hot encoding:", df_encoded, sep="\n")



Data setelah one-hot encoding:
   price  price_per_sq_meter  rooms building_type  Category_A  Category_B  \
0    0.0                 0.0      2     Apartment        True       False   
1    0.2                 0.2      3         House       False        True   
2    0.1                 0.1      2     Apartment        True       False   
3    0.6                 0.6      4         House       False       False   
4    0.4                 0.4      3         House       False        True   
5    1.0                 1.0      4     Apartment       False       False   

   Category_C  title_deed_No  title_deed_Yes  repair_Excellent  repair_Good  \
0       False          False            True             False         True   
1       False           True           False             False        False   
2       False          False            True             False         True   
3        True           True           False              True        False   
4       False          False     

In [6]:
# Mean, median and mode of the house price for every
# (number of rooms, building type) combination.
# `price` was min-max normalised in an earlier cell, so these statistics
# are computed on the scaled values, not on the raw prices.


def _first_mode(values):
    """Return the first mode of ``values``, or NaN when there is none.

    ``Series.mode()`` may return several values when there is a tie; the
    first entry of that result is used. The mode is computed only once
    per call (the previous inline lambda evaluated it twice per group).
    """
    modes = values.mode()
    if modes.empty:
        return np.nan
    return modes.iloc[0]


aggregation = (
    df.groupby(["rooms", "building_type"])
    .agg(
        price_mean=("price", "mean"),
        price_median=("price", "median"),
        price_mode=("price", _first_mode),
    )
    .reset_index()
)

print("\nAggregasi data:", aggregation, sep="\n")


Aggregasi data:
   rooms building_type  price_mean  price_median  price_mode
0      2     Apartment        0.05          0.05         0.0
1      3         House        0.30          0.30         0.2
2      4     Apartment        1.00          1.00         1.0
3      4         House        0.60          0.60         0.6
