In [1]:
import os
import pandas as pd
import numpy as np
import random as rd

# Define the path to the data directory
data_dir = os.path.join(os.path.dirname(os.getcwd()), 'data')

# Build the full path to the file
file_path = os.path.join(data_dir, 'Melbourne_cleaned.csv')

# Load the dataset
df = pd.read_csv(file_path)

df.head()
df.isnull().sum()

Suburb           0
Address          0
Bedroom          0
Type             0
Price            0
Method           0
SellerG          0
Date             0
Distance         0
Postcode         0
Bedroom2         0
Bathroom         0
Car              0
Landsize         0
BuildingArea     0
YearBuilt        0
CouncilArea      0
Regionname       0
Propertycount    0
dtype: int64

In [2]:
avg_landsize = df["Landsize"].mean()
df_filtered = df[df["Landsize"] > avg_landsize].copy()
df_sorted = df_filtered.sort_values(by=["Regionname", "Suburb", "Distance", "YearBuilt", "Price"], ascending=[True, True, True, False, True]).reset_index(drop=True)
df_sorted.head(10)

Unnamed: 0,Suburb,Address,Bedroom,Type,Price,Method,SellerG,Date,Distance,Postcode,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Regionname,Propertycount
0,Bayswater,2/1 Orchard Rd,2,h,661000.0,S,Barry,26-08-2017,23.2,3153,2,2,1,735.0,103.0,2007,Knox,Eastern Metropolitan,5030
1,Bayswater,3 Grieve St,3,h,824000.0,S,One,03-09-2017,23.2,3153,3,1,2,725.0,112.0,1980,Knox,Eastern Metropolitan,5030
2,Bayswater,15 Clifford St,4,h,705000.0,PI,Biggin,27-05-2017,23.2,3153,4,2,4,765.0,141.0,1975,Knox,Eastern Metropolitan,5030
3,Bayswater,85 Farnham Rd,4,h,709690.5,SP,Barry,08-07-2017,23.2,3153,3,1,7,773.0,120.0,1975,Knox,Eastern Metropolitan,5030
4,Bayswater,3/23 Begonia Av,2,h,425000.0,VB,McGrath,19-08-2017,23.2,3153,2,1,1,735.0,63.0,1970,Knox,Eastern Metropolitan,5030
5,Bayswater,1/33 Begonia Av,2,u,430000.0,SP,Stockdale,17-06-2017,23.2,3153,2,1,1,735.0,126.5,1970,Knox,Eastern Metropolitan,5030
6,Bayswater,1/8 Tracey St,3,h,610000.0,S,McGrath,27-05-2017,23.2,3153,3,1,1,735.0,126.5,1970,Knox,Eastern Metropolitan,5030
7,Bayswater,6 Larne Av,2,u,620000.0,S,First,22-07-2017,23.2,3153,2,1,1,735.0,126.5,1970,Knox,Eastern Metropolitan,5030
8,Bayswater,16 Wiltshire Av,4,h,640000.0,VB,iTRAK,03-06-2017,23.2,3153,4,1,1,735.0,126.5,1970,Knox,Eastern Metropolitan,5030
9,Bayswater,5 Susan St,4,h,709690.5,PI,Philip,24-06-2017,23.2,3153,4,2,3,736.0,162.0,1970,Knox,Eastern Metropolitan,5030


In [3]:
#random recommendation from top ten list
top10 = df_sorted[0:10]
random_index = rd.randint(0, 9)
random_house = top10.iloc[random_index]
print(random_house)

Suburb                      Bayswater
Address                    6 Larne Av
Bedroom                             2
Type                                u
Price                        620000.0
Method                              S
SellerG                         First
Date                       22-07-2017
Distance                         23.2
Postcode                         3153
Bedroom2                            2
Bathroom                            1
Car                                 1
Landsize                        735.0
BuildingArea                    126.5
YearBuilt                        1970
CouncilArea                      Knox
Regionname       Eastern Metropolitan
Propertycount                    5030
Name: 7, dtype: object


In [4]:
top10_per_suburb = df_sorted.groupby("Suburb").head(10).reset_index(drop=True) #here i stored top 10 houses per suburb
top10_per_suburb.head

<bound method NDFrame.head of             Suburb          Address  Bedroom Type     Price Method   SellerG  \
0        Bayswater   2/1 Orchard Rd        2    h  661000.0      S     Barry   
1        Bayswater      3 Grieve St        3    h  824000.0      S       One   
2        Bayswater   15 Clifford St        4    h  705000.0     PI    Biggin   
3        Bayswater    85 Farnham Rd        4    h  709690.5     SP     Barry   
4        Bayswater  3/23 Begonia Av        2    h  425000.0     VB   McGrath   
...            ...              ...      ...  ...       ...    ...       ...   
2285  Melton South     5 Leggatt St        3    h  370000.0      S  Reliance   
2286  Melton South     48 Manson Dr        3    h  385000.0      S       YPA   
2287  Melton South     97 Exford Rd        3    h  392500.0      S     Raine   
2288  Melton South      1 Fraser St        3    h  395000.0      S       YPA   
2289  Melton South     23 Neerim St        4    h  426000.0      S     Raine   

         

In [5]:
top10_per_region = df_sorted.groupby("Regionname").head(10).reset_index(drop=True) #but here i stored top 10 houses per region
top10_per_region.head

<bound method NDFrame.head of           Suburb          Address  Bedroom Type     Price Method  \
0      Bayswater   2/1 Orchard Rd        2    h  661000.0      S   
1      Bayswater      3 Grieve St        3    h  824000.0      S   
2      Bayswater   15 Clifford St        4    h  705000.0     PI   
3      Bayswater    85 Farnham Rd        4    h  709690.5     SP   
4      Bayswater  3/23 Begonia Av        2    h  425000.0     VB   
..           ...              ...      ...  ...       ...    ...   
75        Melton       14 Musk Ct        3    h  295000.0     SP   
76        Melton      1 Irving Rd        4    h  400000.0      S   
77        Melton    53 Barries Rd        3    h  420718.8     SN   
78        Melton      28 Atkin St        4    h  550000.0     PI   
79  Melton South     69 Andrew St        3    h  375467.4     PI   

          SellerG        Date  Distance  Postcode  Bedroom2  Bathroom  Car  \
0           Barry  26-08-2017      23.2      3153         2         2    1 