In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, mean_squared_error, r2_score, accuracy_score
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
# Select seaborn's "whitegrid" style for the plots drawn in this notebook.
sns.set_style("whitegrid")

In [2]:
# Load the property listings CSV into a DataFrame (path is relative to the
# notebook's working directory).
data = pd.read_csv("results/properties.csv")

In [3]:
# Display the frame; pandas truncates the rendering to the first and last rows.
data

Unnamed: 0,type_of_house,location,price,bedrooms,bathrooms,toilets,parking_space
0,5 bedroom detached duplex for sale,"Ikate, Lekki, Lagos","₦750,000,000",5.0,5.0,6.0,
1,4 bedroom detached duplex for sale,"Oral Estate, Ikota, Lekki, Lagos","₦180,000,000",4.0,5.0,5.0,
2,3 bedroom block of flats for sale,"Gra Estate, Abijo, Lekki, Lagos","₦85,000,000",3.0,3.0,3.0,2.0
3,6 bedroom detached duplex for sale,"Pinnock Beach Estate, Osapa, Lekki, Lagos","₦1,500,000,000",6.0,7.0,7.0,
4,5 bedroom detached duplex for sale,"Osapa, Lekki, Lagos","₦350,000,000",5.0,5.0,6.0,
...,...,...,...,...,...,...,...
80684,2 bedroom flat / apartment for sale,"Donatus Odum Street, Seagate Estate, Ikate Ele...","₦130,000,000",2.0,2.0,3.0,2.0
80685,6 bedroom detached duplex for sale,"Apo, Abuja","₦570,000,000",6.0,7.0,7.0,10.0
80686,4 bedroom terraced duplex for sale,"Plot 134, Ayodele Odubiyi Street, Pinnacle Gas...","₦1,200,000,000",4.0,16.0,20.0,12.0
80687,3 bedroom flat / apartment for sale,"Off Oladipo Diya Road, Gaduwa, Abuja","₦180,000,000",3.0,3.0,4.0,2.0


In [5]:
# (number of rows, number of columns)
data.shape

(80689, 7)

In [6]:
# Column dtypes and non-null counts. In the recorded output `price` is stored
# as object (text) and the four room/parking columns contain missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80689 entries, 0 to 80688
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   type_of_house  80689 non-null  object 
 1   location       80689 non-null  object 
 2   price          80689 non-null  object 
 3   bedrooms       58725 non-null  float64
 4   bathrooms      51543 non-null  float64
 5   toilets        53044 non-null  float64
 6   parking_space  33765 non-null  float64
dtypes: float64(4), object(3)
memory usage: 4.3+ MB


In [7]:
# Summary statistics for the numeric columns. The recorded maxima (e.g. 20,540
# bedrooms, 1,000,000 parking spaces) are far above the 75th percentiles and
# point to outliers that need handling.
data.describe()

Unnamed: 0,bedrooms,bathrooms,toilets,parking_space
count,58725.0,51543.0,53044.0,33765.0
mean,4.982937,4.794288,5.511293,93.360107
std,85.141716,19.130627,11.537307,6027.323821
min,1.0,1.0,1.0,1.0
25%,3.0,3.0,4.0,3.0
50%,4.0,4.0,5.0,4.0
75%,5.0,5.0,6.0,5.0
max,20540.0,3157.0,2000.0,1000000.0


### Data Cleaning and Normalization

First, extract the `state` and `town` from the `location` column into columns of their own.

In [43]:
# Frequency of the second-to-last comma-separated piece of each location.
# The pieces are not stripped here, so values that differ only by a leading
# space (e.g. " Ajah" vs "Ajah") are counted separately.
(
    data["location"]
    .str.split(",")
    .str[-2]
    .value_counts()
)

location
 Lekki          26624
 Ajah            4868
 Ikeja           4322
 Ikoyi           4217
Ajah             2926
                ...  
Uruan               1
 Ikot Ekpene        1
 Awgu               1
 Ilesa East         1
 Idemili            1
Name: count, Length: 448, dtype: int64

In [44]:
# Rows whose location text contains a lowercase "lagos" (the match is
# case-sensitive, so the usual "Lagos" spelling alone does not qualify).
data.loc[data["location"].str.contains("lagos")]

Unnamed: 0,type_of_house,location,price,bedrooms,bathrooms,toilets,parking_space,town,state
6048,Filling station for sale,"Hammadiyah B/stop, lagos-abeokuta Express, Age...","₦600,000,000",,,,,Agege,Lagos
10090,Commercial property for sale,"Ibadan-lagos Express Way, Ojodu Berger, Ojodu,...","₦750,000,000",,,,,Ojodu,Lagos
18118,Residential land for sale,"Beside First Technical University, lagos Expre...","₦25,000,000",,,,,Ibadan,Oyo
23022,5 bedroom detached duplex for sale,"Osapa, lekki, lagos, Osapa, Lekki, Lagos","₦300,000,000",5.0,5.0,6.0,3.0,Lekki,Lagos
27831,5 bedroom detached duplex for sale,"Orchid Hotel Road, lagos Ocean Bay Estate, Lek...","₦380,000,000",5.0,5.0,6.0,3.0,Lekki,Lagos
31112,4 bedroom detached duplex for sale,"Ajah, lagos, Ajah, Lagos","₦120,000,000",4.0,5.0,5.0,3.0,Ajah,Lagos
37586,12 bedroom house for sale,"Isihor Egbaen, Off Ugbowo-lagos Road, Benin, O...","₦50,000,000",12.0,,,,Oredo,Edo
53401,Industrial land for sale,"Km 8, Abeokuta-lagos Expressway, Ile-ise Awo B...","₦100,000,000",,,,,Abeokuta North,Ogun
53911,Land for sale,Close To Oke Ogun Tennis Mins Off Abeokuta-lag...,"₦900,000",,,,,Iseyin,Oyo
56077,Land for sale,Few Mins Off Abeokuta-lagos Road Close To Ace ...,"₦900,000",,,,,Iseyin,Oyo


In [45]:
# Normalise whitespace in `location`: trim both ends, collapse every run of
# whitespace to a single space, then rewrite each comma separator as ", ".
data["location"] = (
    data["location"]
    .str.strip()
    .replace(r'\s+', ' ', regex=True)
    .str.replace(r'\s*,\s*', ', ', regex=True)
)

In [46]:
def split_location(location):
    """Split a comma-separated location string into ``(state, town)``.

    The state is the last comma-separated component and the town is the one
    immediately before it; both are stripped of surrounding whitespace.

    Parameters
    ----------
    location : str
        Location text such as ``"Ikate, Lekki, Lagos"``.

    Returns
    -------
    tuple
        ``(state, town)``. ``(None, None)`` is returned when ``location`` is
        not a string (e.g. ``None`` or ``NaN``) or contains no comma.
    """
    # Missing values arrive as None/NaN, which have no ``rsplit``; treat them
    # as "unknown" instead of raising AttributeError in the middle of .apply().
    if not isinstance(location, str):
        return None, None

    parts = location.rsplit(",", 2)  # only the last two components are needed
    if len(parts) < 2:
        # No comma at all: state and town cannot be told apart.
        return None, None

    return parts[-1].strip(), parts[-2].strip()

The next cell applies `split_location` to every value in the `location` column, producing one `(state, town)` tuple per row. `pd.Series` then unpacks each tuple into separate columns, which are assigned to the `state` and `town` columns of the original DataFrame `data`.

In [47]:
data[["state", "town"]] = (
    data["location"]
    .apply(split_location)  # one (state, town) tuple per row
    .apply(pd.Series)       # expand each tuple into two positional columns
)