In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json
from connectdb import connect_to_mongodb

In [6]:
# Replace 'your_database_name' and 'your_collection_name' with your desired database and collection names
database_name = 'realestatebackend'
collection_name = 'lake'

collection = connect_to_mongodb(database_name, collection_name)

Connected to MongoDB successfully!


In [10]:
data = list(collection.find({}))

In [13]:
df = pd.DataFrame(data)

In [14]:
df.columns

Index(['_id', 'source', 'category', 'price', 'house_details', 'other_details',
       'location', 'phone', 'images'],
      dtype='object')

In [15]:
df.head()

Unnamed: 0,_id,source,category,price,house_details,other_details,location,phone,images
0,64c8106ad2b80fc9a5e08ff1,efiewura.com,for sale,"GHS 125,000.00","{'Beds': '4', 'Baths': '4'}",Appliances\nWater Heater\nWardrobes\nAmenities...,"[Greater Accra Region, Accra, Lakeside Estat...",,[https://efiewura.com/img/properties/executive...
1,64c8106cd2b80fc9a5e08ff2,efiewura.com,for sale,"GHS 200,000.00","{'Beds': '4', 'Baths': '4'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/4_bedroom...
2,64c8106dd2b80fc9a5e08ff3,efiewura.com,for sale,"GHS 165,000.00","{'Beds': '3', 'Baths': '3'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...
3,64c8106fd2b80fc9a5e08ff4,efiewura.com,for sale,"GHS 100,000.00","{'Beds': '3', 'Baths': '3'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...
4,64c81070d2b80fc9a5e08ff5,efiewura.com,for sale,"USD 280,000.00","{'Beds': '3', 'Baths': '2'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...


In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 440 entries, 0 to 439
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   _id            440 non-null    object
 1   source         440 non-null    object
 2   category       440 non-null    object
 3   price          440 non-null    object
 4   house_details  440 non-null    object
 5   other_details  440 non-null    object
 6   location       440 non-null    object
 7   phone          440 non-null    object
 8   images         440 non-null    object
dtypes: object(9)
memory usage: 31.1+ KB


# Data Wrangling

### Data Cleaning Steps
###### 1. split price column into currecy and amount
###### 2. split house details into bedrooms and bathrooms
###### 3. split location column into Region,City,community

In [22]:
df['price'].unique()

array(['GHS 125,000.00', 'GHS 200,000.00', 'GHS 165,000.00',
       'GHS 100,000.00', 'USD 280,000.00', 'USD 250,000.00',
       'USD 950,000.00', 'USD 727,800.00', 'USD 259,500.00',
       'USD 180,000.00', 'USD 120,000.00', 'USD 100,000.00',
       'USD 320,000.00', 'GHS 540,000.00', 'GHS 850,000.00',
       'GHS 1,680,000.00', 'USD 80,000.00', 'USD 75,000.00',
       'GHS 3,500,000.00', 'GHS 2,314,000.00', 'USD 222,479.00',
       'USD 220,350.00', 'USD 141,347.00', 'USD 193,500.00',
       'USD 384,500.00', 'USD 544,160.00', 'USD 1,596,000.00',
       'GHS 1,500,000.00', 'GHS 830,000.00', 'USD 271,950.00',
       'USD 245,000.00 / day', 'USD 150,000.00', 'USD 200,000.00',
       'GHS 350,000.00', 'GHS 750,000.00', 'USD 70,000.00',
       'USD 210,000.00', 'GHS 900,000.00', 'USD 350,000.00',
       'USD 130,000.00', 'USD 650,000.00', 'USD 140,000.00',
       'USD 234,650.00', 'USD 110,000.00', 'USD 300,000.00',
       'GHS 420,000.00', 'USD 242,000.00', 'GHS 6,000,000.00',
       'G

#### price

In [34]:
currency = []
amount = []
duration = []

for i in df['price']:
    split = i.split(" ")
    currency.append(split[0])
    amount.append(split[1])
    if "/" in split:
        duration.append(split[-1])
    else:
        duration.append("N/A")

In [35]:
df['currency'] = currency
df['price'] = amount
df['duration']=duration

Unnamed: 0,_id,source,category,price,house_details,other_details,location,phone,images,currency,duration
0,64c8106ad2b80fc9a5e08ff1,efiewura.com,for sale,125000.00,"{'Beds': '4', 'Baths': '4'}",Appliances\nWater Heater\nWardrobes\nAmenities...,"[Greater Accra Region, Accra, Lakeside Estat...",,[https://efiewura.com/img/properties/executive...,GHS,
1,64c8106cd2b80fc9a5e08ff2,efiewura.com,for sale,200000.00,"{'Beds': '4', 'Baths': '4'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/4_bedroom...,GHS,
2,64c8106dd2b80fc9a5e08ff3,efiewura.com,for sale,165000.00,"{'Beds': '3', 'Baths': '3'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...,GHS,
3,64c8106fd2b80fc9a5e08ff4,efiewura.com,for sale,100000.00,"{'Beds': '3', 'Baths': '3'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...,GHS,
4,64c81070d2b80fc9a5e08ff5,efiewura.com,for sale,280000.00,"{'Beds': '3', 'Baths': '2'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...,USD,
...,...,...,...,...,...,...,...,...,...,...,...
435,64c813e4d2b80fc9a5e091a4,efiewura.com,for sale,2857600.00,"{'Beds': '4', 'Baths': '4'}",\nExecutive furnished 4 bedroom is up for sal...,"[Greater Accra Region, Accra, Tantra Hills]",,[https://efiewura.com/img/properties/executive...,GHC,
436,64c813e6d2b80fc9a5e091a5,efiewura.com,for sale,1504000.00,"{'Beds': '4', 'Baths': '4'}",\n4bedroom fully furnished for sale at East le...,"[Greater Accra Region, Accra, East Legon Hills]",,[https://efiewura.com/img/properties/4bedroom_...,GHC,
437,64c813e8d2b80fc9a5e091a6,efiewura.com,for sale,550000.00,"{'Beds': '3', 'Baths': '3'}",\nThis is a newly built 3bedroom self house lo...,"[Greater Accra Region, Accra, Ashaley Botwe ...",,[https://efiewura.com/img/properties/this_is_a...,GHC,
438,64c813ead2b80fc9a5e091a7,efiewura.com,for sale,2030400.00,"{'Beds': '4', 'Baths': '4'}",\n4 bedrooms house for sale at east legon hill...,"[Greater Accra Region, Accra, East Legon Hills]",,[https://efiewura.com/img/properties/_4_bedroo...,GHC,


In [496]:
def remove_comma(x):
    return float("".join(x.split(",")))

In [497]:
df['price']= df['price'].apply(remove_comma)

In [52]:
backup_df = df.copy()

In [461]:
df = backup_df

##### location

In [462]:
df['location']

0      [Greater Accra Region,  Accra,  Lakeside Estat...
1      [Greater Accra Region,  Tema,  Tema Community 22]
2      [Greater Accra Region,  Tema,  Tema Community 22]
3      [Greater Accra Region,  Tema,  Tema Community 22]
4      [Greater Accra Region,  Tema,  Tema Community 22]
                             ...                        
435        [Greater Accra Region,  Accra,  Tantra Hills]
436    [Greater Accra Region,  Accra,  East Legon Hills]
437    [Greater Accra Region,  Accra,  Ashaley Botwe ...
438    [Greater Accra Region,  Accra,  East Legon Hills]
439          [Ashanti Region,  Kumasi,  Daaban new site]
Name: location, Length: 440, dtype: object

In [463]:
df['region'] = df['location'].apply(lambda location: location[0])
df['city'] = df['location'].apply(lambda location:location[1])
df['sub'] = df['location'].apply(lambda location:location[2])

In [464]:
df.columns

Index(['_id', 'source', 'category', 'price', 'house_details', 'other_details',
       'location', 'phone', 'images', 'currency', 'duration', 'region', 'city',
       'sub', 'no_of_bedrooms', 'no_of_bathrooms'],
      dtype='object')

In [465]:
df.head(3)

Unnamed: 0,_id,source,category,price,house_details,other_details,location,phone,images,currency,duration,region,city,sub,no_of_bedrooms,no_of_bathrooms
0,64c8106ad2b80fc9a5e08ff1,efiewura.com,for sale,125000.0,"{'Beds': '4', 'Baths': '4'}",Appliances\nWater Heater\nWardrobes\nAmenities...,"[Greater Accra Region, Accra, Lakeside Estat...",,[https://efiewura.com/img/properties/executive...,GHS,,Greater Accra Region,Accra,Lakeside Estate,4,4
1,64c8106cd2b80fc9a5e08ff2,efiewura.com,for sale,200000.0,"{'Beds': '4', 'Baths': '4'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/4_bedroom...,GHS,,Greater Accra Region,Tema,Tema Community 22,4,4
2,64c8106dd2b80fc9a5e08ff3,efiewura.com,for sale,165000.0,"{'Beds': '3', 'Baths': '3'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Tema,Tema Community 22,3,3


###### house_details

In [466]:
df['no_of_bedrooms'] = df['house_details'].apply(lambda x:x.get("Beds"))
df['no_of_bathrooms'] = df['house_details'].apply(lambda x:x.get("Baths"))

In [467]:
# drop unnecessary columns

In [468]:
df.head(2)

Unnamed: 0,_id,source,category,price,house_details,other_details,location,phone,images,currency,duration,region,city,sub,no_of_bedrooms,no_of_bathrooms
0,64c8106ad2b80fc9a5e08ff1,efiewura.com,for sale,125000.0,"{'Beds': '4', 'Baths': '4'}",Appliances\nWater Heater\nWardrobes\nAmenities...,"[Greater Accra Region, Accra, Lakeside Estat...",,[https://efiewura.com/img/properties/executive...,GHS,,Greater Accra Region,Accra,Lakeside Estate,4,4
1,64c8106cd2b80fc9a5e08ff2,efiewura.com,for sale,200000.0,"{'Beds': '4', 'Baths': '4'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/4_bedroom...,GHS,,Greater Accra Region,Tema,Tema Community 22,4,4


In [469]:
for i in list(df['other_details']):
    print(i)

Appliances
Water Heater
Wardrobes
Amenities
Security Fence
Water storage and pumping system
Car Port
Parking
Pre-paid Meter

* All Bedrooms En-suite
* P.O.P Ceiling in all Rooms 
* Modern Porcelain Floor Tiles
* Wall Tiles in Wet Areas
* Wardrobes in all Bedrooms
* Tiled and Fitted Kitchen Cabinet
* Wiring for Microwave, Gas Cooker and Heat Extractor
* Hot Water System including Water Heater
* Piping and Wiring for Washing Machine
* Wiring for A/C and Ceiling Fans
* TV and DSTV points
* Prepainted Longspan AluZinc Roof with Rain Gutters
* Sliding Gate
* External Steel Security Doors
* Polished Internal Doors with Architraves
* Sliding Glass Windows with Fixed Burglar Proofs
* Biofill System
* Overhead Polytank
* Tiled and Grassed Compound
* Provision for Generator, Water Pump 
* Carport
* Security Electric Fence Wall




Appliances
Dishwasher
Microwave
Air Condition
Recess Light
Gas Range
Electric Cooker
Wardrobes
Amenities
Security Fence
24-hour Security
Water storage and pumping syst

In [470]:
df.columns

Index(['_id', 'source', 'category', 'price', 'house_details', 'other_details',
       'location', 'phone', 'images', 'currency', 'duration', 'region', 'city',
       'sub', 'no_of_bedrooms', 'no_of_bathrooms'],
      dtype='object')

In [471]:
filtered = []
for row in df.itertuples(name = 'RealEstate'):
    if len(row.images) > 1:
        filtered.append(row.Index)

In [472]:
df = df.iloc[filtered] #removed rows with less than 2 images

In [473]:
df= df[df['duration'].isin(['N/A']) & df['region'].isin(['Greater Accra Region'])]

In [474]:
df.head(3)

Unnamed: 0,_id,source,category,price,house_details,other_details,location,phone,images,currency,duration,region,city,sub,no_of_bedrooms,no_of_bathrooms
0,64c8106ad2b80fc9a5e08ff1,efiewura.com,for sale,125000.0,"{'Beds': '4', 'Baths': '4'}",Appliances\nWater Heater\nWardrobes\nAmenities...,"[Greater Accra Region, Accra, Lakeside Estat...",,[https://efiewura.com/img/properties/executive...,GHS,,Greater Accra Region,Accra,Lakeside Estate,4,4
2,64c8106dd2b80fc9a5e08ff3,efiewura.com,for sale,165000.0,"{'Beds': '3', 'Baths': '3'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Tema,Tema Community 22,3,3
3,64c8106fd2b80fc9a5e08ff4,efiewura.com,for sale,100000.0,"{'Beds': '3', 'Baths': '3'}",,"[Greater Accra Region, Tema, Tema Community 22]",,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Tema,Tema Community 22,3,3


In [475]:
df = df.drop(['source','house_details','phone','location'],axis =1)

In [476]:
df.head()

Unnamed: 0,_id,category,price,other_details,images,currency,duration,region,city,sub,no_of_bedrooms,no_of_bathrooms
0,64c8106ad2b80fc9a5e08ff1,for sale,125000.0,Appliances\nWater Heater\nWardrobes\nAmenities...,[https://efiewura.com/img/properties/executive...,GHS,,Greater Accra Region,Accra,Lakeside Estate,4,4
2,64c8106dd2b80fc9a5e08ff3,for sale,165000.0,,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Tema,Tema Community 22,3,3
3,64c8106fd2b80fc9a5e08ff4,for sale,100000.0,,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Tema,Tema Community 22,3,3
4,64c81070d2b80fc9a5e08ff5,for sale,280000.0,,[https://efiewura.com/img/properties/3_bedroom...,USD,,Greater Accra Region,Tema,Tema Community 22,3,2
5,64c81072d2b80fc9a5e08ff6,for sale,250000.0,Appliances\nDishwasher\nMicrowave\nAir Conditi...,[https://efiewura.com/img/properties/4_bedroom...,USD,,Greater Accra Region,Accra,New Legon Hills,4,5


In [477]:
df['city'].value_counts()

 Accra       373
 Tema         27
 Other         6
 Afienya       3
 Oyibi         2
 Ada-Foah      1
 Ashaiman      1
Name: city, dtype: int64

In [478]:
def citywrangler(x):
    if x.__contains__('Accra'):
        return 'Accra'
    else:
        return "Other"

In [479]:
df['city'] = df['city'].apply(citywrangler)

In [480]:
df

Unnamed: 0,_id,category,price,other_details,images,currency,duration,region,city,sub,no_of_bedrooms,no_of_bathrooms
0,64c8106ad2b80fc9a5e08ff1,for sale,125000.00,Appliances\nWater Heater\nWardrobes\nAmenities...,[https://efiewura.com/img/properties/executive...,GHS,,Greater Accra Region,Accra,Lakeside Estate,4,4
2,64c8106dd2b80fc9a5e08ff3,for sale,165000.00,,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Other,Tema Community 22,3,3
3,64c8106fd2b80fc9a5e08ff4,for sale,100000.00,,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Other,Tema Community 22,3,3
4,64c81070d2b80fc9a5e08ff5,for sale,280000.00,,[https://efiewura.com/img/properties/3_bedroom...,USD,,Greater Accra Region,Other,Tema Community 22,3,2
5,64c81072d2b80fc9a5e08ff6,for sale,250000.00,Appliances\nDishwasher\nMicrowave\nAir Conditi...,[https://efiewura.com/img/properties/4_bedroom...,USD,,Greater Accra Region,Accra,New Legon Hills,4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
434,64c813e2d2b80fc9a5e091a3,for sale,1278400.00,\nFurnished 3 Bedrooms house For sale at lakes...,[https://efiewura.com/img/properties/furnished...,GHC,,Greater Accra Region,Accra,Lakeside community 8,3,3
435,64c813e4d2b80fc9a5e091a4,for sale,2857600.00,\nExecutive furnished 4 bedroom is up for sal...,[https://efiewura.com/img/properties/executive...,GHC,,Greater Accra Region,Accra,Tantra Hills,4,4
436,64c813e6d2b80fc9a5e091a5,for sale,1504000.00,\n4bedroom fully furnished for sale at East le...,[https://efiewura.com/img/properties/4bedroom_...,GHC,,Greater Accra Region,Accra,East Legon Hills,4,4
437,64c813e8d2b80fc9a5e091a6,for sale,550000.00,\nThis is a newly built 3bedroom self house lo...,[https://efiewura.com/img/properties/this_is_a...,GHC,,Greater Accra Region,Accra,Ashaley Botwe Japan -Motors,3,3


In [481]:
df['sub'].value_counts()

 Oyarifa                        19
 East Legon                     16
 East Legon Hills               15
 Spintex                        13
 East legon                     10
                                ..
 West Legon                      1
 Amasaman Sonitra Junction       1
 Sapeiman( Amasaman)             1
 North Kaneshie                  1
 Ashaley Botwe Japan -Motors     1
Name: sub, Length: 210, dtype: int64

In [482]:
for i in df['sub'].unique():
    if i.lower().__contains__('legon'):
        print(i)

 New Legon Hills
 East Legon (Trassaco/Adjiringanor)
 Eastlegon hills
 EAST LEGON HILL
 East legon hill
 Eastlegon
 NORTH LEGON
 EASTLEGON
 East legon
 EAST LEGON
 West legon
 New Legon
 East Legon
 East Legon UPSA
 North Legon
 Agbogba North Legon
 East Legon around AH hotel
 East Legon Hills
 East Legon Agringanor
 East Legon behind Pinkberry ice cream
 West Legon
 Adenta New Legon
 Adenta new legon
 EAST LEGON HILLS
 East Legon Trasacco
 East Legon - Adjiringanor
 East Legon hills


In [483]:
def subwrangler(x):
    if x.lower().__contains__('legon'):
        if x.lower().__contains__('Agbogba') or x.lower().__contains__('Adenta'):
            pass
        else:
            print(x)
            return 'Legon'
    else:
        return x

In [484]:
df['sub'] = df['sub'].apply(subwrangler)

 New Legon Hills
 East Legon (Trassaco/Adjiringanor)
 East Legon (Trassaco/Adjiringanor)
 Eastlegon hills
 EAST LEGON HILL
 East legon hill
 Eastlegon hills
 Eastlegon
 Eastlegon
 Eastlegon
 Eastlegon hills
 NORTH LEGON
 Eastlegon hills
 EASTLEGON
 East legon hill
 East legon
 East legon hill
 East legon
 New Legon Hills
 NORTH LEGON
 EAST LEGON HILL
 East legon
 EAST LEGON
 West legon
 East legon
 New Legon
 East Legon
 East Legon UPSA
 North Legon
 EAST LEGON HILL
 EAST LEGON HILL
 East legon hill
 East Legon
 Agbogba North Legon
 EAST LEGON
 East Legon
 North Legon
 East Legon
 East Legon around AH hotel
 East Legon Hills
 East Legon Hills
 East Legon Agringanor
 East Legon behind Pinkberry ice cream
 East Legon Hills
 East Legon
 West Legon
 NORTH LEGON
 East Legon
 East Legon
 Adenta New Legon
 East Legon Agringanor
 East legon
 East legon
 EAST LEGON
 East legon hill
 East legon hill
 East legon
 East legon hill
 East legon hill
 East legon hill
 Adenta new legon
 East legon
 Eas

In [485]:
df['sub'].unique()

array([' Lakeside Estate', ' Tema Community 22', 'Legon',
       ' Cantonment/ Labone', ' Airport West',
       ' Tse-Addo (Near Cantonments & Labone)', ' Accra',
       ' Sapeiman( Amasaman)', ' Fise(Amasaman)', ' Tantra Hills',
       ' Mataheko', ' Roman Ridge', ' Cantonments', ' Cantoments',
       ' Oyarifa', ' Abokobi', ' Old Ashongman', ' Tema', ' Botwe',
       ' Tema community 25', ' Amasaman', ' East Airport', ' Lakeside',
       ' TSEADDO', ' Airport residential Area', ' Amrahia',
       ' AIRPORT RESIDENTIAL AREA', ' Shiashie', ' Sakumono',
       ' Cambodia spintex', ' Tseaddo', ' Pantang', ' Keabenya ACP',
       ' Roman Rudge', ' Labone', ' Airport Hill', ' Hatso',
       ' Tantra hill', ' Oyarifa special', ' Kashibj', ' Katamanso',
       ' Adenta - Katamanso', ' Trasacco', ' Westland', ' Madina Ogbojo',
       ' Adenta ssnit flat', ' Lashibi spintex', ' Spintex',
       ' Adjiringanor', ' Airport residential area', ' Cantonment',
       ' Botwe school junction', ' McCa

In [486]:
df['sub'] = df['sub'].replace({' Adjiriganor':"Legon",' Shiashie':"Legon",' Adjiringanor':"Legon",' American house':"Legon",' Adjringanor':"Legon",' ADJIRIGANOR':"Legon",' AMERICAN HOUSE':"Legon",' Westland':"Legon",
                   ' Adenta Sakora':"Adenta",' Adenta - Katamanso':"Adenta",' Adenta ssnit flat':"Adenta",' Adenta Amanfrom':'Adenta',' Adenta':"Adenta",' Adenta Housing Down':"Adenta",' Adenta pantang':"Adenta",
                   ' Cantonment/ Labone':"Cantonments",' Cantoments':"Cantonments",' Cantonment':"Cantonments"," Cantonments":"Cantonments",
                   ' Cambodia spintex':"Spintex",' Lashibi spintex':"Spintex",' Spintex':"Spintex",' SPINTEX COMMUNITY 18':"Spintex",' Spintex signboard':"Spintex",' SPINTEX COM 18':"Spintex", ' Spintex Coastal':"Spintex",' spintex':"Spintex",' Spintex Road':"Spintex",
                               ' Spintex sakumono estate':"Spintex",' Spintex Manet':"Spintex",' Spintex Greda estate':"Spintex",' Spintex Community 20':"Spintex",' Spintex ecobank':"Spintex",' Spintex kasspreko':"Spintex",
                   ' Tema Community 22':"Tema", ' Tema':"Tema",' Tema community 25':"Tema",' Teshie Lekma':"Tema",' Tema Community 19':"Tema",' Community 25':"Tema",' Golf City.Tema community25':"Tema",' Community24':"Tema",' Community 25 Tema':"Tema",' Off the Tema-Aflao road':"Tema",
                               ' Afienya Jerusalem.Tema':"Tema",' COMMUNITY 6':"Tema",' Afienya-Mataheko':"Tema",' Afienya':"Tema",' Tema Community 25':"Tema",' Tema community10':"Tema",' Afienya mataheko.Tema':"Tema",' Tema Aflao Road':"Tema",' Nungua':"Tema",' Ashiaman':"Tema",' Gbetsile Michel Camp.Tema':"Tema",' Community 25 Devtraco':"Tema",
                               ' Oyarifa special':"Oyarifa",' Oyarifa':"Oyarifa",' Oyarifa Focos':"Oyarifa",' Oyarifa - Special Ice':"Oyarifa",' OYARIFA ayi mensah':"Oyarifa",
                  ' Airport West':"Airport",' East Airport':"Airport",' Airport':"Airport",' Airport Residential Area':"Airport",' AIRPORT RESIDENTIAL AREA':"Airport",' Airport Hill':"Airport",' Airport residential area':"Airport",' Airport Residential Area':"Airport",' Airport Hills':"Airport"
                  })

In [487]:
df['sub'].unique()

array([' Lakeside Estate', 'Tema', 'Legon', 'Cantonments', 'Airport',
       ' Tse-Addo (Near Cantonments & Labone)', ' Accra',
       ' Sapeiman( Amasaman)', ' Fise(Amasaman)', ' Tantra Hills',
       ' Mataheko', ' Roman Ridge', 'Oyarifa', ' Abokobi',
       ' Old Ashongman', ' Botwe', ' Amasaman', ' Lakeside', ' TSEADDO',
       ' Airport residential Area', ' Amrahia', ' Sakumono', 'Spintex',
       ' Tseaddo', ' Pantang', ' Keabenya ACP', ' Roman Rudge', ' Labone',
       ' Hatso', ' Tantra hill', ' Kashibj', ' Katamanso', 'Adenta',
       ' Trasacco', ' Madina Ogbojo', ' Botwe school junction',
       ' McCarthy Hill', ' Botwe Pentecost', ' Pantang junction',
       ' BOTWE SCHOOL JUNCTION', ' Abokobi Boi', ' Trassaco', ' Musuku',
       ' North Ridge', ' Achimota Tantra-Hills', ' Lapaz',
       ' Lakeside Estate Community 5', ' Dome Pillar 2',
       ' Ashaley Botwe Sch Junct. Near NanaKrom Melcom', ' Burma Hills',
       ' Okpoi-Gonno', ' Ashley Botwe', ' Trassaco Valley',
     

In [488]:
pd.DataFrame(df['sub'].value_counts()).head(20)

Unnamed: 0,sub
Legon,112
Tema,28
Spintex,27
Airport,25
Oyarifa,24
Cantonments,12
Adenta,12
Abokobi,6
Tseaddo,5
Lakeside Estate,5


In [489]:
def subwrangler2(x):
    if x not in ['Legon',"Spintex","Tema","Oyarifa","Adenta","Airport","Cantonments"]:
        return "Other"
    else:
        return x

In [490]:
df['sub'] = df['sub'].apply(subwrangler2)

In [491]:
df.head()

Unnamed: 0,_id,category,price,other_details,images,currency,duration,region,city,sub,no_of_bedrooms,no_of_bathrooms
0,64c8106ad2b80fc9a5e08ff1,for sale,125000.0,Appliances\nWater Heater\nWardrobes\nAmenities...,[https://efiewura.com/img/properties/executive...,GHS,,Greater Accra Region,Accra,Other,4,4
2,64c8106dd2b80fc9a5e08ff3,for sale,165000.0,,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Other,Tema,3,3
3,64c8106fd2b80fc9a5e08ff4,for sale,100000.0,,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Other,Tema,3,3
4,64c81070d2b80fc9a5e08ff5,for sale,280000.0,,[https://efiewura.com/img/properties/3_bedroom...,USD,,Greater Accra Region,Other,Tema,3,2
5,64c81072d2b80fc9a5e08ff6,for sale,250000.0,Appliances\nDishwasher\nMicrowave\nAir Conditi...,[https://efiewura.com/img/properties/4_bedroom...,USD,,Greater Accra Region,Accra,Legon,4,5


#### currency

In [492]:
df['currency'].unique()

array(['GHS', 'USD', 'GHC'], dtype=object)

In [493]:
df['currency_value'] = df['currency'].map({"USD":11.29,"GHC":1,"GHS":1})

In [498]:
df['price'] = df['price']*df['currency_value']

In [499]:
df.head()

Unnamed: 0,_id,category,price,other_details,images,currency,duration,region,city,sub,no_of_bedrooms,no_of_bathrooms,currency_value
0,64c8106ad2b80fc9a5e08ff1,for sale,125000.0,Appliances\nWater Heater\nWardrobes\nAmenities...,[https://efiewura.com/img/properties/executive...,GHS,,Greater Accra Region,Accra,Other,4,4,1.0
2,64c8106dd2b80fc9a5e08ff3,for sale,165000.0,,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Other,Tema,3,3,1.0
3,64c8106fd2b80fc9a5e08ff4,for sale,100000.0,,[https://efiewura.com/img/properties/3_bedroom...,GHS,,Greater Accra Region,Other,Tema,3,3,1.0
4,64c81070d2b80fc9a5e08ff5,for sale,3161200.0,,[https://efiewura.com/img/properties/3_bedroom...,USD,,Greater Accra Region,Other,Tema,3,2,11.29
5,64c81072d2b80fc9a5e08ff6,for sale,2822500.0,Appliances\nDishwasher\nMicrowave\nAir Conditi...,[https://efiewura.com/img/properties/4_bedroom...,USD,,Greater Accra Region,Accra,Legon,4,5,11.29


In [500]:
df['currency'] = df['currency'].replace("GHS","GHC")

In [501]:
df['currency'].unique()

array(['GHC', 'USD'], dtype=object)

In [502]:
df = df.rename(columns = {"currency":"accepted_currency"})

In [515]:
df = df.drop(columns=['region','_id','currency_value','category',"duration"],axis = 1)

KeyError: "['region', '_id', 'currency_value', 'category'] not found in axis"

In [508]:
df.isna().sum()

price                0
other_details        0
images               0
accepted_currency    0
duration             0
city                 0
sub                  0
no_of_bedrooms       5
no_of_bathrooms      5
dtype: int64

In [509]:
df[df['other_details'] == ""]

Unnamed: 0,price,other_details,images,accepted_currency,duration,city,sub,no_of_bedrooms,no_of_bathrooms
2,165000.0,,[https://efiewura.com/img/properties/3_bedroom...,GHC,,Other,Tema,3,3
3,100000.0,,[https://efiewura.com/img/properties/3_bedroom...,GHC,,Other,Tema,3,3
4,3161200.0,,[https://efiewura.com/img/properties/3_bedroom...,USD,,Other,Tema,3,2
243,1129000.0,,[https://efiewura.com/img/properties/3_bedroom...,USD,,Accra,Other,3,3
300,846750.0,,[https://efiewura.com/img/properties/two_bedro...,USD,,Other,Tema,2,2
301,1072550.0,,[https://efiewura.com/img/properties/three_bed...,USD,,Other,Tema,3,3
372,4064400.0,,[https://efiewura.com/img/properties/executive...,USD,,Accra,Other,4,4
377,3387000.0,,[https://efiewura.com/img/properties/6_bedroom...,USD,,Accra,Other,14,14
379,6762710.0,,[https://efiewura.com/img/properties/executive...,USD,,Accra,Tema,5,5
383,2258000.0,,[https://efiewura.com/img/properties/4_bedroom...,USD,,Accra,Other,4,4


In [512]:
df['other_details'] = df['other_details'].replace("",float("nan"))

In [513]:
df.isna().sum()

price                 0
other_details        11
images                0
accepted_currency     0
duration              0
city                  0
sub                   0
no_of_bedrooms        5
no_of_bathrooms       5
dtype: int64

In [521]:
df.dropna().to_excel("cleaned_data.xlsx")

KeyError: 'house_details'