# Data Cleaning

In [1]:
from datetime import date
import pandas as pd
import numpy as np


In [2]:
#Reading data retrieved from the rightmove 
data = pd.read_csv('Combined_data_230219.csv')
data.head()

Unnamed: 0,selection1_name,selection1_prices,selection1_house_type,selection1_date,selection1_bedrooms,selection1_selection3
0,"10, Lord Chancellor Walk, Kingston Upon Thames...","£1,050,000","Detached, Freehold, Residential",19-Dec-18,5 bedrooms,(0.7 miles)
1,"16, Clevedon Road, Kingston Upon Thames, Great...","£905,000","Detached, Freehold, Residential",04-Dec-18,5 bedrooms,(0.2 miles)
2,"14, Albion Road, Kingston Upon Thames, Greater...","£1,225,000","Detached, Freehold, Residential",03-Dec-18,4 bedrooms,(0.5 miles)
3,"Chase End, Fitzgeorge Avenue, New Malden, Grea...","£1,030,000","Detached, Freehold, Residential",30-Nov-18,4 bedrooms,(0.9 miles)
4,"55, Cobham Road, Kingston Upon Thames, Greater...","£910,000","Detached, Freehold, Residential",22-Nov-18,4 bedrooms,(0.1 miles)


### Features available are 

1.selection1_name - ADDRESS<br>
2.selection1_prices - SALE PRICE<br>
3.selection1_house_type - DETAILS which include HOUSE TYPE,FREHOLD/LEASEHOLD , RESIDENTIAL-NEW/OLD<br>
4.selection1_date - SALE DATE<br>
5.selection1_bedrooms - BEDROOMS<br>
6.selection_selection3 - DISTANCE TO THE NEAREST STATION in miles<br>

In [3]:
#columns names obtained from the retrieved data which needed to be changed into the required form
data.columns = ['Address','Sale_price','House_types','Sale_date','Bedrooms','Stationdist_miles']

In [4]:
data.columns

Index(['Address', 'Sale_price', 'House_types', 'Sale_date', 'Bedrooms',
       'Stationdist_miles'],
      dtype='object')

In [5]:
#Splitting the house type into three seperate columns
data[['House_type','Freehold_leasehold','Res_new_old']] = data['House_types'].str.split(',', expand=True)
data = data.drop('House_types', axis=1)

In [6]:
#Cleaning for  ADDRESS column[0] and seperating POSTCODE to a new column
data['Postcode']=data['Address'].str.rsplit(',').str[-1]
data['Postcode'] = data['Postcode'].str.replace("Greater London","")
data['Address']=data['Address'].str.replace(' Kingston Upon Thames,','')
data['Address']=data['Address'].str.replace('Greater London','')
data['Address']=data['Address'].str.replace('London','')
data['Address']=data['Address'] .map(lambda x: str(x)[:-10])

# Cleaning for saleprice,Bedrooms and stationsdist_miles columns
data['Bedrooms'] = data['Bedrooms'].str.replace("bedrooms","")
data['Bedrooms'] = data['Bedrooms'].str.replace("bedroom","")
data['Stationdist_miles'] = data['Stationdist_miles'].str.replace("(","")
data['Stationdist_miles'] = data['Stationdist_miles'].str.replace("miles","")
data['Stationdist_miles'] = data['Stationdist_miles'].str.replace(")","")
data['Sale_price']=data['Sale_price'].str.replace(',','')
data['Sale_price'] = data['Sale_price'].str.replace("£","")


In [7]:
#SPLITTING POSTCODE INTO KT1 AND KT2 region 
data['KT1_KT2']=data['Postcode'].map(lambda x: str(x)[:-3])

In [8]:
#Seperating the sale date into two seperate columns with sale year and sale month
data['Sale_year']=pd.DatetimeIndex(data['Sale_date']).year
data['Sale_month']=pd.DatetimeIndex(data['Sale_date']).month
data.head()

Unnamed: 0,Address,Sale_price,Sale_date,Bedrooms,Stationdist_miles,House_type,Freehold_leasehold,Res_new_old,Postcode,KT1_KT2,Sale_year,Sale_month
0,"10, Lord Chancellor Walk",1050000,19-Dec-18,5,0.7,Detached,Freehold,Residential,KT2 7HG,KT2,2018,12
1,"16, Clevedon Road",905000,04-Dec-18,5,0.2,Detached,Freehold,Residential,KT1 3AD,KT1,2018,12
2,"14, Albion Road",1225000,03-Dec-18,4,0.5,Detached,Freehold,Residential,KT2 7BZ,KT2,2018,12
3,"Chase End, Fitzgeorge Avenue, New Malden",1030000,30-Nov-18,4,0.9,Detached,Freehold,Residential,KT3 4SH,KT3,2018,11
4,"55, Cobham Road",910000,22-Nov-18,4,0.1,Detached,Freehold,Residential,KT1 3AE,KT1,2018,11


## Treating missing values

Columns named Bedrooms and Stationdist_miles seemed to have missing values

In [9]:
#Replacing the white spaces with nan
data = data.replace(r'^\s+$', np.nan, regex=True)
#This shows the number of null values in street_name,Bedrooms
null_columns=data.columns[data.isnull().any()]
data[null_columns].isnull().sum()

Bedrooms             1315
Stationdist_miles    1368
dtype: int64

In [10]:
#filling the null values for bedrooms 
#year wise sorting with respect to sale price and filling values

data_2014=data[data['Sale_year']==2014]
data_2014_sorted=data_2014.sort_values('Sale_price')
data_2014_sorted['Bedrooms'].fillna(method='backfill',inplace=True) 
data_2014_sorted['Bedrooms'].fillna(method='ffill',inplace=True) 

data_2015=data[data['Sale_year']==2015]
data_2015_sorted=data_2015.sort_values('Sale_price')
data_2015_sorted['Bedrooms'].fillna(method='backfill',inplace=True) 
data_2015_sorted['Bedrooms'].fillna(method='ffill',inplace=True)

data_2016=data[data['Sale_year']==2016]
data_2016_sorted=data_2016.sort_values('Sale_price')
data_2016_sorted['Bedrooms'].fillna(method='backfill',inplace=True) 
data_2016_sorted['Bedrooms'].fillna(method='ffill',inplace=True)

data_2017=data[data['Sale_year']==2017]
data_2017_sorted=data_2017.sort_values('Sale_price')
data_2017_sorted['Bedrooms'].fillna(method='backfill',inplace=True) 
data_2017_sorted['Bedrooms'].fillna(method='ffill',inplace=True) 

data_2018=data[data['Sale_year']==2018]
data_2018_sorted=data_2018.sort_values('Sale_price')
data_2018_sorted['Bedrooms'].fillna(method='backfill',inplace=True) 
data_2018_sorted['Bedrooms'].fillna(method='ffill',inplace=True)  

data_update= pd.concat([data_2014_sorted, data_2018_sorted,data_2017_sorted,data_2016_sorted,data_2015_sorted])
len(data_update)

3643

In [11]:
#recheck null values count 
null_columns=data_update.columns[data_update.isnull().any()]
data_update[null_columns].isnull().sum()

Stationdist_miles    1368
dtype: int64

In [12]:
data_update['Bedrooms'].value_counts()

2     1281
3     1015
4      507
1      416
5      318
6       74
0       16
7       15
8        1
Name: Bedrooms, dtype: int64

In [13]:
#sorting according to postcode and filling stationdist_miles missing values
data_update=data_update.sort_values('Postcode')
#using backfill to fill missing values in dist_to_station
data_update['Stationdist_miles'].fillna(method='backfill',inplace=True) 


In [14]:
#rechecikng again to look for null values
null_columns=data_update.columns[data_update.isnull().any()]
data_update[null_columns].isnull().sum()

Series([], dtype: float64)

In [15]:
data_update.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3643 entries, 895 to 3248
Data columns (total 12 columns):
Address               3643 non-null object
Sale_price            3643 non-null object
Sale_date             3643 non-null object
Bedrooms              3643 non-null object
Stationdist_miles     3643 non-null object
House_type            3643 non-null object
Freehold_leasehold    3643 non-null object
Res_new_old           3643 non-null object
Postcode              3643 non-null object
KT1_KT2               3643 non-null object
Sale_year             3643 non-null int64
Sale_month            3643 non-null int64
dtypes: int64(2), object(10)
memory usage: 370.0+ KB


In [16]:
#Reordering the columns
columnsreorder=['Address','Postcode','KT1_KT2','Sale_price','Bedrooms','House_type','Freehold_leasehold','Res_new_old',
                'Stationdist_miles','Sale_date','Sale_year','Sale_month']
data_update = data_update.reindex(columns=columnsreorder)
#Renaming Postcode to POSTCODE to merge with postcode info .csv 
data_update=data_update.rename(columns={"Postcode":"POSTCODE"})


In [17]:
data_update.head()

Unnamed: 0,Address,POSTCODE,KT1_KT2,Sale_price,Bedrooms,House_type,Freehold_leasehold,Res_new_old,Stationdist_miles,Sale_date,Sale_year,Sale_month
895,"Flat 40, Elder House, 4, Water Lane",KT1 1AE,KT1,435000,2,Flat,Leasehold,Residential,0.4,05-Oct-15,2015,10
845,"Flat 50, Elder House, 4, Water Lane",KT1 1AE,KT1,1295000,4,Flat,Leasehold,Residential,0.4,05-Feb-16,2016,2
577,"Flat 35, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,2,Flat,Leasehold,Residential,0.4,19-Oct-17,2017,10
522,"Flat 47, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,5,Flat,Leasehold,Residential,0.4,20-Apr-18,2018,4
723,"Flat 1, Elder House, 4, Water Lane",KT1 1AE,KT1,143750,2,Flat,Leasehold,Residential,0.4,06-Oct-16,2016,10


In [18]:
#saving the updated  dataframwe to a csv named KINGSTON_UPON_THAMES_REALDATA
data_update.to_csv('KINGSTON_UPON_THAMES_REALDATA.csv',index=False)

In [19]:
df_readcsv=pd.read_csv('KINGSTON_UPON_THAMES_REALDATA.csv')

In [20]:
def trim(dataset):
    trim = lambda x: x.strip() if type(x) is str else x
    return dataset.applymap(trim)
df_readcsv = trim(df_readcsv)
df_readcsv


Unnamed: 0,Address,POSTCODE,KT1_KT2,Sale_price,Bedrooms,House_type,Freehold_leasehold,Res_new_old,Stationdist_miles,Sale_date,Sale_year,Sale_month
0,"Flat 40, Elder House, 4, Water Lane",KT1 1AE,KT1,435000,2,Flat,Leasehold,Residential,0.4,05-Oct-15,2015,10
1,"Flat 50, Elder House, 4, Water Lane",KT1 1AE,KT1,1295000,4,Flat,Leasehold,Residential,0.4,05-Feb-16,2016,2
2,"Flat 35, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,2,Flat,Leasehold,Residential,0.4,19-Oct-17,2017,10
3,"Flat 47, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,5,Flat,Leasehold,Residential,0.4,20-Apr-18,2018,4
4,"Flat 1, Elder House, 4, Water Lane",KT1 1AE,KT1,143750,2,Flat,Leasehold,Residential,0.4,06-Oct-16,2016,10
5,"Flat 42, Elder House, 4, Water Lane",KT1 1AE,KT1,765000,4,Flat,Leasehold,Residential,0.4,20-Sep-16,2016,9
6,"Flat 93, Garricks House, Wadbrook Street",KT1 1AZ,KT1,515000,2,Flat,Leasehold,Residential,0.4,27-Apr-18,2018,4
7,"Flat 100, Garricks House, Wadbrook Street",KT1 1AZ,KT1,515000,2,Flat,Leasehold,Residential,0.4,23-Aug-18,2018,8
8,"Flat 110, Garricks House, Wadbrook Street",KT1 1AZ,KT1,800000,2,Flat,Leasehold,Residential,0.4,03-Apr-14,2014,4
9,"Flat 105, Garricks House, Wadbrook Street",KT1 1AZ,KT1,635000,2,Flat,Leasehold,Residential,0.4,21-Mar-16,2016,3


In [21]:
#reading the data retrieved for the post code details 
geo=pd.read_csv('geographical_postcode_kingston_upon_thames.csv')
geo.head(5)


Unnamed: 0,POSTCODE,In Use?,Latitude,Longitude,Easting,Northing,Grid Ref,Ward,Introduced,Terminated,Altitude
0,KT1 1AA,Yes,51.408688,-0.304465,518023,169104,TQ180691,Grove,01/02/1990,,12
1,KT1 1AB,No,51.41233,-0.302591,518144,169512,TQ181695,Grove,01/01/2005,01/08/2014,14
2,KT1 1AD,Yes,51.411211,-0.301136,518248,169390,TQ182693,Grove,01/03/2010,,13
3,KT1 1AE,Yes,51.412605,-0.307355,517812,169535,TQ178695,Grove,01/02/2011,,9
4,KT1 1AF,Yes,51.412477,-0.307208,517823,169521,TQ178695,Grove,01/02/2011,,10


In [22]:
#Merging the two datasets to form the final dataset
mer_data=pd.merge(df_readcsv,geo)
#Removing the columns which are not required for modelling 
del mer_data['Grid Ref']
del mer_data['Terminated']
del mer_data['In Use?']
del mer_data['Introduced']
# mer_data['Introduced']=mer_data['Introduced'].str.replace('/','-')
# mer_data['Introduced']=mer_data['Introduced'].map(lambda x: str(x)[3:])
mer_data

Unnamed: 0,Address,POSTCODE,KT1_KT2,Sale_price,Bedrooms,House_type,Freehold_leasehold,Res_new_old,Stationdist_miles,Sale_date,Sale_year,Sale_month,Latitude,Longitude,Easting,Northing,Ward,Altitude
0,"Flat 40, Elder House, 4, Water Lane",KT1 1AE,KT1,435000,2,Flat,Leasehold,Residential,0.4,05-Oct-15,2015,10,51.412605,-0.307355,517812,169535,Grove,9
1,"Flat 50, Elder House, 4, Water Lane",KT1 1AE,KT1,1295000,4,Flat,Leasehold,Residential,0.4,05-Feb-16,2016,2,51.412605,-0.307355,517812,169535,Grove,9
2,"Flat 35, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,2,Flat,Leasehold,Residential,0.4,19-Oct-17,2017,10,51.412605,-0.307355,517812,169535,Grove,9
3,"Flat 47, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,5,Flat,Leasehold,Residential,0.4,20-Apr-18,2018,4,51.412605,-0.307355,517812,169535,Grove,9
4,"Flat 1, Elder House, 4, Water Lane",KT1 1AE,KT1,143750,2,Flat,Leasehold,Residential,0.4,06-Oct-16,2016,10,51.412605,-0.307355,517812,169535,Grove,9
5,"Flat 42, Elder House, 4, Water Lane",KT1 1AE,KT1,765000,4,Flat,Leasehold,Residential,0.4,20-Sep-16,2016,9,51.412605,-0.307355,517812,169535,Grove,9
6,"Flat 93, Garricks House, Wadbrook Street",KT1 1AZ,KT1,515000,2,Flat,Leasehold,Residential,0.4,27-Apr-18,2018,4,51.408764,-0.307856,517787,169107,Grove,11
7,"Flat 100, Garricks House, Wadbrook Street",KT1 1AZ,KT1,515000,2,Flat,Leasehold,Residential,0.4,23-Aug-18,2018,8,51.408764,-0.307856,517787,169107,Grove,11
8,"Flat 110, Garricks House, Wadbrook Street",KT1 1AZ,KT1,800000,2,Flat,Leasehold,Residential,0.4,03-Apr-14,2014,4,51.408764,-0.307856,517787,169107,Grove,11
9,"Flat 105, Garricks House, Wadbrook Street",KT1 1AZ,KT1,635000,2,Flat,Leasehold,Residential,0.4,21-Mar-16,2016,3,51.408764,-0.307856,517787,169107,Grove,11


In [23]:
mer_data=mer_data.rename(columns={"POSTCODE":"Postcode"})
mer_data.columns

Index(['Address', 'Postcode', 'KT1_KT2', 'Sale_price', 'Bedrooms',
       'House_type', 'Freehold_leasehold', 'Res_new_old', 'Stationdist_miles',
       'Sale_date', 'Sale_year', 'Sale_month', 'Latitude', 'Longitude',
       'Easting', 'Northing', 'Ward', 'Altitude'],
      dtype='object')

In [24]:
#Removing the addresses containing postcodes beginning with SW15 and TW10
mer_data.tail(81)

Unnamed: 0,Address,Postcode,KT1_KT2,Sale_price,Bedrooms,House_type,Freehold_leasehold,Res_new_old,Stationdist_miles,Sale_date,Sale_year,Sale_month,Latitude,Longitude,Easting,Northing,Ward,Altitude
3286,"23, Westfield Court, Portsmouth Road, Surbiton",KT6 4HG,KT6,650000,3,Terraced,Leasehold,Residential,0.5,15-Mar-18,2018,3,51.398538,-0.309024,517732,167968,Grove,11
3287,"6, St Johns Terrace, ,",SW15 3PR,SW15,547500,4,Terraced,Freehold,Residential,0.5,13-Jul-18,2018,7,51.435128,-0.257390,521227,172121,Coombe Hill,15
3288,"4, Robin Hood Lane, ,",SW15 3PU,SW15,475600,3,Terraced,Freehold,Residential,1.8,27-Nov-15,2015,11,51.433419,-0.257987,521190,171930,Coombe Hill,16
3289,"33, Robin Hood Lane, ,",SW15 3PU,SW15,730000,3,Semi-Detached,Freehold,Residential,1.8,14-Apr-14,2014,4,51.433419,-0.257987,521190,171930,Coombe Hill,16
3290,"53, Robin Hood Way, ,",SW15 3PW,SW15,680000,3,Semi-Detached,Freehold,Residential,1.8,21-Mar-16,2016,3,51.433321,-0.257493,521225,171920,Coombe Hill,15
3291,"47, Robin Hood Way, ,",SW15 3PW,SW15,825000,5,Semi-Detached,Freehold,Residential,1.8,25-Nov-16,2016,11,51.433321,-0.257493,521225,171920,Coombe Hill,15
3292,"11, Robin Hood Way, ,",SW15 3PW,SW15,385000,3,Semi-Detached,Freehold,Residential,1.8,26-Sep-14,2014,9,51.433321,-0.257493,521225,171920,Coombe Hill,15
3293,"37, Robin Hood Way, ,",SW15 3PW,SW15,525000,2,Semi-Detached,Freehold,Residential,1.8,16-Mar-15,2015,3,51.433321,-0.257493,521225,171920,Coombe Hill,15
3294,"The Old School House, Robin Hood Lane, ,",SW15 3PY,SW15,875000,3,Semi-Detached,Freehold,Residential,1.8,30-Mar-16,2016,3,51.432943,-0.258667,521144,171876,Coombe Hill,16
3295,"6, Mary Adelaide Close, ,",SW15 3PZ,SW15,499950,3,Terraced,Freehold,Residential,1.8,26-Sep-14,2014,9,51.434212,-0.259281,521098,172016,Coombe Hill,18


In [25]:
mer_data=mer_data[:-80]
mer_data


Unnamed: 0,Address,Postcode,KT1_KT2,Sale_price,Bedrooms,House_type,Freehold_leasehold,Res_new_old,Stationdist_miles,Sale_date,Sale_year,Sale_month,Latitude,Longitude,Easting,Northing,Ward,Altitude
0,"Flat 40, Elder House, 4, Water Lane",KT1 1AE,KT1,435000,2,Flat,Leasehold,Residential,0.4,05-Oct-15,2015,10,51.412605,-0.307355,517812,169535,Grove,9
1,"Flat 50, Elder House, 4, Water Lane",KT1 1AE,KT1,1295000,4,Flat,Leasehold,Residential,0.4,05-Feb-16,2016,2,51.412605,-0.307355,517812,169535,Grove,9
2,"Flat 35, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,2,Flat,Leasehold,Residential,0.4,19-Oct-17,2017,10,51.412605,-0.307355,517812,169535,Grove,9
3,"Flat 47, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,5,Flat,Leasehold,Residential,0.4,20-Apr-18,2018,4,51.412605,-0.307355,517812,169535,Grove,9
4,"Flat 1, Elder House, 4, Water Lane",KT1 1AE,KT1,143750,2,Flat,Leasehold,Residential,0.4,06-Oct-16,2016,10,51.412605,-0.307355,517812,169535,Grove,9
5,"Flat 42, Elder House, 4, Water Lane",KT1 1AE,KT1,765000,4,Flat,Leasehold,Residential,0.4,20-Sep-16,2016,9,51.412605,-0.307355,517812,169535,Grove,9
6,"Flat 93, Garricks House, Wadbrook Street",KT1 1AZ,KT1,515000,2,Flat,Leasehold,Residential,0.4,27-Apr-18,2018,4,51.408764,-0.307856,517787,169107,Grove,11
7,"Flat 100, Garricks House, Wadbrook Street",KT1 1AZ,KT1,515000,2,Flat,Leasehold,Residential,0.4,23-Aug-18,2018,8,51.408764,-0.307856,517787,169107,Grove,11
8,"Flat 110, Garricks House, Wadbrook Street",KT1 1AZ,KT1,800000,2,Flat,Leasehold,Residential,0.4,03-Apr-14,2014,4,51.408764,-0.307856,517787,169107,Grove,11
9,"Flat 105, Garricks House, Wadbrook Street",KT1 1AZ,KT1,635000,2,Flat,Leasehold,Residential,0.4,21-Mar-16,2016,3,51.408764,-0.307856,517787,169107,Grove,11


In [26]:
#Since only KT1 and KT2 are considered for modeling checking whether unwanted postocde to remove it 
mer_data= mer_data[:-62]
mer_data.tail(10)

Unnamed: 0,Address,Postcode,KT1_KT2,Sale_price,Bedrooms,House_type,Freehold_leasehold,Res_new_old,Stationdist_miles,Sale_date,Sale_year,Sale_month,Latitude,Longitude,Easting,Northing,Ward,Altitude
3215,"40, Windmill Rise",KT2 7TU,KT2,390000,2,Flat,Leasehold,Residential,0.6,23-Mar-17,2017,3,51.421039,-0.280723,519642,170516,Coombe Hill,47
3216,"58, Windmill Rise",KT2 7TU,KT2,399999,2,Flat,Leasehold,Residential,0.6,28-Mar-17,2017,3,51.421039,-0.280723,519642,170516,Coombe Hill,47
3217,"44, Windmill Rise",KT2 7TU,KT2,385000,2,Flat,Leasehold,Residential,0.6,08-Jan-16,2016,1,51.421039,-0.280723,519642,170516,Coombe Hill,47
3218,"6, Windmill Rise",KT2 7TU,KT2,850000,3,Terraced,Freehold,Residential,0.6,31-Mar-16,2016,3,51.421039,-0.280723,519642,170516,Coombe Hill,47
3219,"12, Windmill Rise",KT2 7TU,KT2,650000,4,Terraced,Freehold,Residential,0.6,20-Feb-15,2015,2,51.421039,-0.280723,519642,170516,Coombe Hill,47
3220,"38, Windmill Rise",KT2 7TU,KT2,350000,3,Flat,Leasehold,Residential,0.9,07-Dec-18,2018,12,51.421039,-0.280723,519642,170516,Coombe Hill,47
3221,"7, Royal Court, Deer Park Close",KT2 7TW,KT2,330000,2,Flat,Leasehold,Residential,0.9,26-Feb-15,2015,2,51.418934,-0.282499,519524,170279,Coombe Hill,30
3222,"Flat 7, Dower House, 15, Manorgate Road",KT2 7UA,KT2,240000,5,Flat,Leasehold,Residential,0.9,14-Sep-15,2015,9,51.413947,-0.285835,519305,169719,Canbury,18
3223,"1, Moor Park Gardens",KT2 7UD,KT2,2650000,7,Detached,Freehold,Residential,0.9,17-Feb-17,2017,2,51.416814,-0.257641,521258,170084,Coombe Hill,40
3224,"22, Wonford Close",KT2 7XA,KT2,760000,2,Semi-Detached,Freehold,Residential,0.8,28-Sep-18,2018,9,51.415431,-0.25719,521293,169931,Coombe Hill,34


In [27]:
mer_data = trim(mer_data)
mer_data.to_csv('mer.csv',index=False)
#Final dataset is saved as mer.csv which is going to be used for further exploration and preprocessing steps

In [28]:
mer=pd.read_csv('mer.csv')

In [29]:
mer

Unnamed: 0,Address,Postcode,KT1_KT2,Sale_price,Bedrooms,House_type,Freehold_leasehold,Res_new_old,Stationdist_miles,Sale_date,Sale_year,Sale_month,Latitude,Longitude,Easting,Northing,Ward,Altitude
0,"Flat 40, Elder House, 4, Water Lane",KT1 1AE,KT1,435000,2,Flat,Leasehold,Residential,0.4,05-Oct-15,2015,10,51.412605,-0.307355,517812,169535,Grove,9
1,"Flat 50, Elder House, 4, Water Lane",KT1 1AE,KT1,1295000,4,Flat,Leasehold,Residential,0.4,05-Feb-16,2016,2,51.412605,-0.307355,517812,169535,Grove,9
2,"Flat 35, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,2,Flat,Leasehold,Residential,0.4,19-Oct-17,2017,10,51.412605,-0.307355,517812,169535,Grove,9
3,"Flat 47, Elder House, 4, Water Lane",KT1 1AE,KT1,550000,5,Flat,Leasehold,Residential,0.4,20-Apr-18,2018,4,51.412605,-0.307355,517812,169535,Grove,9
4,"Flat 1, Elder House, 4, Water Lane",KT1 1AE,KT1,143750,2,Flat,Leasehold,Residential,0.4,06-Oct-16,2016,10,51.412605,-0.307355,517812,169535,Grove,9
5,"Flat 42, Elder House, 4, Water Lane",KT1 1AE,KT1,765000,4,Flat,Leasehold,Residential,0.4,20-Sep-16,2016,9,51.412605,-0.307355,517812,169535,Grove,9
6,"Flat 93, Garricks House, Wadbrook Street",KT1 1AZ,KT1,515000,2,Flat,Leasehold,Residential,0.4,27-Apr-18,2018,4,51.408764,-0.307856,517787,169107,Grove,11
7,"Flat 100, Garricks House, Wadbrook Street",KT1 1AZ,KT1,515000,2,Flat,Leasehold,Residential,0.4,23-Aug-18,2018,8,51.408764,-0.307856,517787,169107,Grove,11
8,"Flat 110, Garricks House, Wadbrook Street",KT1 1AZ,KT1,800000,2,Flat,Leasehold,Residential,0.4,03-Apr-14,2014,4,51.408764,-0.307856,517787,169107,Grove,11
9,"Flat 105, Garricks House, Wadbrook Street",KT1 1AZ,KT1,635000,2,Flat,Leasehold,Residential,0.4,21-Mar-16,2016,3,51.408764,-0.307856,517787,169107,Grove,11


## FEATURES OBTAINED

1.Address<br>
2.Postcode<br>
3.KT1/KT2<br>
4.Sale price <br>
5.Bedrooms<br>
6.House_type <br>
7.Freehold_leasehold <br>
8.Res_new_old <br>
9.Staiondist_miles(Station distance in miles)<br>
10.Sale_date<br>
11.Sale_year<br>
12.Sale_month<br>
13.Latitude<br>
14.Longitude<br>
15.Easting<br>
16.Northing<br>
17.Ward<br>
18.Altitude<br>
