In [9]:
import pandas as pd
import urllib.parse
import re
import numpy as np

# Load the raw property data and keep a single row per id.
df = pd.read_csv('properties_data.csv')
df = df.drop_duplicates(subset='id')
# Discard rows whose Type is "house group" or "apartment group".
df = df.loc[~df["Type"].isin(["house group", "apartment group"])]

# Raw columns that are carried over into the cleaned dataset.
filtered_columns = ['id', 'location', 'Number of frontages', 'Type', 'Subtype',
                    'Price', 'Transaction Type', 'Bedrooms', 'Living area',
                    'Kitchen type', 'Furnished', 'How many fireplaces?', 'Terrace surface',
                    'Garden surface', 'Surface of the plot', 'Swimming pool', 'Building condition']
df = df[filtered_columns]

# Map the raw column names onto the names used in the cleaned dataset.
# Columns not listed here keep their raw name.
df = df.rename(columns={
    'location': 'Locality',
    'Transaction Type': 'Type of sale',
    'Type': 'Type of property',
    'Subtype': 'Subtype of property',
    'Number of frontages': 'Number of facades',
    'Bedrooms': 'Number of rooms',
    'Surface of the plot': 'Surface of the land',
    'Garden surface': 'Garden',
    'Terrace surface': 'Terrace',
    'Kitchen type': 'Fully equipped kitchen',
    'How many fireplaces?': 'Open fire'
})

# Put the columns in a specific order.
# reindex() keeps ONLY the labels listed here, so every column selected above
# has to appear in this list. 'Building condition' was selected but missing
# from the ordering, which silently discarded it; it is now kept as the last
# column.
# NOTE(review): if dropping 'Building condition' was intentional, remove it
# from both lists instead.
df = df.reindex(columns=['id', 'Locality', 'Type of property', 'Subtype of property',
                         'Type of sale', 'Price', 'Number of facades', 'Number of rooms',
                         'Living area', 'Fully equipped kitchen', 'Furnished',
                         'Surface of the land', 'Terrace', 'Garden', 'Open fire',
                         'Swimming pool', 'Building condition'])

def clean_and_convert(column):
    """Convert a column of free-form values to whole numbers.

    Each value is reduced to the digits it contains and returned as an
    integer. Values that contain no digits at all (NaN, empty strings,
    pure text) become 0.

    Parameters
    ----------
    column : pandas.Series
        Values to clean; may mix strings, numbers and NaN.

    Returns
    -------
    pandas.Series
        Integer series with the same index as ``column``.
    """
    def _to_int(value):
        if isinstance(value, (float, np.floating)):
            # NaN and infinities carry no digits -> 0.
            if not np.isfinite(value):
                return 0
            # str(120.0) is "120.0"; stripping non-digits from that would
            # yield 1200. Drop the fractional part before stringifying.
            value = int(value)
        # Raw string: '\D' in a normal string literal is an invalid escape.
        digits = re.sub(r'\D+', '', str(value))
        return int(digits) if digits else 0

    return column.apply(_to_int).astype(int)

# Decode percent-escapes in Locality. na_action='ignore' leaves missing
# values untouched; urllib.parse.unquote raises TypeError on a NaN.
df['Locality'] = df['Locality'].map(urllib.parse.unquote, na_action='ignore')

# Convert the surface columns to integers (values without digits become 0).
for surface_column in ('Living area', 'Terrace', 'Garden', 'Surface of the land'):
    df[surface_column] = clean_and_convert(df[surface_column])

df.to_csv('properties_clean.csv', index=False)

# Summarise the cleaned frame. The stray, indented re-read of
# 'properties_data.csv' that used to sit here was removed: it was an
# IndentationError and would have replaced the cleaned data with the raw file.
df.describe()

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) of df's numeric columns.
df.describe()

Unnamed: 0,id,Number of facades,Number of rooms,Living area,Surface of the land,Terrace,Garden,Open fire
count,18663.0,14235.0,18316.0,18663.0,18663.0,18663.0,18663.0,1013.0
mean,10420800.0,2.765508,3.029646,179.471093,697.502384,13.241012,275.565343,1.061204
std,231001.1,0.922045,1.657928,1297.974308,5669.115053,41.126757,7159.381176,0.414897
min,7710166.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
25%,10386580.0,2.0,2.0,90.0,0.0,0.0,0.0,1.0
50%,10495570.0,2.0,3.0,134.0,0.0,0.0,0.0,1.0
75%,10547160.0,4.0,4.0,202.0,460.0,16.0,1.0,1.0
max,10579690.0,25.0,39.0,175173.0,500000.0,3286.0,900000.0,10.0
