**Import Libraries**

In [1]:
import pandas as pd

**Function to clean Rent Dataframe**

In [2]:
# Layout of the raw ``location`` text, e.g. "Rosas (Distrito San Blas. Madrid Capital)":
#   neighbourhood - everything before the first "("
#   prefix        - the first word inside the parentheses (discarded)
#   district      - the rest of the text up to the first "."
#   city          - everything after that "." (still carrying the closing ")")
_LOCATION_PATTERN = (
    r"(?s)^(?P<neighbourhood>[^(]*)\("
    r"(?P<prefix>[^ .]*) (?P<district>[^.]*)\."
    r"(?P<city>.*)$"
)

# Substring -> replacement tables used to line names up with the other
# dataframes. The first matching key wins, in the order written here.
_NEIGHBOURHOOD_ALIASES = {
    'Valdebebas': 'Valdefuentes',
    'Virgen del Cortijo': 'Manoteras',
    'Las Águilas': 'Aguilas',
    'Ensanche de Vallecas': 'Valdecarros',
    'Los Ángeles': 'Los Angeles',
    'San Cristóbal': 'San Cristobal',
}
_DISTRICT_ALIASES = {'San Blas': 'San Blas - Canillejas'}


def _apply_alias(name, aliases):
    """Return the replacement of the first alias key contained in ``name``, else ``name``."""
    for fragment, replacement in aliases.items():
        if fragment in name:
            return replacement
    return name


def _count_or_zero(values):
    """Convert a column of counts to int, mapping anything that is not a plain digit string to 0."""
    # Going through str makes this work whether pandas parsed the column as
    # text or as integers; calling .isnumeric() on an int would raise.
    text = values.astype(str)
    return text.where(text.str.isdecimal(), '0').astype(int)


def houserent(x: pd.DataFrame) -> pd.DataFrame:
    """Clean a raw rent-listings dataframe.

    Parameters
    ----------
    x : pandas.DataFrame
        Raw listings with the columns ``location``, ``rent_price``, ``sq_mt``,
        ``bedroom_n`` and ``bathroom_n``. ``location``, ``rent_price`` and
        ``sq_mt`` must hold text. The frame is NOT modified, so the function
        can safely be re-run on the same input.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``city``, ``district``, ``neighbourhood``,
        ``rent_price_€`` (int), ``sq_mt`` (float), ``bedrooms`` (int),
        ``bathrooms`` (int) and ``type`` (always ``'rent'``). Rows with a
        missing value in any of the source fields, or whose ``location`` does
        not follow the "<neighbourhood> (<word> <district>. <city>)" layout,
        are dropped. The surviving rows keep their original index labels.
    """
    location = x['location'].str.extract(_LOCATION_PATTERN)

    # Remove the closing parenthesis and a trailing "Capital" *suffix*.
    # str.rstrip(" Capital") would strip any trailing run of those characters
    # and turn e.g. "Sevilla" into "Sev".
    city = (
        location['city']
        .str.rstrip(")")
        .str.replace(r"\s*Capital\s*$", "", regex=True)
        .str.strip()
    )

    # Assemble the output in its final column order/names, then drop incomplete
    # rows *before* parsing so a missing price or area cannot break the
    # conversions below.
    listings = pd.DataFrame({
        'city': city,
        'district': location['district'],
        'neighbourhood': location['neighbourhood'],
        'rent_price_€': x['rent_price'],
        'sq_mt': x['sq_mt'],
        'bedrooms': x['bedroom_n'],
        'bathrooms': x['bathroom_n'],
    }).dropna().copy()

    # Price: keep the number before the first space, repair the stray "A"
    # (stands for 0), and scale dotted values ("1.200") by 1000. Rounding
    # instead of truncating avoids float artefacts such as
    # 1.001 * 1000 == 1000.9999999999999.
    price = (
        listings['rent_price_€']
        .str.split(" ", n=1).str[0]
        .str.replace("A", "0", regex=False)
    )
    has_dot = price.str.contains(".", regex=False).astype(bool)
    amount = price.astype(float)
    listings['rent_price_€'] = amount.where(~has_dot, amount * 1000).round().astype(int)

    # Area: keep the number before the first space; the odd '2Ľ' value represents 200.
    area = listings['sq_mt'].str.split(" ", n=1).str[0]
    is_odd_200 = area.str.contains('2Ľ', regex=False).astype(bool)
    listings['sq_mt'] = area.mask(is_odd_200, '200').astype(float)

    # Room counts: non-numeric entries are reset to 0.
    listings['bedrooms'] = _count_or_zero(listings['bedrooms'])
    listings['bathrooms'] = _count_or_zero(listings['bathrooms'])

    # Assign listing type for future filtering.
    listings['type'] = 'rent'

    # Neighbourhood: keep everything left of the first '-', trim, then apply
    # the renames needed to match the other dataframes.
    neighbourhood = listings['neighbourhood'].str.split("-", n=1).str[0].str.strip()
    listings['neighbourhood'] = [
        _apply_alias(name, _NEIGHBOURHOOD_ALIASES) for name in neighbourhood
    ]

    # District: replace '-' with ' - ', trim, then apply the renames.
    district = listings['district'].str.replace("-", " - ", regex=False).str.strip()
    listings['district'] = [
        _apply_alias(name, _DISTRICT_ALIASES) for name in district
    ]

    return listings

In [3]:
# Read the raw rent listings with the 'maccentraleurope' codec and preview two rows.
rents = pd.read_csv(
    'raw_data/Madrid Housing Market/madridrent.csv',
    encoding='maccentraleurope',
)
rents.head(2)

Unnamed: 0,rent_price,location,sq_mt,bedroom_n,bathroom_n
0,1.200 Ř/mes,Rosas (Distrito San Blas. Madrid Capital),105 m_,3,2
1,1.400 Ř/mes,Mirasierra-Arroyo del Fresno (Distrito Fuencar...,90 m_,3,1


In [4]:
# Run the cleaning function over the raw listings and preview the result.
rent = rents.pipe(houserent)
rent.head(2)

Unnamed: 0,city,district,neighbourhood,rent_price_€,sq_mt,bedrooms,bathrooms,type
0,Madrid,San Blas - Canillejas,Rosas,1200,105.0,3,2,rent
1,Madrid,Fuencarral - El Pardo,Mirasierra,1400,90.0,3,1,rent


**Load the latitude/longitude lookup table (one row per district and neighbourhood)**

In [5]:
# Read the district/neighbourhood coordinate lookup and preview two rows.
latlong = pd.read_csv(filepath_or_buffer='cleaned_data/latlong.csv')
latlong.head(2)

Unnamed: 0,district,neighbourhood,latitude,longitude
0,Arganzuela,Acacias,40.40097,-3.70109
1,Arganzuela,Atocha,40.40152,-3.68443


**Merge dataframes to include latitude and longitude for all districts and neighbourhoods**

In [6]:
# Only the coordinates are taken from the lookup table; the district kept in
# the result is the one coming from the rent listings.
coordinates = latlong[['neighbourhood', 'latitude', 'longitude']]
output_columns = [
    'city', 'district', 'latitude', 'longitude', 'neighbourhood',
    'rent_price_€', 'sq_mt', 'bedrooms', 'bathrooms', 'type',
]
MHR_Merged = rent.merge(coordinates, how='inner', on='neighbourhood')[output_columns]
MHR_Merged.head()

Unnamed: 0,city,district,latitude,longitude,neighbourhood,rent_price_€,sq_mt,bedrooms,bathrooms,type
0,Madrid,San Blas - Canillejas,40.42227,-3.60449,Rosas,1200,105.0,3,2,rent
1,Madrid,San Blas - Canillejas,40.42227,-3.60449,Rosas,1200,148.0,3,2,rent
2,Madrid,San Blas - Canillejas,40.42227,-3.60449,Rosas,1200,130.0,4,1,rent
3,Madrid,San Blas - Canillejas,40.42227,-3.60449,Rosas,1350,110.0,2,1,rent
4,Madrid,San Blas - Canillejas,40.42227,-3.60449,Rosas,1100,84.0,2,1,rent


**Export to CSV**

In [7]:
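# Export is disabled so that re-running the notebook does not overwrite the file;
# uncomment the next line to write the merged table to disk.
#MHR_Merged.to_csv('Madrid_Rents.csv', index = False)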