### New version refactored into class object
*** WORK IN PROGRESS ***
#### TO DO
* Add in extra script to go into individual listings & get long - lat coordinates.

In [120]:
from lxml import html, etree
import requests
import pandas as pd
import datetime as dt

class rightmove_data(object):

    def __init__(self, url):
        self.url = url

    def rent_or_sale(self):
        try:
            start,end = str(self.url).split('www.rightmove.co.uk/property-')
            if end[:3] == 'for':
                return end[4:8]
            elif end[:3] == 'to-':
                return end[3:7]
        except ValueError:
            print("ERROR! Not a valid rightmove search URL.")

    def number_of_results(self):
        page = requests.get(self.url)
        tree = html.fromstring(page.content)
        xp_result_count = '//span[@class="searchHeader-resultCount"]/text()'
        return int(tree.xpath(xp_result_count)[0].replace(",", ""))
    
    def number_of_result_pages(self):
        page_count = self.number_of_results() / 24
        if self.number_of_results() % 24 > 0:
            page_count += 1
            
        # Rightmove will only actually return a maximum of 42 pages of results, hence:
        if page_count >42: page_count = 42
            
        return page_count
    
    def one_page_results(self,page_url):
        
        if self.rent_or_sale() == 'rent':
            xp_prices = '//span[@class="propertyCard-priceValue"]/text()'
        elif self.rent_or_sale() == 'sale':
            xp_prices = '//div[@class="propertyCard-priceValue"]/text()'
        
        xp_titles = '//div[@class="propertyCard-details"]//a[@class="propertyCard-link"]\
        //h2[@class="propertyCard-title"]/text()'
        xp_addresses = '//address[@class="propertyCard-address"]/text()'
        xp_weblinks = '//div[@class="propertyCard-details"]//a[@class="propertyCard-link"]/@href'

        page = requests.get(page_url)
        tree = html.fromstring(page.content)

        price_pcm, titles, addresses, weblinks = [], [], [], []

    # Create data lists from Xpaths
        for val in tree.xpath(xp_prices):
            price_pcm.append(val)
        for val in tree.xpath(xp_titles):
            titles.append(val)
        for val in tree.xpath(xp_addresses):
            addresses.append(val)
        for val in tree.xpath(xp_weblinks):
            weblinks.append('http://www.rightmove.co.uk'+val)

    # Convert data to temporary DataFrame
        data = [price_pcm, titles, addresses, weblinks]
        temp_df = pd.DataFrame(data)
        temp_df = temp_df.transpose()
        temp_df.columns = ['price','type','address','url']
        
    # Drop empty rows from DataFrame which come from placeholders in rightmove html
    # Note this url is correct for Rent or Sale searches.
        temp_df = temp_df[temp_df.url != 'http://www.rightmove.co.uk/property-for-sale/property-0.html']

        return temp_df
    
    def full_results(self):
        
        full_results = pd.DataFrame(columns={'price','type','address','url'})
                
        for i in range(0,self.number_of_result_pages()+1,1):
            iteration_url = str(self.url) + '&index=' + str((i*24))
            temp_df = self.one_page_results(iteration_url)
            frames = [full_results,temp_df]
            full_results = pd.concat(frames)
            
        # Tidy up results data frame for analysis
        
        full_results = full_results.reset_index(drop=True)
            
        # Convert price column to numeric values for analysis
        full_results.price.replace(regex=True,inplace=True,to_replace=r'\D',value=r'')
        full_results.price = pd.to_numeric(full_results.price)

        # Extract postcode stems to a separate column
        full_results['postcode'] = full_results['address'].str.extract\
        (r'\b([A-Za-z][A-Za-z]?[0-9][0-9]?[A-Za-z]?)\b',expand=True)

    # Extract number of bedrooms from 'type' to a separate column
        full_results['number_bedrooms'] = full_results.type.str.extract(r'\b([\d][\d]?)\b',expand=True)
        full_results.loc[full_results['type'].str.contains('studio',case=False),'number_bedrooms']=0

    # Add in search_date column to record the date the search was run (i.e. today's date)
        now = dt.datetime.today().strftime("%d/%m/%Y")
        full_results['search_date'] = now

        return full_results
        

## TESTING

In [121]:
all_london_rent_url = 'http://www.rightmove.co.uk/property-to-rent/find.html?searchType=RENT&locationIdentifier\
=REGION%5E87490&insId=3&radius=0.0&minPrice=&maxPrice=&minBedrooms=&maxBedrooms=&displayPropertyType=\
&maxDaysSinceAdded=&sortByPriceDescending=&_includeLetAgreed=on&primaryDisplayPropertyType=\
&secondaryDisplayPropertyType=&oldDisplayPropertyType=&oldPrimaryDisplayPropertyType=&letType=\
&letFurnishType=&houseFlatShare=false'

london_fields_buy_url = 'http://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=\
REGION%5E70417&propertyTypes=detached%2Csemi-detached%2Cterraced%2Cflat%2Cbungalow&includeSSTC=false'

fake_url = 'www.fakewebsite.com'

rent_data = rightmove_data(all_london_rent_url)
buy_data = rightmove_data(london_fields_buy_url)

#### Rent testing

In [122]:
rent_data.rent_or_sale()

'rent'

In [123]:
rent_data.number_of_results()

60244

In [124]:
rent_data.number_of_result_pages()

42

In [125]:
rent_data.one_page_results(rent_data.url)

Unnamed: 0,price,type,address,url
0,"£2,097 pcm",3 bedroom flat,"Telferscot Road, SW12",http://www.rightmove.co.uk/property-to-rent/pr...
1,"£1,000 pcm",2 bedroom flat,"Sidney Road, South Norwood, SE25",http://www.rightmove.co.uk/property-to-rent/pr...
2,"£1,200 pcm",1 bedroom flat,"North End Road, Golders Green, London, NW11 7HT",http://www.rightmove.co.uk/property-to-rent/pr...
3,"£1,000 pcm",1 bedroom flat,Rushey Green London SE6,http://www.rightmove.co.uk/property-to-rent/pr...
4,£900 pcm,Studio flat,Anerley Road London SE20,http://www.rightmove.co.uk/property-to-rent/pr...
5,"£1,050 pcm",1 bedroom flat,Maberley Road Upper Norwood SE19,http://www.rightmove.co.uk/property-to-rent/pr...
6,"£1,625 pcm",1 bedroom flat,"Southstand Highbury Stadium Square, London, N5",http://www.rightmove.co.uk/property-to-rent/pr...
7,"£3,012 pcm",3 bedroom apartment,"Gloucester Terrace, Powell House, W2",http://www.rightmove.co.uk/property-to-rent/pr...
8,"£1,993 pcm",2 bedroom maisonette,"Hazlebury Road, Fulham, London, SW6",http://www.rightmove.co.uk/property-to-rent/pr...
9,"£2,167 pcm",3 bedroom maisonette,"Lysia Street, Fulham, London, SW6",http://www.rightmove.co.uk/property-to-rent/pr...


#### Sale testing

In [126]:
buy_data.rent_or_sale()

'sale'

In [131]:
buy_data.number_of_results()

169

In [128]:
buy_data.number_of_result_pages()

8

In [129]:
buy_data.one_page_results(buy_data.url)

Unnamed: 0,price,type,address,url
0,"£465,000",2 bedroom flat for sale,"Well Street, London",http://www.rightmove.co.uk/property-for-sale/p...
1,POA,8 bedroom apartment for sale,"King Edward's Road, London",http://www.rightmove.co.uk/property-for-sale/p...
2,"£2,350,000",6 bedroom house for sale,"Forest Road, Hackney, E8",http://www.rightmove.co.uk/property-for-sale/p...
3,"£2,350,000",6 bedroom semi-detached house for sale,"Forest Road, London, E8",http://www.rightmove.co.uk/property-for-sale/p...
4,"£2,350,000",4 bedroom terraced house for sale,"Albion Square, Hackney, E8",http://www.rightmove.co.uk/property-for-sale/p...
5,"£2,000,000",Detached house for sale,"76 Shrubland Road, London, Hackney, E8 4NH",http://www.rightmove.co.uk/property-for-sale/p...
6,"£1,975,000",3 bedroom house for sale,"Malvern Road, London Fields, E8",http://www.rightmove.co.uk/property-for-sale/p...
7,"£1,900,000",4 bedroom terraced house for sale,"Elrington Road, Hackney",http://www.rightmove.co.uk/property-for-sale/p...
8,"£1,825,000",4 bedroom end of terrace house for sale,"Lavender Grove, London, E8",http://www.rightmove.co.uk/property-for-sale/p...
9,"£1,775,000",4 bedroom terraced house for sale,"Greenwood Road, Hackney, E8",http://www.rightmove.co.uk/property-for-sale/p...


In [130]:
buy_data.full_results()

Unnamed: 0,address,price,type,url,postcode,number_bedrooms,search_date
0,"Well Street, London",465000.0,2 bedroom flat for sale,http://www.rightmove.co.uk/property-for-sale/p...,,2,09/03/2017
1,"King Edward's Road, London",,8 bedroom apartment for sale,http://www.rightmove.co.uk/property-for-sale/p...,,8,09/03/2017
2,"Albion Square, Hackney, E8",2350000.0,4 bedroom terraced house for sale,http://www.rightmove.co.uk/property-for-sale/p...,E8,4,09/03/2017
3,"Forest Road, London, E8",2350000.0,6 bedroom semi-detached house for sale,http://www.rightmove.co.uk/property-for-sale/p...,E8,6,09/03/2017
4,"Forest Road, Hackney, E8",2350000.0,6 bedroom house for sale,http://www.rightmove.co.uk/property-for-sale/p...,E8,6,09/03/2017
5,"76 Shrubland Road, London, Hackney, E8 4NH",2000000.0,Detached house for sale,http://www.rightmove.co.uk/property-for-sale/p...,E8,,09/03/2017
6,"Malvern Road, London Fields, E8",1975000.0,3 bedroom house for sale,http://www.rightmove.co.uk/property-for-sale/p...,E8,3,09/03/2017
7,"Elrington Road, Hackney",1900000.0,4 bedroom terraced house for sale,http://www.rightmove.co.uk/property-for-sale/p...,,4,09/03/2017
8,"Lavender Grove, London, E8",1825000.0,4 bedroom end of terrace house for sale,http://www.rightmove.co.uk/property-for-sale/p...,E8,4,09/03/2017
9,"Greenwood Road, Hackney, E8",1775000.0,4 bedroom terraced house for sale,http://www.rightmove.co.uk/property-for-sale/p...,E8,4,09/03/2017
