### Imports

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import sqlalchemy
import urllib.parse

### HTTP Request

#### store website in variable

In [3]:
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'}

website = 'https://www.realtor.com/realestateandhomes-search/Texas'

#### Get Request

In [4]:
response = requests.get(website, headers=header)

#### Status Code

In [5]:
response

<Response [200]>

### Soup Object

In [6]:
soup = BeautifulSoup(response.content, 'html.parser')
soup

<!DOCTYPE html>
<html lang="en"><head><meta charset="utf-8"/><script>window.kxdl = { jy: '' };  window.rdcdl = window.kxdl;  !function(n,e,i){if(!n){n=n||{},window.permutive=n,n.q=[],n.config=i||{},n.config.apiKey=e,n.config.environment=n.config.environment||'production';for(var o=['addon','identify','track','trigger','query','segment','segments','ready','on','once','user','consent'],r=0;r<o.length;r++){var t=o[r];n[t]=function(e){return function(){var i=Array.prototype.slice.call(arguments,0);n.q.push({functionName:e,arguments:i})}}(t)}}}(window.permutive,'2336e88c-2653-4e72-9d49-c562730fd85f',{});(function(g,o){g[o]=g[o]||function(){(g[o]['q']=g[o]['q']||[]).push(arguments)},g[o]['t']=1*new Date})(window,'_googCsa');</script><script async="" data-testid="dfp" fetchpriority="auto" src="https://securepubads.g.doubleclick.net/tag/js/gpt.js"></script><script async="" data-testid="prebid" fetchpriority="auto" src="https://pbcs.realtor.com/p/realtor/realtor/pb.js"></script><script async=""

### Results

In [7]:
results = soup.find_all('div', {'class' : 'BasePropertyCard_propertyCard__JXqlE'})

In [8]:
len(results)

42

### Target necessary data

- title
- address
- bed
- bath
- price
- property_link 

#### title

In [9]:
results[0].find('div', {'data-testid' : 'card-description'}).get_text()

'For Sale'

#### address

In [10]:
results[0].find('div', {'class' : 'content-col-left'}).get_text()

'3441 Loop 1781 # 4DRockport, TX 78382'

#### bed

In [11]:
results[0].find('ul', {'data-testid' : 'card-meta'}).findChildren('span')[0].get_text().replace('+','')

'4'

#### bath

In [12]:
results[0].find('ul', {'data-testid' : 'card-meta'}).findChildren('span')[1].get_text()

'3'

#### price

In [13]:
results[0].find('div', {'class' : 'price-wrapper'}).findChildren('div')[0].get_text().replace('$','').replace(',','')

'432500'

#### property link

In [14]:
results[0].find('a', {'class' : 'LinkComponent_anchor__2uAhr'}).get('href')

'/realestateandhomes-detail/3441-Loop-1781-Unit-4D_Rockport_TX_78382_M98510-56670?from=srp-list-card'

### append 2 links

In [15]:
root_link = 'https://www.realtor.com'

#### append property link

In [16]:
property_link_partial = []
property_link_joined = []

for link_property in soup.findChildren('a', {'class' : 'LinkComponent_anchor__2uAhr'}):
    x = link_property.get('href')
    property_link_partial.append(x)
    
for link in property_link_partial:
    property_link_joined.append(urllib.parse.urljoin(root_link, link))

In [17]:
property_link_joined

['https://www.realtor.com/realestateandhomes-detail/3441-Loop-1781-Unit-4D_Rockport_TX_78382_M98510-56670?from=srp-list-card',
 'https://www.realtor.com/realestateandhomes-detail/3441-Loop-1781-Unit-4D_Rockport_TX_78382_M98510-56670?from=srp-list-card',
 'https://www.realtor.com/realestateandhomes-detail/116-Trail-Wood_New-Braunfels_TX_78130_M96844-97202?from=srp-list-card',
 'https://www.realtor.com/realestateandhomes-detail/116-Trail-Wood_New-Braunfels_TX_78130_M96844-97202?from=srp-list-card',
 'https://www.realtor.com/realestateandhomes-detail/5793-Highway-361-Park-53-Rd-Unit-314_Port-Aransas_TX_78373_M98666-70381?from=srp-list-card',
 'https://www.realtor.com/realestateandhomes-detail/5793-Highway-361-Park-53-Rd-Unit-314_Port-Aransas_TX_78373_M98666-70381?from=srp-list-card',
 'https://www.realtor.com/realestateandhomes-detail/An-County-Road-3855-Lot-101_Palestine_TX_75801_M99115-34254?from=srp-list-card',
 'https://www.realtor.com/realestateandhomes-detail/An-County-Road-3855-Lot

### Put everything together inside a For-Loop

In [18]:
title = []
address = []
bed = []
bath = []
price = []
property_link = []

root_link = 'https://www.realtor.com'

for result in results:
    
    #title
    title.append(result.find('div', {'data-testid' : 'card-description'}).get_text())
    
    #address
    address.append(result.find('div', {'class' : 'content-col-left'}).get_text())
                   
    #bed
    try:
        bed.append(float(result.find('ul', {'data-testid' : 'card-meta'}).findChildren('span')[0].get_text()))
    except:
        bed.append(0)
    #bath
    try:
        bath.append(float(result.find('ul', {'data-testid' : 'card-meta'}).findChildren('span')[1].get_text().replace('+','')))
    except:
        bath.append(0)
    
    #price
    price.append(float(result.find('div', {'class' : 'price-wrapper'}).findChildren('div')[0].get_text().replace('$','').replace(',','')))
    
    #property_link
    property_link=property_link_joined
    
  
    

In [19]:
title

['For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale',
 'For Sale']

### Create Pandas Dataframe

In [32]:
df_realtor_realestate = pd.DataFrame({'title':title, 'address':address, 'bedrooms':bed, 'bathrooms':bath, 'price_$':price})

In [33]:
df_realtor_realestate

Unnamed: 0,title,address,bedrooms,bathrooms,price_$
0,For Sale,"31037 King TerBulverde, TX 78163",5,4.5,839900.0
1,For Sale,"15200 Kent Justin DrAustin, TX 78725",4,2.5,489000.0
2,For Sale,"19923 Pennybacker Bridge DrCypress, TX 77433",3,2.5,405000.0
3,For Sale,"315 N HeltonRandolph, TX 75475",3,2.0,254999.0
4,For Sale,"1306 County Road 415Merkel, TX 79536",4,3.5,889000.0
5,For Sale,"2770 Wilson RdPalmer, TX 75152",3,2.5,699000.0
6,For Sale,"111 Clopton Lake RdWaxahachie, TX 75165",2,2.0,748000.0
7,For Sale,"63259 State Highway 132 NLytle, TX 78052",2,1.0,368000.0
8,For Sale,"2386 Santa Maria LnRobstown, TX 78380",3,2.5,575000.0
9,For Sale,"7038 Cole Creek DrHouston, TX 77092",3,2.0,295000.0


### Output in Excel

In [34]:
df_realtor_realestate.to_excel('realtor estate single.xlsx', index=False)

### Part 2 - Pagination - Scrape 50 Pages

In [28]:
title = []
address = []
bed = []
bath = []
price = []



for i in range (1,51):
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'}
    
    website = 'https://www.realtor.com/realestateandhomes-search/Texas/pg-' + str (i)

    response = requests.get(website, headers=header)

    soup = BeautifulSoup(response.content, 'html.parser')
    
    results = soup.find_all('div', {'class' : 'BasePropertyCard_propertyCard__JXqlE'})
    
    
for result in results:
    
    #title
    try:
        title.append(result.find('div', {'data-testid' : 'card-description'}).get_text())
    except:
        title.append('')
        
    #address
    try:
        address.append(result.find('div', {'class' : 'content-col-left'}).get_text())
    except:
        address.append('')
        
    #bed
    try:
        bed.append(result.find('ul', {'data-testid' : 'card-meta'}).findChildren('span')[0].get_text())
    except:
        bed.append(0)
        
    #bath
    try:
        bath.append(float(result.find('ul', {'data-testid' : 'card-meta'}).findChildren('span')[1].get_text().replace('+','')))
    except:
        bath.append(0)
        
    #price
    try:
        price.append(float(result.find('div', {'class' : 'price-wrapper'}).findChildren('div')[0].get_text().replace('$','').replace(',','')))
    except:
        price.append(0)
    
    
df_realtor_realestate_multiple = pd.DataFrame({'title':title, 'address':address, 'bedrooms':bed, 'bathrooms':bath, 'price':price })
df_realtor_realestate_multiple

Unnamed: 0,title,address,bedrooms,bathrooms,price
0,For Sale,"31037 King TerBulverde, TX 78163",5,4.5,839900.0
1,For Sale,"15200 Kent Justin DrAustin, TX 78725",4,2.5,489000.0
2,For Sale,"19923 Pennybacker Bridge DrCypress, TX 77433",3,2.5,405000.0
3,For Sale,"315 N HeltonRandolph, TX 75475",3,2.0,254999.0
4,For Sale,"1306 County Road 415Merkel, TX 79536",4,3.5,889000.0
5,For Sale,"2770 Wilson RdPalmer, TX 75152",3,2.5,699000.0
6,For Sale,"111 Clopton Lake RdWaxahachie, TX 75165",2,2.0,748000.0
7,For Sale,"63259 State Highway 132 NLytle, TX 78052",2,1.0,368000.0
8,For Sale,"2386 Santa Maria LnRobstown, TX 78380",3,2.5,575000.0
9,For Sale,"7038 Cole Creek DrHouston, TX 77092",3,2.0,295000.0
