### Imports

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 

### Putting It All Together

In [5]:
# Scrape the first N_PAGES result pages of Trulia's New York listings and
# collect one row per listing card into the `real_estate` dataframe.
BASE_URL = 'https://www.trulia.com/NY/New_York/'
N_PAGES = 3            # pages 1..N_PAGES are requested
REQUEST_TIMEOUT = 30   # seconds; without a timeout requests.get can block forever

COLUMNS = ['Street', 'Regions', 'Beds', 'Baths', 'Price']

# dataframe column -> data-testid of the <div> holding that value inside a card
FIELD_TESTIDS = {
    'Street': 'property-street',
    'Regions': 'property-region',
    'Beds': 'property-beds',
    'Baths': 'property-baths',
    'Price': 'property-price',
}


def _field_text(card, testid):
    """Return the text of the <div data-testid=testid> inside `card`.

    Returns an empty string when the card has no such element, so that a
    single incomplete listing does not abort the whole scrape and the value
    stays a string for the later string-based cleaning step.
    """
    node = card.find('div', {'data-testid': testid})
    return node.get_text() if node is not None else ''


# Rows are accumulated in a plain list and turned into a dataframe once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the frame per row.
records = []

# range's upper bound is exclusive, hence N_PAGES + 1
for i in range(1, N_PAGES + 1):
    # website request
    website = requests.get(BASE_URL + str(i) + '_p/', timeout=REQUEST_TIMEOUT)
    # fail loudly on 4xx/5xx instead of silently parsing an error page
    website.raise_for_status()

    # create soup object
    soup = BeautifulSoup(website.content, 'html.parser')

    # result items (kept under the name `result`: a later cell inspects it)
    result = soup.find_all('li', {'class': 'SearchResultsList__WideCell-b7y9ki-2'})

    # keep only the <li> elements that carry a data-testid attribute
    results_update = [r for r in result if r.has_attr('data-testid')]

    for card in results_update:
        records.append({column: _field_text(card, testid)
                        for column, testid in FIELD_TESTIDS.items()})

# `columns=` fixes the column order and yields the same empty frame as before
# when nothing was scraped.
real_estate = pd.DataFrame(records, columns=COLUMNS)

In [6]:
# `result` is whatever the scraping loop assigned last: the unfiltered <li>
# matches from the final page requested, not the total number of rows collected.
len(result)

42

In [8]:
# Display the dataframe assembled by the scraping loop.
real_estate

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,303 E 57th St #32B,"Sutton Place, New York, NY",2bd,3ba,"$329,000"
1,432 Park Ave #PENTHOUSE,"Midtown, New York, NY",6bd,9ba,"$169,000,000"
2,8829 183rd St,"Jamaica, Jamaica, NY",3bd,2ba,"$250,000"
3,12118 155th St,"Jamaica, Jamaica, NY",3bd,1ba,"$250,000"
4,9915 200th St,"Hollis, Jamaica, NY",6bd,4ba,"$405,000"
...,...,...,...,...,...
115,100 W 57th St #5J,"Midtown, New York, NY",2bd,2ba,"$429,000"
116,78 Weldon St,"East New York, Brooklyn, NY",5bd,4ba,"$769,000"
117,192 Eltingville Blvd,"Eltingville, Staten Island, NY",3bd,3ba,"$669,000"
118,142 McLaughlin St,"South Beach, Staten Island, NY",4bd,3ba,"$549,000"


### Information about Dataframe

In [9]:
# Print a summary of the dataframe: index range, columns, non-null counts and dtypes.
real_estate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Street   120 non-null    object
 1   Regions  120 non-null    object
 2   Beds     120 non-null    object
 3   Baths    120 non-null    object
 4   Price    120 non-null    object
dtypes: object(5)
memory usage: 4.8+ KB


In [10]:
# Preview the first five scraped listings
real_estate.head(n=5)

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,303 E 57th St #32B,"Sutton Place, New York, NY",2bd,3ba,"$329,000"
1,432 Park Ave #PENTHOUSE,"Midtown, New York, NY",6bd,9ba,"$169,000,000"
2,8829 183rd St,"Jamaica, Jamaica, NY",3bd,2ba,"$250,000"
3,12118 155th St,"Jamaica, Jamaica, NY",3bd,1ba,"$250,000"
4,9915 200th St,"Hollis, Jamaica, NY",6bd,4ba,"$405,000"


In [11]:
# Preview the last five scraped listings
real_estate.tail(n=5)

Unnamed: 0,Street,Regions,Beds,Baths,Price
115,100 W 57th St #5J,"Midtown, New York, NY",2bd,2ba,"$429,000"
116,78 Weldon St,"East New York, Brooklyn, NY",5bd,4ba,"$769,000"
117,192 Eltingville Blvd,"Eltingville, Staten Island, NY",3bd,3ba,"$669,000"
118,142 McLaughlin St,"South Beach, Staten Island, NY",4bd,3ba,"$549,000"
119,37 Harvey St,"Rosebank, Staten Island, NY",6bd,5ba,"$955,000"


### Data Cleaning

In [12]:
# Remove the trailing unit label from the bed/bath counts ("2bd" -> "2", "3ba" -> "3").
# The pattern is anchored with `$` so only a literal suffix is removed; str.strip('bd')
# would instead strip any run of the characters 'b'/'d' from BOTH ends of the value.
# The vectorised .str accessor also passes missing values through instead of raising.
# The columns stay string-typed, as before.
real_estate['Beds'] = real_estate['Beds'].str.replace(r'bd$', '', regex=True)
real_estate['Baths'] = real_estate['Baths'].str.replace(r'ba$', '', regex=True)

#### Updated Dataframe

In [13]:
# Display the dataframe again after the Beds/Baths columns were overwritten above.
real_estate

Unnamed: 0,Street,Regions,Beds,Baths,Price
0,303 E 57th St #32B,"Sutton Place, New York, NY",2,3,"$329,000"
1,432 Park Ave #PENTHOUSE,"Midtown, New York, NY",6,9,"$169,000,000"
2,8829 183rd St,"Jamaica, Jamaica, NY",3,2,"$250,000"
3,12118 155th St,"Jamaica, Jamaica, NY",3,1,"$250,000"
4,9915 200th St,"Hollis, Jamaica, NY",6,4,"$405,000"
...,...,...,...,...,...
115,100 W 57th St #5J,"Midtown, New York, NY",2,2,"$429,000"
116,78 Weldon St,"East New York, Brooklyn, NY",5,4,"$769,000"
117,192 Eltingville Blvd,"Eltingville, Staten Island, NY",3,3,"$669,000"
118,142 McLaughlin St,"South Beach, Staten Island, NY",4,3,"$549,000"


### Save in Excel

In [14]:
# Write the dataframe to an Excel workbook in the working directory,
# leaving out the index column.
output_path = 'realestate_multiple_pages.xlsx'
real_estate.to_excel(output_path, index=False)