# Install Required Libraries

In [1]:
!pip install requests
!pip install beautifulsoup4



# Import Required Libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import random
import pandas as pd

In [2]:
# URL of the real estate listings website
base_url = "https://www.zillow.com/"

## Scrapping Function
 - Take City name as INPUT

In [3]:
def scrape_real_estate_listings(location):
        # URL of the real estate listings website
        url = f"{base_url}{location}"
        
        # Set a user-agent header to simulate a request from a web browser
        user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Firefox/58.0.1',
            # Add more user agents as needed
        ]
        headers = {
            'User-Agent': random.choice(user_agents)
        }

        # Use a session to persist cookies between requests
        session = requests.Session()

        # Make the request with headers and cookies
        response = session.get(url, headers=headers)

        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            # Parse the HTML content of the page
            soup = BeautifulSoup(response.content, 'html.parser')
        
                
            # Extract property information
            property_listings = []   
            
            for listing in soup.find_all('div', class_='StyledCard-c11n-8-84-3__sc-rmiu6p-0 jZuLiI StyledPropertyCardBody-c11n-8-84-3__sc-1p5uux3-0 gHYrNO'):

                title = listing.find('div', class_='StyledPropertyCardDataArea-c11n-8-84-3__sc-yipmu-0 dbDWjx').text.strip()
                price = listing.find('span', class_='PropertyCardWrapper__StyledPriceLine-srp__sc-16e8gqd-1 iMKTKr').text.strip()
                property_url_element = listing.find('a', class_='StyledPropertyCardDataArea-c11n-8-84-3__sc-yipmu-0 jnnxAW property-card-link')
                property_url = "https://www.zillow.com" + property_url_element['href'] if property_url_element else 'N/A'

                property_listings.append({
                    'Title': title,
                    'Price': price,
                    'URL': property_url
                })
            df = pd.DataFrame(property_listings)
            print(df)
            # Save the data to a CSV file
            #print(property_listings)
            save_to_csv(property_listings, location)

        else:
            print(f"Failed to retrieve data. Status code: {response.status_code}")

## Loading Function

 - Take data as INPUT
 - Take path as INPUT

Save the data in csv file

In [4]:
def save_to_csv(data, location):
    # Define the CSV filename
    filename = f"real_estate_listings_{location}.csv"

    # Write data to CSV file
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Title', 'Price', 'URL']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Write header
        writer.writeheader()

        # Write data rows
        for row in data:
            writer.writerow(row)

    print(f"Data has been successfully scraped and saved to {filename}.")

### Scrap NEW YORK city data in csv

In [5]:
# Example usage
scrape_real_estate_listings("ny")

                                              Title     Price  \
0              3 bds2 ba1,244 sqft - House for sale  $174,900   
1                    1 bd1 ba600 sqft - Coming soon  $245,000   
2                      3 bds1 ba1,232 sqft - Active  $169,900   
3              3 bds1 ba1,068 sqft - House for sale  $174,900   
4              3 bds2 ba2,186 sqft - House for sale  $249,000   
5  6 bds2 ba2,200 sqft - Multi-family home for sale  $260,000   
6              4 bds4 ba3,354 sqft - House for sale  $220,000   
7              4 bds2 ba1,552 sqft - House for sale  $199,900   
8                    1 bd1 ba750 sqft - Coming soon  $135,000   

                                                 URL  
0  https://www.zillow.comhttps://www.zillow.com/h...  
1  https://www.zillow.comhttps://www.zillow.com/h...  
2  https://www.zillow.comhttps://www.zillow.com/h...  
3  https://www.zillow.comhttps://www.zillow.com/h...  
4  https://www.zillow.comhttps://www.zillow.com/h...  
5  https://www.zill

### Scrap Chicago city data in csv

In [5]:
# Example usage
scrape_real_estate_listings("chicago-il")

                                Title     Price  \
0              4 bds2 ba-- sqft - New   $29,500   
1          6 bds2 ba-- sqft - Auction       $--   
2               3,750 sqft lot  - New   $39,900   
3               3,049 sqft lot  - New   $79,900   
4               4,375 sqft lot  - New    $9,900   
5  5 bds3 ba2,200 sqft - Re-activated  $238,000   
6              2 bds2 ba-- sqft - New  $329,900   
7               1 bd1 ba-- sqft - New  $138,900   
8           Studio 1 ba569 sqft - New  $149,000   

                                                 URL  
0  https://www.zillow.comhttps://www.zillow.com/h...  
1  https://www.zillow.comhttps://www.zillow.com/h...  
2  https://www.zillow.comhttps://www.zillow.com/h...  
3  https://www.zillow.comhttps://www.zillow.com/h...  
4  https://www.zillow.comhttps://www.zillow.com/h...  
5  https://www.zillow.comhttps://www.zillow.com/h...  
6  https://www.zillow.comhttps://www.zillow.com/h...  
7  https://www.zillow.comhttps://www.zillow.com/h