Collecting Data for Airport Review Analysis

In [19]:
!pip install requests beautifulsoup4 selenium pandas nltk textblob wordcloud matplotlib seaborn



**Scraping Reviews on Skytrax**

In [45]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [47]:


#Listing all airport URLS
airports = {
    "Abu Dhabi": "https://www.airlinequality.com/airport-reviews/abu-dhabi-airport/",
    "Dubai":"https://www.airlinequality.com/airport-reviews/dubai-airport/",
    "Doha":"https://www.airlinequality.com/airport-reviews/doha-airport/",
    "Istanbul":"https://www.airlinequality.com/airport-reviews/istanbul-airport/",
    "Singapore":"https://www.airlinequality.com/airport-reviews/singapore-changi-airport/",
    "Hong Kong": "https://www.airlinequality.com/airport-reviews/hong-kong-airport/",
    "Addis Ababa": "https://www.airlinequality.com/airport-reviews/addis-ababa-airport/",
    "Bangkok-Suvarnabhumi":"https://www.airlinequality.com/airport-reviews/bangkok-suvarnabhumi-airport/",
    "Kuala Lumpur KLIA":"https://www.airlinequality.com/airport-reviews/klia-kuala-lumpur-airport/",
    "Delhi":"https://www.airlinequality.com/airport-reviews/delhi-airport/",
    "Mumbai":"https://www.airlinequality.com/airport-reviews/mumbai-airport/",
    "Bengaluru":"https://www.airlinequality.com/airport-reviews/bangalore-airport/",
    "Hyderabad":"https://www.airlinequality.com/airport-reviews/hyderabad-airport/"
}
    

In [60]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def scrape_skytrax_reviews(airport_name, base_url, pages=30):
    headers = { 
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
    }
    
    all_reviews = []

    for i in range(1, pages + 1): 
        print(f"Scraping {airport_name} - Page {i}...")

        # Construct URL for pagination
        url = f"{base_url}/page/{i}/" if i > 1 else base_url
        
        # Send request
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to fetch data for {airport_name} - Page {i}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")

        # Finding all reviews
        reviews = soup.find_all("article", class_="comp comp_reviews-review")
        if not reviews:
            print(f"No reviews found on {airport_name} - Page {i}")
            continue  # Skip if no reviews found

        # Extracting details from each review
        for review in reviews:
            try:
                title = review.find("h2", class_="text_header").get_text(strip=True) if review.find("h2", class_="text_header") else "N/A"
                author = review.find("span", itemprop="name").get_text(strip=True) if review.find("span", itemprop="name") else "N/A"
                date_published = review.find("time", itemprop="datePublished")["datetime"] if review.find("time", itemprop="datePublished") else "N/A"
                review_text = review.find("div", class_="text_content").get_text(strip=True) if review.find("div", class_="text_content") else "N/A"
                
                # Extract overall rating
                overall_rating = review.find("span", itemprop="ratingValue")
                overall_rating = overall_rating.get_text(strip=True) if overall_rating else "N/A"

                # Extract traveler type, experience (arrival/departure/transit)
                traveller_type = review.find("td", class_="review-rating-header type_of_traveller")
                traveller_type = traveller_type.find_next_sibling("td").get_text(strip=True) if traveller_type else "N/A"

                experience = review.find("td", class_="review-rating-header experience_at_airport")
                experience = experience.find_next_sibling("td").get_text(strip=True) if experience else "N/A"

                # Extract recommended (Yes/No)
                recommended = review.find("td", class_="review-rating-header recommended")
                recommended = recommended.find_next_sibling("td").get_text(strip=True) if recommended else "N/A"

                # Extract category ratings (Terminal Cleanliness, Seating, Signs, Food, Shopping, WiFi, Staff)
                category_ratings = {}
                rating_labels = ["Terminal Cleanliness", "Terminal Seating", "Terminal Signs", 
                                 "Food & Beverages", "Airport Shopping", "Wifi Connectivity", "Airport Staff"]
                
                rating_cells = review.find_all("td", class_="review-rating-stars")

                for label, cell in zip(rating_labels, rating_cells):
                    category_ratings[label] = len(cell.find_all("span", class_="star fill"))  # Count filled stars
                
                # Append data to list
                all_reviews.append({
                    "Airport": airport_name,
                    "Title": title,
                    "Author": author,
                    "Date Published": date_published,
                    "Review Text": review_text,
                    "Overall Rating": overall_rating,
                    "Type of Traveller": traveller_type, 
                    "Experience": experience,
                    "Recommended": recommended,
                    **category_ratings  # Unpack category ratings into dictionary
                })

            except Exception as e:
                print(f"Error extracting review for {airport_name}: {e}")

        print(f" ---> {len(all_reviews)} total reviews collected so far")

        # Add delay to avoid getting blocked
        time.sleep(3)  

    return all_reviews

# List of airports to scrape
airports = {
    "Istanbul": "https://www.airlinequality.com/airport-reviews/istanbul-airport",
    "Addis Ababa": "https://www.airlinequality.com/airport-reviews/addis-ababa-airport",
    "Dubai": "https://www.airlinequality.com/airport-reviews/dubai-airport",
    "Doha": "https://www.airlinequality.com/airport-reviews/doha-hamad-airport",
    "Abu Dhabi": "https://www.airlinequality.com/airport-reviews/abu-dhabi-airport",
    "Bangkok": "https://www.airlinequality.com/airport-reviews/bangkok-airport",
    "Singapore": "https://www.airlinequality.com/airport-reviews/singapore-changi-airport",
    "Hong Kong": "https://www.airlinequality.com/airport-reviews/hong-kong-airport",
    "Delhi": "https://www.airlinequality.com/airport-reviews/delhi-indira-gandhi-airport",
    "Mumbai": "https://www.airlinequality.com/airport-reviews/mumbai-chhatrapati-shivaji-airport",
    "Bangalore": "https://www.airlinequality.com/airport-reviews/bangalore-kempegowda-airport",
    "Hyderabad": "https://www.airlinequality.com/airport-reviews/hyderabad-rajiv-gandhi-airport"
}

# Scrape all airports
all_reviews = []
for airport, url in airports.items():
    print(f"\nStarting scraping for {airport}...")
    airport_reviews = scrape_skytrax_reviews(airport, url, pages=30)
    all_reviews.extend(airport_reviews)

# Convert to DataFrame and save to CSV
df = pd.DataFrame(all_reviews)
df.head()



Starting scraping for Istanbul...
Scraping Istanbul - Page 1...
No reviews found on Istanbul - Page 1
Scraping Istanbul - Page 2...
No reviews found on Istanbul - Page 2
Scraping Istanbul - Page 3...
No reviews found on Istanbul - Page 3
Scraping Istanbul - Page 4...
No reviews found on Istanbul - Page 4
Scraping Istanbul - Page 5...
No reviews found on Istanbul - Page 5
Scraping Istanbul - Page 6...
No reviews found on Istanbul - Page 6
Scraping Istanbul - Page 7...
No reviews found on Istanbul - Page 7
Scraping Istanbul - Page 8...
No reviews found on Istanbul - Page 8
Scraping Istanbul - Page 9...
No reviews found on Istanbul - Page 9
Scraping Istanbul - Page 10...
No reviews found on Istanbul - Page 10
Scraping Istanbul - Page 11...
No reviews found on Istanbul - Page 11
Scraping Istanbul - Page 12...
No reviews found on Istanbul - Page 12
Scraping Istanbul - Page 13...
No reviews found on Istanbul - Page 13
Scraping Istanbul - Page 14...
No reviews found on Istanbul - Page 14
Scr

In [39]:
df.to_csv("airport_reviews.csv", index=False)
print("\nScraping complete! Data saved to airport_reviews.csv")

In [64]:
import requests
from bs4 import BeautifulSoup
import time

# Define base URL (Example for one airport)
airport_url = "https://www.airlinequality.com/airport-reviews/istanbul-airport/page/2/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
}

# Send request
response = requests.get(airport_url, headers=headers)

# Check response status
if response.status_code == 200:
    print("\n✅ Successfully fetched the page!")
else:
    print(f"\n❌ Failed to fetch the page. Status Code: {response.status_code}")
    exit()

# Parse HTML
soup = BeautifulSoup(response.text, "html.parser")

# Print full HTML to check structure (Use only for debugging)
print("\n🔍 Full Page HTML:\n")
print(soup.prettify())  # Prints the entire page content

# Find all reviews
review_containers = soup.find_all("article", class_="comp compo_reviews_review")

# Check if reviews were found
if not review_containers:
    print("\n❌ No reviews found on the page. Check class name or site structure!")
    exit()

# Extract first review for testing
first_review = review_containers[0]  # Get the first review

# Extract review text
review_text = first_review.find("div", class_="text_content")
review_text = review_text.text.strip() if review_text else "No review text found"

# Print extracted review content
print("\n✅ First Review Extracted:\n")
print(review_text)



✅ Successfully fetched the page!

🔍 Full Page HTML:

<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7 lt-ie10" lang="en-GB"> <![endif]-->
<!--[if IE 7]>    <html class="no-js lt-ie9 lt-ie8 lt-ie10" lang="en-GB"> <![endif]-->
<!--[if IE 8]>    <html class="no-js lt-ie9 lt-ie10" lang="en-GB"> <![endif]-->
<!--[if IE 9]>    <html class="no-js lt-ie10" lang="en-GB"> <![endif]-->
<!--[if gt IE 8]><!-->
<html lang="en-GB">
 <!--<![endif]-->
 <head>
  <meta charset="utf-8"/>
  <title>
   Istanbul Airport Customer Reviews - SKYTRAX
  </title>
  <link as="image" data-rocket-preload="" fetchpriority="high" href="https://www.airlinequality.com/wp-content/themes/airlinequality2014new/library/images/nav/grad-header.jpg" rel="preload"/>
  <!-- Google Chrome Frame for IE -->
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <!-- mobile meta -->
  <meta content="True" name="HandheldFriendly"/>
  <meta content="320" name="MobileOptimized"/>
  <meta content="w

IndexError: list index out of range

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to scrape Skytrax reviews
def scrape_skytrax_reviews(airport_name, url, pages=5):  
    reviews = []  # Store all reviews

    headers = { 
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
    }

    for page in range(1, pages + 1):
        print(f"Scraping {airport_name} - Page {page}...")

        # Construct URL for each page
        page_url = f"{url}/page/{page}/" if page > 1 else url

        # Send request
        response = requests.get(page_url, headers=headers)
        if response.status_code != 200:
            print(f"❌ Failed to fetch {airport_name} - Page {page}")
            continue

        # Parse HTML
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Find all reviews
        review_containers = soup.find_all("article", itemprop="review")
        if not review_containers:
            print(f"⚠️ No reviews found for {airport_name} - Page {page}")
            continue

        # Extract data from each review
        for review in review_containers:
            try:
                review_text = review.find("div", itemprop="reviewBody").text.strip()
                overall_rating = review.find("div", itemprop="reviewRating").find("span", itemprop="ratingValue").text.strip()
                recommended = review.find("td", class_="review-value").text.strip()

                # Extract additional details
                date_of_visit = review.find("tr", class_="review-rating-header date_visit").find_next_sibling("td").text.strip()
                traveller_type = review.find("tr", class_="review-rating-header type_of_traveller").find_next_sibling("td").text.strip()
                experience = review.find("tr", class_="review-rating-header experience_at_airport").find_next_sibling("td").text.strip()

                # Extract ratings for different categories
                rating_labels = {
                    "Terminal Cleanliness": "terminal_cleanliness",
                    "Terminal Seating": "terminal_seating",
                    "Terminal Signs": "terminal_signs",
                    "Food & Beverages": "food_beverages",
                    "Airport Shopping": "airport_shopping",
                    "Wifi Connectivity": "wifi_connectivity",
                    "Airport Staff": "airport_staff"
                }

                ratings_dict = {}
                for label, class_name in rating_labels.items():
                    rating_td = review.find("td", class_=f"review-rating-header {class_name}")
                    if rating_td:
                        stars = len(rating_td.find_next_sibling("td").find_all("span", class_="star fill"))
                        ratings_dict[label] = stars
                    else:
                        ratings_dict[label] = None  # No rating found

                # Append review data
                reviews.append({
                    "Airport": airport_name,
                    "Review": review_text,
                    "Overall Rating": overall_rating,
                    "Recommended": recommended,
                    "Date of Visit": date_of_visit,
                    "Type of Traveller": traveller_type,
                    "Experience": experience,
                    **ratings_dict  # Unpack category ratings into the dictionary
                })

            except Exception as e: 
                print(f"⚠️ Error extracting review: {e}")

        time.sleep(1)  # Avoid overwhelming the server

    return reviews

# List of airports to scrape
airport_data = {
    "Istanbul Airport": "https://www.airlinequality.com/airport-reviews/istanbul-airport",
    "Addis Ababa Bole Airport": "https://www.airlinequality.com/airport-reviews/addis-ababa-airport",
    "Dubai Airport": "https://www.airlinequality.com/airport-reviews/dubai-airport",
    "Doha Hamad Airport": "https://www.airlinequality.com/airport-reviews/doha-airport",
    "Abu Dhabi Airport": "https://www.airlinequality.com/airport-reviews/abu-dhabi-airport",
    "Bangkok Suvarnabhumi": "https://www.airlinequality.com/airport-reviews/bangkok-suvarnabhumi-airport",
    "Singapore Changi": "https://www.airlinequality.com/airport-reviews/singapore-changi-airport",
    "Hong Kong Airport": "https://www.airlinequality.com/airport-reviews/hong-kong-airport",
    "Delhi IGI Airport": "https://www.airlinequality.com/airport-reviews/delhi-airport",
    "Mumbai Airport": "https://www.airlinequality.com/airport-reviews/mumbai-airport",
    "Bangalore Airport": "https://www.airlinequality.com/airport-reviews/bangalore-airport",
    "Hyderabad Airport": "https://www.airlinequality.com/airport-reviews/hyderabad-airport",
}

# Run scraper for all airports
all_reviews = []
for airport, url in airport_data.items():
    reviews = scrape_skytrax_reviews(airport, url, pages=3)  # Scrape first 3 pages
    all_reviews.extend(reviews)

# Save to CSV
df = pd.DataFrame(all_reviews)
df.to_csv("airport_reviews.csv", index=False)
print("\n✅ Scraping complete! Data saved to 'airport_reviews.csv'")


Scraping Istanbul Airport - Page 1...
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
Scraping Istanbul Airport - Page 2...
⚠️ Error extracting review: 'NoneType' object has no attribute 'find_next_sibling'
⚠️ Error ex

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to scrape Skytrax reviews
def scrape_skytrax_reviews(airport_name, url, pages=3):  
    reviews = []  # Store all reviews

    headers = { 
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
    }

    for page in range(1, pages + 1):
        print(f"\nScraping {airport_name} - Page {page}...")

        # Construct URL for each page
        page_url = f"{url}/page/{page}/" if page > 1 else url

        # Send request
        response = requests.get(page_url, headers=headers)
        if response.status_code != 200:
            print(f"❌ Failed to fetch {airport_name} - Page {page}")
            continue

        # Parse HTML
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Find all reviews
        review_containers = soup.find_all("article", itemprop="review")
        print(f"Found {len(review_containers)} reviews on {airport_name} - Page {page}")

        if not review_containers:
            print(f"⚠️ No reviews found for {airport_name} - Page {page}")
            continue

        # Extract data from each review
        for review in review_containers:
            try:
                # Extract review text
                review_text_element = review.find("div", itemprop="reviewBody")
                review_text = review_text_element.text.strip() if review_text_element else "N/A"

                # Extract overall rating
                rating_element = review.find("div", itemprop="reviewRating")
                overall_rating = rating_element.find("span", itemprop="ratingValue").text.strip() if rating_element else "N/A"

                # Extract recommended (yes/no)
                recommended_element = review.find("td", class_="review-value")
                recommended = recommended_element.text.strip() if recommended_element else "N/A"

                # Extract date of visit
                date_visit_element = review.find("td", class_="review-rating-header date_visit")
                date_of_visit = date_visit_element.find_next_sibling("td").text.strip() if date_visit_element else "N/A"

                # Extract traveller type
                traveller_element = review.find("td", class_="review-rating-header type_of_traveller")
                traveller_type = traveller_element.find_next_sibling("td").text.strip() if traveller_element else "N/A"

                # Extract experience (departure/arrival)
                experience_element = review.find("td", class_="review-rating-header experience_at_airport")
                experience = experience_element.find_next_sibling("td").text.strip() if experience_element else "N/A"

                # Extract ratings for different categories
                rating_labels = {
                    "Terminal Cleanliness": "terminal_cleanliness",
                    "Terminal Seating": "terminal_seating",
                    "Terminal Signs": "terminal_signs",
                    "Food & Beverages": "food_beverages",
                    "Airport Shopping": "airport_shopping",
                    "Wifi Connectivity": "wifi_connectivity",
                    "Airport Staff": "airport_staff"
                }

                ratings_dict = {}
                for label, class_name in rating_labels.items():
                    rating_td = review.find("td", class_=f"review-rating-header {class_name}")
                    if rating_td:
                        stars = len(rating_td.find_next_sibling("td").find_all("span", class_="star fill"))
                        ratings_dict[label] = stars
                    else:
                        ratings_dict[label] = "N/A"  # No rating found

                # Debug: Print extracted review
                print(f"📝 Review from {airport_name} - Page {page}:")
                print(f"Review Text: {review_text[:100]}...")  # Print first 100 characters
                print(f"Overall Rating: {overall_rating}, Recommended: {recommended}, Date of Visit: {date_of_visit}")
                print(f"Traveller Type: {traveller_type}, Experience: {experience}")
                print(f"Ratings: {ratings_dict}")

                # Append review data
                reviews.append({
                    "Airport": airport_name,
                    "Review": review_text,
                    "Overall Rating": overall_rating,
                    "Recommended": recommended,
                    "Date of Visit": date_of_visit,
                    "Type of Traveller": traveller_type,
                    "Experience": experience,
                    **ratings_dict  # Unpack category ratings into the dictionary
                })

            except Exception as e: 
                print(f"⚠️ Error extracting review: {e}")

        time.sleep(1)  # Avoid overwhelming the server

    return reviews

# List of airports to scrape
airport_data = {
    "Istanbul Airport": "https://www.airlinequality.com/airport-reviews/istanbul-airport",
    "Addis Ababa Bole Airport": "https://www.airlinequality.com/airport-reviews/addis-ababa-airport",
}

# Run scraper for all airports
all_reviews = []
for airport, url in airport_data.items():
    reviews = scrape_skytrax_reviews(airport, url, pages=3)  # Scrape first 3 pages
    all_reviews.extend(reviews)

# Save to CSV
df = pd.DataFrame(all_reviews)
df.to_csv("airport_reviews_debug.csv", index=False)
print("\n✅ Scraping complete! Data saved to 'airport_reviews_debug.csv'")



Scraping Istanbul Airport - Page 1...
Found 10 reviews on Istanbul Airport - Page 1
📝 Review from Istanbul Airport - Page 1:
Review Text: Not Verified |  Ridiculous prices on everything. Tired staff. Will avoid or bring my own food and dr...
Overall Rating: 2, Recommended: Arrival and Departure, Date of Visit: February 2025
Traveller Type: Family Leisure, Experience: Arrival and Departure
Ratings: {'Terminal Cleanliness': 2, 'Terminal Seating': 1, 'Terminal Signs': 3, 'Food & Beverages': 1, 'Airport Shopping': 1, 'Wifi Connectivity': 1, 'Airport Staff': 1}
📝 Review from Istanbul Airport - Page 1:
Review Text: Not Verified |  The biggest trap for the tourists worldwide, one Big Mac 25 euros and a bag of pista...
Overall Rating: 1, Recommended: Departure Only, Date of Visit: February 2025
Traveller Type: Couple Leisure, Experience: Departure Only
Ratings: {'Terminal Cleanliness': 1, 'Terminal Seating': 1, 'Terminal Signs': 1, 'Food & Beverages': 1, 'Airport Shopping': 1, 'Wifi Connectiv

In [6]:
df.head()

Unnamed: 0,Airport,Review,Overall Rating,Recommended,Date of Visit,Type of Traveller,Experience,Terminal Cleanliness,Terminal Seating,Terminal Signs,Food & Beverages,Airport Shopping,Wifi Connectivity,Airport Staff
0,Istanbul Airport,Not Verified | Ridiculous prices on everythin...,2,Arrival and Departure,February 2025,Family Leisure,Arrival and Departure,2,1,3,1,1.0,1,1.0
1,Istanbul Airport,Not Verified | The biggest trap for the touri...,1,Departure Only,February 2025,Couple Leisure,Departure Only,1,1,1,1,1.0,1,1.0
2,Istanbul Airport,✅ Trip Verified | Fast food price is just ri...,2,Transit,January 2025,Solo Leisure,Transit,5,5,5,1,1.0,5,3.0
3,Istanbul Airport,✅ Trip Verified | This airport is luxury and...,2,Arrival and Departure,January 2025,Business,Arrival and Departure,1,2,3,3,4.0,2,1.0
4,Istanbul Airport,✅ Trip Verified | Arrived just before midnight...,3,Transit,December 2024,Solo Leisure,Transit,3,1,2,0,,3,


In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to scrape reviews
def scrape_skytrax_reviews(airport_name, url, pages=30):  
    reviews = []  

    headers = { 
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
    }

    for page in range(1, pages + 1):
        print(f"\nScraping {airport_name} - Page {page}...")

        # Construct URL for each page
        page_url = f"{url}/page/{page}/" if page > 1 else url

        # Send request
        response = requests.get(page_url, headers=headers)
        if response.status_code != 200:
            print(f"❌ Failed to fetch {airport_name} - Page {page}")
            continue

        # Parse HTML
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Find all reviews
        review_containers = soup.find_all("article", itemprop="review")
        print(f"Found {len(review_containers)} reviews on {airport_name} - Page {page}")

        if not review_containers:
            print(f"⚠️ No reviews found for {airport_name} - Page {page}")
            continue

        # Extract data from each review
        for review in review_containers:
            try:
                review_text_element = review.find("div", itemprop="reviewBody")
                review_text = review_text_element.text.strip() if review_text_element else "N/A"

                rating_element = review.find("div", itemprop="reviewRating")
                overall_rating = rating_element.find("span", itemprop="ratingValue").text.strip() if rating_element else "N/A"

                recommended_element = review.find("td", class_="review-value")
                recommended = recommended_element.text.strip() if recommended_element else "N/A"

                date_visit_element = review.find("td", class_="review-rating-header date_visit")
                date_of_visit = date_visit_element.find_next_sibling("td").text.strip() if date_visit_element else "N/A"

                traveller_element = review.find("td", class_="review-rating-header type_of_traveller")
                traveller_type = traveller_element.find_next_sibling("td").text.strip() if traveller_element else "N/A"

                experience_element = review.find("td", class_="review-rating-header experience_at_airport")
                experience = experience_element.find_next_sibling("td").text.strip() if experience_element else "N/A"

                rating_labels = {
                    "Terminal Cleanliness": "terminal_cleanliness",
                    "Terminal Seating": "terminal_seating",
                    "Terminal Signs": "terminal_signs",
                    "Food & Beverages": "food_beverages",
                    "Airport Shopping": "airport_shopping",
                    "Wifi Connectivity": "wifi_connectivity",
                    "Airport Staff": "airport_staff"
                }

                ratings_dict = {}
                for label, class_name in rating_labels.items():
                    rating_td = review.find("td", class_=f"review-rating-header {class_name}")
                    if rating_td:
                        stars = len(rating_td.find_next_sibling("td").find_all("span", class_="star fill"))
                        ratings_dict[label] = stars
                    else:
                        ratings_dict[label] = "N/A"

                print(f"📝 Review from {airport_name} - Page {page}:")
                print(f"Review Text: {review_text[:100]}...")
                print(f"Overall Rating: {overall_rating}, Recommended: {recommended}, Date of Visit: {date_of_visit}")
                print(f"Traveller Type: {traveller_type}, Experience: {experience}")
                print(f"Ratings: {ratings_dict}")

                reviews.append({
                    "Airport": airport_name,
                    "Review": review_text,
                    "Overall Rating": overall_rating,
                    "Recommended": recommended,
                    "Date of Visit": date_of_visit,
                    "Type of Traveller": traveller_type,
                    "Experience": experience,
                    **ratings_dict
                })

            except Exception as e: 
                print(f"⚠️ Error extracting review: {e}")

        time.sleep(1)

    return reviews

# More airports to scrape
airport_data = {
    "Istanbul Airport": "https://www.airlinequality.com/airport-reviews/istanbul-airport",
    "Addis Ababa Bole Airport": "https://www.airlinequality.com/airport-reviews/addis-ababa-airport",
    "Dubai": "https://www.airlinequality.com/airport-reviews/dubai-airport",
    "Doha": "https://www.airlinequality.com/airport-reviews/doha-hamad-airport",
    "Abu Dhabi": "https://www.airlinequality.com/airport-reviews/abu-dhabi-airport",
    "Bangkok": "https://www.airlinequality.com/airport-reviews/bangkok-airport",
    "Singapore": "https://www.airlinequality.com/airport-reviews/singapore-changi-airport",
    "Hong Kong": "https://www.airlinequality.com/airport-reviews/hong-kong-airport",
    "Delhi": "https://www.airlinequality.com/airport-reviews/delhi-indira-gandhi-airport",
    "Mumbai": "https://www.airlinequality.com/airport-reviews/mumbai-chhatrapati-shivaji-airport",
    "Bangalore": "https://www.airlinequality.com/airport-reviews/bangalore-kempegowda-airport",
    "Hyderabad": "https://www.airlinequality.com/airport-reviews/hyderabad-rajiv-gandhi-airport"
}

# Run scraper for all airports
all_reviews = []
for airport, url in airport_data.items():
    reviews = scrape_skytrax_reviews(airport, url, pages=3)  # Scrape first 3 pages
    all_reviews.extend(reviews)

# Data is stored in 'all_reviews' list, but NOT saved to CSV yet
print("\n✅ Scraping complete! Data is stored in the 'all_reviews' list.")



Scraping Istanbul Airport - Page 1...
Found 10 reviews on Istanbul Airport - Page 1
📝 Review from Istanbul Airport - Page 1:
Review Text: Not Verified |  Ridiculous prices on everything. Tired staff. Will avoid or bring my own food and dr...
Overall Rating: 2, Recommended: Arrival and Departure, Date of Visit: February 2025
Traveller Type: Family Leisure, Experience: Arrival and Departure
Ratings: {'Terminal Cleanliness': 2, 'Terminal Seating': 1, 'Terminal Signs': 3, 'Food & Beverages': 1, 'Airport Shopping': 1, 'Wifi Connectivity': 1, 'Airport Staff': 1}
📝 Review from Istanbul Airport - Page 1:
Review Text: Not Verified |  The biggest trap for the tourists worldwide, one Big Mac 25 euros and a bag of pista...
Overall Rating: 1, Recommended: Departure Only, Date of Visit: February 2025
Traveller Type: Couple Leisure, Experience: Departure Only
Ratings: {'Terminal Cleanliness': 1, 'Terminal Seating': 1, 'Terminal Signs': 1, 'Food & Beverages': 1, 'Airport Shopping': 1, 'Wifi Connectiv

In [None]:
df = pd.DataFrame(all_reviews)
df.to_csv("airport_reviews.csv", index=False)
print("Data saved to airport_reviews.csv")