In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Scrape settings: the listing to crawl and how much of it to request.
base_url = "https://www.airlinequality.com/airline-reviews/british-airways"
# `pages` bounds the page loop; `page_size` is sent as the `pagesize` query value.
pages, page_size = 10, 100

In [None]:
# Scrape the reviews page by page. Each review block is parsed as one unit, so
# its text, overall score, recommendation and category stars are appended
# together and always share the same row index in every list.
MISSING = "NA"        # placeholder stored whenever a review lacks a field
REQUEST_TIMEOUT = 30  # seconds before an unresponsive request is abandoned

reviews = []
rating = []
ratings_dict = {
    "seat_comfort": [],
    "cabin_staff_service": [],
    "food_and_beverages": [],
    "inflight_entertainment": [],
    "ground_service": [],
    "wifi_and_connectivity": [],
    "value_for_money": [],
}
recommended = []


def _overall_rating(review):
    """Return the overall score text for one review, or MISSING when absent.

    NOTE(review): assumes each ``div.body`` is nested inside its own
    ``<article>`` that also holds the review's ``ratingValue`` span -- confirm
    against the live markup. Scores inside a ``rating-10 rating-large`` div are
    skipped, as before.
    """
    article = review.find_parent("article")
    if article is None:
        return MISSING
    for score in article.find_all("span", itemprop="ratingValue"):
        if not score.find_parent("div", class_="rating-10 rating-large"):
            return score.text.strip()
    return MISSING


def _recommendation(review):
    """Return the text of the review's 'recommended' row, or MISSING."""
    header = review.find("td", class_="review-rating-header recommended")
    value = header.find_next_sibling("td", class_="review-value") if header else None
    return value.text.strip() if value else MISSING


def _star_count(review, category):
    """Return the number of filled stars for `category`, or MISSING if unrated."""
    header = review.find("td", class_=f"review-rating-header {category}")
    stars = (
        header.find_next_sibling("td", class_="review-rating-stars stars")
        if header
        else None
    )
    return len(stars.find_all("span", class_="star fill")) if stars else MISSING


for i in range(1, pages + 1):
    print(f"Scraping page {i}")
    url = f"{base_url}/page{i}/?sortby=post_date%3ADesc&pagesize={page_size}"

    # Fail loudly on HTTP errors instead of parsing an error page as "no reviews".
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    parsed_content = BeautifulSoup(response.content, 'html.parser')

    # Each review is taken to be one div with class="body".
    review_rows = parsed_content.find_all("div", class_="body")

    for review in review_rows:
        text_div = review.find("div", class_="text_content")
        reviews.append(text_div.get_text() if text_div else MISSING)
        rating.append(_overall_rating(review))
        recommended.append(_recommendation(review))
        for category in ratings_dict:
            ratings_dict[category].append(_star_count(review, category))

    print(f"Found {len(reviews)} reviews")
    print(f"Collected {len(review_rows)} reviews on page {i}")

    # An empty page means the listing is exhausted; later pages would be empty too.
    if not review_rows:
        break

# No tail padding is needed: every list receives exactly one entry per review,
# so all columns already have the same length, whatever the number scraped.
print(f"Final counts: { {k: len(v) for k, v in ratings_dict.items()} }")

Scraping page 1
Found 100 reviews
Collected 100 reviews on page 1
Scraping page 2
Found 200 reviews
Collected 100 reviews on page 2
Scraping page 3
Found 300 reviews
Collected 100 reviews on page 3
Scraping page 4
Found 400 reviews
Collected 100 reviews on page 4
Scraping page 5
Found 500 reviews
Collected 100 reviews on page 5
Scraping page 6
Found 600 reviews
Collected 100 reviews on page 6
Scraping page 7
Found 700 reviews
Collected 100 reviews on page 7
Scraping page 8
Found 800 reviews
Collected 100 reviews on page 8
Scraping page 9
Found 900 reviews
Collected 100 reviews on page 9
Scraping page 10
Found 1000 reviews
Collected 100 reviews on page 10
Final counts: {'seat_comfort': 1000, 'cabin_staff_service': 1000, 'food_and_beverages': 1000, 'inflight_entertainment': 1000, 'ground_service': 1000, 'wifi_and_connectivity': 1000, 'value_for_money': 1000}


In [27]:
# Assemble the scraped lists into two frames: the text/overall fields and the
# per-category star counts.
df_1 = pd.DataFrame(
    {
        "Reviews": reviews,
        "Overall Ratings": rating,
        "Recommended": recommended,
    }
)
df_2 = pd.DataFrame(ratings_dict)

# Both frames must have one row per review before they are placed side by side.
if df_1.shape[0] != df_2.shape[0]:
    raise ValueError("Mismatch in number of reviews and ratings!")

df = pd.concat((df_1, df_2), axis="columns")
print(df.head())

                                             Reviews Overall Ratings  \
0  ✅ Trip Verified |   This time British Airways ...               9   
1  ✅ Trip Verified |   The seats were excellent, ...               9   
2  ✅ Trip Verified |   After the nightmare of get...               5   
3  ✅ Trip Verified |   Prior to boarding a gate a...               3   
4  ✅ Trip Verified |   I flew from Amsterdam to L...               1   

  Recommended seat_comfort cabin_staff_service food_and_beverages  \
0         yes            5                   5                  4   
1         yes            5                   4                  3   
2          no            4                   1                  2   
3          no            4                   1                 NA   
4          no            3                   3                  3   

  inflight_entertainment ground_service wifi_and_connectivity  value_for_money  
0                      4              5                     4          

In [None]:
# Persist the combined frame; the row index is left out of the file.
df.to_csv(path_or_buf="BA_reviews.csv", index=False)