# Scraping airport review data

## Import libraries

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

## Web Scraping

### Function for Airlinequality

In [None]:
def createReviewFrameFromAirlinequality(airport, pages, timeout=30):
    """Scrape review texts and ratings for an airport from airlinequality.com.

    Args:
        airport: URL slug of the airport as used by the site,
            e.g. "manchester-airport".
        pages: Number of review pages to fetch, starting at page 1.
            A value of 0 or less fetches nothing.
        timeout: Seconds to wait for each HTTP response before ``requests``
            gives up, so a stalled connection cannot hang the scrape.

    Returns:
        A DataFrame with a "review" column (review text) and a "score"
        column (integer rating, or NaN when a review shows no rating value).

    Raises:
        ValueError: If a page contains a different number of review texts
            and rating blocks, since pairing them would be unreliable.
    """
    reviews = []
    scores = []

    for page in range(1, pages + 1):
        url = "https://www.airlinequality.com/airport-reviews/{}/page/{}".format(airport, page)
        response = requests.get(url, timeout=timeout)

        soup = BeautifulSoup(response.content, "html.parser")
        result = soup.find("article", class_="comp comp_reviews-airline querylist position-content ")
        if result is None:
            # The review container is absent (error page, changed layout, or
            # a page number past the last one): nothing to extract here.
            continue

        review_content = result.find_all("div", class_="text_content")
        score_content = result.find_all("div", class_="rating-10")

        # Reviews and scores are matched purely by position, so a count
        # mismatch would silently attach ratings to the wrong texts.
        if len(review_content) != len(score_content):
            raise ValueError(
                "Page {} of '{}' has {} review texts but {} rating blocks".format(
                    page, airport, len(review_content), len(score_content)
                )
            )

        for score in score_content:
            rating = score.find("span", itemprop="ratingValue")
            scores.append(np.nan if rating is None else int(rating.text))

        for review in review_content:
            text = review.text
            # Drop whatever precedes the first '|' (presumably a verification
            # tag) but keep any later '|' that belongs to the review itself.
            _, separator, body = text.partition('|')
            reviews.append(body.strip() if separator else text.strip())

    return pd.DataFrame({"review": reviews, "score": scores})

### Function for Trustpilot

In [None]:
def createReviewFrameFromTrustpilot(airport, pages, timeout=30):
    """Scrape review texts and star ratings for a business from trustpilot.com.

    Args:
        airport: Identifier used in the Trustpilot review URL,
            e.g. "www.manchesterairport.co.uk".
        pages: Number of review pages to fetch, starting at page 1.
            A value of 0 or less fetches nothing.
        timeout: Seconds to wait for each HTTP response before ``requests``
            gives up, so a stalled connection cannot hang the scrape.

    Returns:
        A DataFrame with a "review" column (review text) and a "score"
        column (integer rating). Review cards that lack a text paragraph,
        lack an image, or whose first image has no parsable ``alt`` text
        are skipped.
    """
    reviews = []
    scores = []

    for page in range(1, pages + 1):
        url = "https://www.trustpilot.com/review/{}?page={}".format(airport, page)
        response = requests.get(url, timeout=timeout)

        soup = BeautifulSoup(response.content, "html.parser")
        results = soup.find_all("article", class_="paper_paper__1PY90 paper_outline__lwsUX card_card__lQWDv styles_reviewCard__hcAvl ")

        for result in results:
            review = result.find("p", class_="typography_body-l__KUYFJ typography_appearance-default__AAY17 typography_color-black__5LYEn")
            score = result.find("img")
            # A card without a text paragraph or without any image cannot
            # yield a (review, score) pair.
            if review is None or score is None:
                continue

            alt_text = score.get("alt", "")
            if not alt_text:
                continue

            # The rating is read from the second space-separated token of the
            # alt text; skip the card when that token is missing or not a number
            # instead of aborting the whole scrape.
            try:
                rating = int(alt_text.split(' ')[1])
            except (IndexError, ValueError):
                continue

            reviews.append(review.text.strip())
            scores.append(rating)

    return pd.DataFrame({"review": reviews, "score": scores})

## Get data from Airlinequality

### Suvarnabhumi

In [None]:
# Airlinequality URL slug identifying Bangkok Suvarnabhumi airport
airport = "bangkok-suvarnabhumi-airport"
# Number of review pages to scrape for this airport
pages = 44

# Collect every review and score from the requested pages into one DataFrame
df = createReviewFrameFromAirlinequality(airport=airport, pages=pages)

In [None]:
# Preview the first 10 scraped rows as a quick sanity check.
df.head(10)

Unnamed: 0,review,score
0,Visit Bangkok Suvarnabhumi Airport many times....,8.0
1,They have decided to get rid of the smoking ar...,1.0
2,Bangkok Suvarnabhumi Airport is okay for arriv...,5.0
3,Made a transit from domestic to international ...,5.0
4,Great airport but the closure of the smoking r...,6.0
5,Again a very poor experience of another Thaila...,4.0
6,Very hot inside. I'm sure the aircon was an af...,1.0
7,"Suvarnabhumi is big, very big. But even in thi...",5.0
8,Very crowded airport. It was very difficult to...,5.0
9,A very large airport. Very crowded. Very unorg...,6.0


In [None]:
# Save the scraped Suvarnabhumi reviews to a CSV file in the working directory.
# NOTE: with pandas' default (index=True) the row index is written as the first column.
df.to_csv("suvarnabhumi_airlinequality.csv")

### Don Muang

In [None]:
# Airlinequality URL slug identifying Bangkok Don Muang airport
airport = "bangkok-don-muang-airport"
# Number of review pages to scrape for this airport
pages = 12

# Collect every review and score from the requested pages into one DataFrame
df = createReviewFrameFromAirlinequality(airport=airport, pages=pages)

In [None]:
# Preview the first 10 scraped rows as a quick sanity check.
df.head(10)

Unnamed: 0,review,score
0,Haven't flown from here for 3 years due to cov...,6.0
1,The domestic terminal is better than the inter...,5.0
2,I think Don Muang is much more beautiful than ...,9.0
3,"On the 4th of March, I was flying back to Bang...",10.0
4,Absolute trash customer service. Accidentally ...,1.0
5,Great experience. Almost zero queues at immigr...,10.0
6,Very good organisation. Quite quick drop-off. ...,7.0
7,I had an issue with bringing in an so-called p...,1.0
8,Very impressed with this place - used to be te...,9.0
9,I've been through Don Muang many times. Every ...,10.0


In [None]:
# Save the scraped Don Muang reviews to a CSV file in the working directory.
# NOTE: with pandas' default (index=True) the row index is written as the first column.
df.to_csv("don_muang_airlinequality.csv")

### Manchester

In [None]:
# Airlinequality URL slug identifying Manchester airport
airport = "manchester-airport"
# Number of review pages to scrape for this airport
pages = 134

# Collect every review and score from the requested pages into one DataFrame
df = createReviewFrameFromAirlinequality(airport=airport, pages=pages)

In [None]:
# Preview the first 10 scraped rows as a quick sanity check.
df.head(10)

Unnamed: 0,review,score
0,Used the Airport for an early morning flight t...,8.0
1,After arriving on the Lisbon to Manchester TAP...,1.0
2,I have just been through Manchester Airport se...,1.0
3,My july visit to this airport is an absolute j...,1.0
4,"Security is a disaster! OMG, What a huge mess ...",2.0
5,Absolutely shambolic! This airport is a total ...,1.0
6,This was the worst experience of my travel exp...,3.0
7,We used Manchester Terminal 2 for our 06.10 fl...,1.0
8,Worst experience of my life. I am fit and heal...,1.0
9,Terminal 2 bag drop and security no problems. ...,5.0


In [None]:
# Save the scraped Manchester reviews from Airlinequality to a CSV file in the working directory.
# NOTE: with pandas' default (index=True) the row index is written as the first column.
df.to_csv("manchester_airlinequality.csv")

### London Heathrow

In [None]:
# Airlinequality URL slug identifying London Heathrow airport
airport = "london-heathrow-airport"
# Number of review pages to scrape for this airport
pages = 99

# Collect every review and score from the requested pages into one DataFrame
df = createReviewFrameFromAirlinequality(airport=airport, pages=pages)

In [None]:
# Preview the first 10 scraped rows as a quick sanity check.
df.head(10)

Unnamed: 0,review,score
0,"Arrived Heathrow on Oct 22, 2022, on British A...",2.0
1,London Heathrow should be a level of Dante’s h...,1.0
2,"My elderly mother who cannot walk, was left in...",1.0
3,I try to avoid transferring through Heathrow b...,2.0
4,Terminal 2 arrivals awful. The staff were forc...,2.0
5,"I arrived in T4, looks and smells old and outd...",5.0
6,Terminal 2 is pleasant enough. Plenty of food ...,3.0
7,"Heathrow now charge £5 just to drive in, stop ...",1.0
8,I missed my flight due to some medical issues ...,10.0
9,Lets start off with the signage to return your...,5.0


In [None]:
# Save the scraped London Heathrow reviews to a CSV file in the working directory.
# NOTE: with pandas' default (index=True) the row index is written as the first column.
df.to_csv("london_heathrow_airlinequality.csv")

## Get data from Trustpilot

### Manchester

In [None]:
# Identifier of Manchester airport's page in the Trustpilot review URL
airport = "www.manchesterairport.co.uk"
# Number of review pages to scrape for this airport
pages = 1252

# Collect every review and score from the requested pages into one DataFrame
df = createReviewFrameFromTrustpilot(airport=airport, pages=pages)

In [None]:
# Preview the first 10 scraped rows as a quick sanity check.
df.head(10)

Unnamed: 0,review,score
0,Horrible experience. Meet and greet parking - ...,1
1,Extremely rude staff at baggage check. Unhelpf...,1
2,"Utterly, totally clueless at security. Putting...",1
3,Have collected family from Manchester airport ...,1
4,"What a truely, grotty depressing place. Ripped...",1
5,I went to pickup from T3 today. Trying to get ...,1
6,Just returned home from a trip to Europe. Flew...,1
7,The charges for being dropped off and collecte...,1
8,Manchester airport is a shambles. Security was...,1
9,This airport is possibly the worst in the worl...,1


In [None]:
# Save the scraped Manchester reviews from Trustpilot to a CSV file in the working directory.
# NOTE: with pandas' default (index=True) the row index is written as the first column.
df.to_csv("manchester_trustpilot.csv")