In [None]:
import os
# Lazily build the full path of every file found beneath the input directory,
# then print one path per line so the available data files are visible.
input_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/Airline/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
import pandas as pd
import spacy
from spacy import displacy
import en_core_web_sm

# Loading spaCy's pre-trained small English pipeline; `nlp` is reused by later cells.
nlp = en_core_web_sm.load()

# Loading the airline reviews dataset; `df` is reused by later cells.
# NOTE(review): hard-coded absolute path — confirm it matches where the CSV actually lives.
df = pd.read_csv('/content/Airline_Reviews.csv')

# Take the first 20 reviews by position. Series.head() clamps to the number of
# rows available, so a short file (or a non-default index) no longer raises
# KeyError the way the label lookup df['Review'][i] did. Missing reviews are
# replaced by empty strings because the spaCy pipeline only accepts text.
preview_reviews = df['Review'].head(20).fillna('').astype(str)

# nlp.pipe() processes the texts as a batch and yields one Doc per review,
# in the same order as the input.
for review_number, doc in enumerate(nlp.pipe(preview_reviews), start=1):
    # doc.text is the original review text (spaCy tokenization is non-destructive).
    print(f"Review #{review_number}: {doc.text}")

    # Print entities and their labels
    for ent in doc.ents:
        print(f"Entity: {ent.text} - Label: {ent.label_}")
    print('\n')

Review #1:   Moroni to Moheli. Turned out to be a pretty decent airline. Online booking worked well, checkin and boarding was fine and the plane looked well maintained. Its a very short flight - just 20 minutes or so so i didn't expect much but they still managed to hand our a bottle of water and some biscuits which i though was very nice. Both flights on time.
Entity: Moheli - Label: GPE


Review #2:  Moroni to Anjouan. It is a very small airline. My ticket advised me to turn up at 0800hrs which I did. There was confusion at this small airport. I was then directed to the office of AB Aviation which was still closed. It opened at 0900hrs and I was told that the flight had been put back to 1300hrs and that they had tried to contact me. This could not be true as they did not have my phone number. I was with a local guide and he had not been informed either. I presume that I was bumped off. The later flight did operate but as usual, there was confusion at check-in. The flight was only 30m

In [None]:
# Visualizing named entities using displaCy
# The first 20 reviews are selected by position: Series.head() clamps to the
# rows that exist, so a dataset with fewer than 20 rows (or a non-default
# index) does not raise KeyError. Missing reviews become empty strings because
# the spaCy pipeline only accepts text.
visualized_reviews = df['Review'].head(20).fillna('').astype(str)

# nlp.pipe() yields one Doc per review, in input order; each Doc is rendered
# inline with its entity spans highlighted.
for doc in nlp.pipe(visualized_reviews):
    displacy.render(doc, style='ent', jupyter=True)

In [None]:
from collections import Counter
# Upper bound on how many reviews are scanned. Series.head() clamps to the
# number of rows available, so a dataset with fewer rows no longer raises
# KeyError the way the label lookup df['Review'][i] over range(0, 5000) did.
MAX_REVIEWS = 5000

# Missing reviews become empty strings because the spaCy pipeline only accepts text.
review_texts = df['Review'].head(MAX_REVIEWS).fillna('').astype(str)

# Initializing a Counter to keep track of entity frequencies (keyed by entity text)
entity_counter = Counter()

# Processing the reviews in batches and extracting named entities.
# nlp.pipe() yields one Doc per text, in input order, and avoids the overhead
# of thousands of separate single-document pipeline calls.
for doc in nlp.pipe(review_texts):
    # Counting the frequency of each named entity
    entity_counter.update(ent.text for ent in doc.ents)

# NOTE(review): every entity is counted regardless of its label and the text is
# case-sensitive, so numeric words such as 'first' or '2' can dominate the
# ranking — consider filtering on ent.label_ if that is not intended.

# Printing the most common named entities and their frequencies
print("Top 10 Most Common Named Entities:")
for entity, freq in entity_counter.most_common(10):
    print(f"Entity: {entity} - Frequency: {freq}")

Top 10 Most Common Named Entities:
Entity: first - Frequency: 733
Entity: one - Frequency: 502
Entity: 2 - Frequency: 454
Entity: two - Frequency: 358
Entity: 3 - Frequency: 315
Entity: London - Frequency: 242
Entity: First - Frequency: 222
Entity: Paris - Frequency: 214
Entity: second - Frequency: 209
Entity: Beijing - Frequency: 202
