In [None]:
import os
# Recursively enumerate the input directory and print the full path of
# every file found, one path per line.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/Airline/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
import pandas as pd
import spacy
import en_core_web_sm
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# Load the en_core_web_sm spaCy pipeline; preprocess() below calls it to
# tokenize text and read each token's lemma / stop-word / alphabetic flags.
nlp = en_core_web_sm.load()

# Load the airline reviews dataset from the CSV file into a DataFrame.
df = pd.read_csv('/content/Airline_Reviews.csv')

# Restrict the analysis to the first 20 rows. The explicit .copy() makes
# df_subset an independent frame, so the 'Processed_Review' column assigned
# to it further down does not touch df.
df_subset = df.head(20).copy()

# Preprocessing function to tokenize and remove stopwords
def preprocess(text):
    """Return ``text`` reduced to its space-joined content-word lemmas.

    The text is run through the module-level spaCy pipeline ``nlp``; stop
    words and tokens that are not purely alphabetic (numbers, punctuation,
    mixed tokens) are dropped, and the lemma of each remaining token is kept.

    Args:
        text: The raw review text. Non-string values (for example the float
            NaN that pandas uses for a missing cell) and blank strings are
            accepted and yield an empty result instead of raising.

    Returns:
        A single string of lemmas separated by spaces; ``''`` when there is
        nothing to process.
    """
    # Guard first: nlp() only accepts str, and a blank string would produce
    # no alphabetic tokens anyway, so the result is '' in both cases.
    if not isinstance(text, str) or not text.strip():
        return ''
    doc = nlp(text)
    tokens = [token.lemma_ for token in doc if not token.is_stop and token.is_alpha]
    return ' '.join(tokens)

# Store the preprocess() result for every entry of the 'Review' column in a
# new 'Processed_Review' column of the 20-row subset.
df_subset['Processed_Review'] = df_subset['Review'].apply(preprocess)

# Load the tokenizer and the sequence-to-sequence model for the BART
# checkpoint named below; generate_summary() uses both.
model_name = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Function to generate concise one-line summaries using BART
def generate_summary(text):
    """Return a short summary of ``text`` generated by the loaded BART model.

    The text is encoded with the module-level ``tokenizer`` (truncated to
    1024 tokens), summarized by the module-level ``model`` using 4-beam
    search bounded to 10-50 generated tokens, and decoded back to a string
    with special tokens removed.

    Args:
        text: The text to summarize. Non-string or blank input yields ``""``
            without invoking the model.

    Returns:
        The decoded summary string, or ``""`` when there is nothing to
        summarize.
    """
    # Nothing to summarize: skip the model call instead of failing on a
    # non-string value or generating text from an empty prompt.
    if not isinstance(text, str) or not text.strip():
        return ""
    # Feed the document as-is. A "summarize: " task prefix is a T5
    # convention; BART treats it as part of the document, and it was being
    # echoed back at the start of generated summaries.
    inputs = tokenizer.encode(text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(inputs, max_length=50, min_length=10, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Summarize each of the 20 preprocessed reviews and print the result under a
# 1-based "Review #N" heading.
# NOTE(review): the summarizer is given the lemmatized, stopword-free text
# from 'Processed_Review', not the raw 'Review' text — confirm this is intended.
for position, processed_text in enumerate(df_subset['Processed_Review'], start=1):
    summary = generate_summary(processed_text)
    print(f"Review #{position}:\n{summary}\n")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Review #1:
Moroni Moheli turn pretty decent airline online booking work checkin boarding fine plane look maintain short flight minute expect manage hand bottle water biscuit nice flight time.

Review #2:
Moroni Anjouan is a small airline ticket advise turn confusion small airport direct office AB Aviation closed open tell flight try contact true phone number local guide inform presume bump later flight operate usual confusion.

Review #3:
Anjouan Dzaoudzi small airline airline base Comoros check disorganise local big package disinterested staff flight fortunately short min take time land time short flight like course flight entertainment.

Review #4:
 fly Adria route Munich Pristina July lose luggage day row despite numerous phone call able locate day later luggage arrive destination completely ruin apply compensation ignore request foolishly book flight euro Frankfurt Pristin September cancel flight reason hour departure desperate phone

Review #5:
summarize: book flight airline frien