In [1]:
import requests
from bs4 import BeautifulSoup

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

import re
import string

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Create empty lists to store the review data
review_text = []
rating = []
review = []
review_date = []

# Loop through the first 30 pages of the reviews
for i in range(1, 355):
    # Define the URL to scrape
    url = f"https://www.airlinequality.com/airline-reviews/british-airways/page/{i}/"

    # Send a GET request to the URL
    response = requests.get(url)

    # Parse the HTML content of the page using Beautiful Soup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the review elements on the page
    reviews = soup.find_all('article', {'itemprop': 'review'})

    # Loop through each review element and extract the data
    for r in reviews:
        # Check if the review is for British Airways
        airline_name = r.find('span', {'itemprop': 'name'}).text.strip()
        # Extract the review text
        review_text.append(r.find('div', {'class': 'text_content'}).text.strip())
            
        # Extract the rating
        rating_value = r.find('span', {'class': 'star fill'}).text.strip()
        rating.append(float(rating_value))
        
        # Extract the review
        review.append(r.find('h2', {'class': 'text_header'}).text.strip())
        
        # Extract the review date
        review_date.append(r.find('time', {'itemprop': 'datePublished'}).text.strip())

# Create a pandas DataFrame from the extracted data
df = pd.DataFrame({'ReviewText': review_text, 'Rating': rating, 'Review': review, 'ReviewDate': review_date})

# Print the DataFrame
print(df.head())


                                          ReviewText  Rating  \
0  Not Verified | Top Ten REASONS to not use Brit...     1.0   
1  Not Verified |  Easy check in on the way to He...     1.0   
2  ✅ Trip Verified |  Online check in worked fine...     1.0   
3  ✅ Trip Verified |. The BA first lounge at Term...     1.0   
4  Not Verified | Paid a quick visit to Nice yest...     1.0   

                                        Review     ReviewDate  
0          "cancel your flight without notice"  23rd May 2023  
1               "flights changed with no cost"  23rd May 2023  
2                 "Cheap, quick and efficient"  23rd May 2023  
3           "the worst major European airline"  22nd May 2023  
4  "do not think the fare was worth the money"  22nd May 2023  


In [3]:
df.shape

(3540, 4)

In [4]:
df.head()

Unnamed: 0,ReviewText,Rating,Review,ReviewDate
0,"Not Verified | Top Ten REASONS to not use British Airways To all: Be Aware and be on Notice- Travel at your own risk I have travelled for years on many different airlines, and I have to say that British Airways ranks as one of the top two most horrible airlines to fly. Don’t waste your time or money. Customer service is non-existent. Please be on notice that if you purchase a ticket from them and once, they have your money, you can kiss any customer service and or help resolving your issue goodbye. We used them to travel to Europe and it was the worst experience I have had with an airline in years. When you work hard and save your money to travel, you expect a good experience. Welp not with this company. Who I am sure, makes millions on customers per year. I understand that sometimes things happen, but this started right after we booked our flight AND THE BAD EXPERIENCE DID NOT END UNTIL WE SET FOOT BACK TO OUR HOMETOWN. Please take note: #1 - The customer service number is buried on the internet and not listed on their website. I had to google it and still got the run around when someone finally answered. Trust me, the number is a secret. #2 – When you FINALLY find a customer service number, there is a long wait and when or if someone answers, they will not resolve your issues. Instead, they pass the buck to whatever airlines that they sold your ticket to. NO ONE TAKES OWNERSHIP #4- What airlines operates 365 days a year and around the clock but doesn’t have customer service reps to help you, the customer? BRITISH AIRWAYS that’s who. #5 - They will cancel your flight without notice and not notify you, AT ALL. NO EMAIL, NO TEXT. #6- If you are not the primary on your ticket you will get no notifications or even if you are the primary, you may or may not get notifications. #7 - If you pay for seats and you pick them, it doesn’t matter, they will cancel your seats and not refund your money and then play stupid. #8 - The flight may be cheaper, but it is not once you are done paying for seats. #9- They will route to another partner airline and not notify you #10 – If you have a connecting flight, they will cancel one leg and not the other and NOT NOTIFY YOU. I will never use them again. And if this review helps one person that heeds my advice, then my job is done.",1.0,"""cancel your flight without notice""",23rd May 2023
1,Not Verified | Easy check in on the way to Heathrow. The flight was on time with no issues. Our return flight to Valencia was booked for the afternoon and our long haul flight arrived early at Heathrow. This was with another airline and was not a connecting flight. I asked at the British Airways check in if we could be put onto the morning flight back to Valencia to avoid seven hours waiting at Heathrow. We had our flights changed with no cost and were checked in within five minutes. That is indeed customer service. Thank you.,1.0,"""flights changed with no cost""",23rd May 2023
2,"✅ Trip Verified | Online check in worked fine. Quick security check. Once onboard quick flight up to Glasgow, water and snack provided. All in all very pleased. Cheap, quick and efficient.",1.0,"""Cheap, quick and efficient""",23rd May 2023
3,"✅ Trip Verified |. The BA first lounge at Terminal 5 was a zoo at 2pm, dirty tables and used tableware everywhere. Worse than this, the usual atrocious boarding gate service, and boarding started 50 mins late. The flight was operated by a Finnair A320, which is barely low cost standard let alone business class. Hard, thin, uncomfortable seats, poor legroom and the cabin was simply tatty - I remain shocked that BA allow this, and it's time for BA's CEO and management focused on standards rather than bean counting. Finnair cabin staff were fundamentally courteous, but they provide a very trimmed down, budget airline type service, as the whole trip turned out to be - and yet this business class ticket was one of the more expensive on recent trips. I thought post Covid that BA were better for a few months, but with the many cost cuts they are making, they fall well below all other European full service Airlines.",1.0,"""the worst major European airline""",22nd May 2023
4,"Not Verified | Paid a quick visit to Nice yesterday from Heathrow. Decided to go Business class since my last short haul flight in Economy was a real crush and rather unpleasant. I paid over £300 for a business class round trip. I went into the to BA lounge in Terminal 5 which was very crowded. Being 6.30 in the morning I decided against having a drink and stuck to the breakfast offering. The cooked options did not look that appealing but the fresh fruit selection was very enjoyable. We subsequently discovered a much nicer BA lounge closer to the gate, B36, and waited there for 40 mins. Boarding was efficient and we got ourselves seated in 2A and 2C, seat 2B was blocked out, which acted as an additional table. There did not seem to be any difference in the width of the seat from the cramped seating in Economy but there may have been an additional inch of leg room. I think that BA are trying to outdo EasyJet in terms on no frills. It would not cost BA that much to supply Business class passengers with a complimentary newspaper to read during the flight. The inflight magazine is no longer available. There are no screens so there was zero in terms of in flight entertainment. The inflight WiFi was flaky at best and non-existent at other times. I was glad it was only a two hour flight. I really do not think the fare that we paid was worth the money",1.0,"""do not think the fare was worth the money""",22nd May 2023


In [4]:
df.to_csv('BritishAirwaysReviews.csv')

In [5]:
import requests
import csv
from bs4 import BeautifulSoup

def scrape_medicines():
    url = 'https://www.1mg.com/drugs-all-medicines'
    
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    medicines = []

    # Find all A-Z labels
    labels = soup.find_all('a', {'class': 'style__link-l4f3a-2'})

    for label in labels:
        label_url = base_url + label['href']

        # Process each page for a given label
        while label_url:
            response = requests.get(label_url, headers=headers)
            soup = BeautifulSoup(response.content, 'html.parser')

            # Find all medicine cards on the current page
            cards = soup.find_all('div', {'class': 'style__inner-container___3BZU9 style__product-grid___3noQW style__padding-top-bottom-12px___1-DPF'})            
            for card in cards:
                brand_name = card.find('div', {'class': 'style__font-bold___1k9Dl style__font-14px___YZZrf style__flex-row___2AKyf style__space-between___2mbvn style__padding-bottom-5px___2NrDR'}).text.strip()
                composition = card.find('div', {'class': 'style__font-12px___2ru_e style__product-content___5PFBW style__display-inline-block___2y7gd'}).text.strip()
                manufacturer = card.find('div', {'class': 'style__padding-bottom-5px___2NrDR'}).text.strip()
                mrp = card.find('div', {'class': 'style__font-normal___2gZqF style__margin-left-8px___3Sw1d'}).text.strip()

                medicine_data = {
                    'Brand Name': brand_name,
                    'Composition': composition,
                    'Manufacturer': manufacturer,
                    'MRP': mrp
                }
                medicines.append(medicine_data)

            # Find the link to the next page (if available)
            next_page_link = soup.find('a', {'class': 'style__pagination-next___2s6CL'})
            label_url = base_url + next_page_link['href'] if next_page_link else None

    return medicines

def save_to_csv(data):
    filename = 'medicine_data.csv'
    fields = ['Brand Name', 'Composition', 'Manufacturer', 'MRP']

    with open(filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fields)
        writer.writeheader()
        writer.writerows(data)

    print(f'Medicine data has been successfully saved to {filename}.')

# Scrape medicines
medicines_data = scrape_medicines()

# Save data to CSV file
save_to_csv(medicines_data)


Medicine data has been successfully saved to medicine_data.csv.
