In [2]:
# AI banner category
# importing the modules
from tabulate import tabulate
from bs4 import BeautifulSoup
import requests
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Sample data for training the AI model.
# Each entry is a (banner text, category label) pair; the texts and labels are
# split apart and used to fit the classifier further down in this file.
# NOTE(review): some texts contain literal tab characters and run-together
# words (e.g. "CardsUse", "ToUnLock") — presumably copied verbatim from scraped
# banner text; confirm before "cleaning" them, since they are model input.
# NOTE(review): the set is very small (17 samples) and several labels
# ("Ancillary", "Route Launch", "Loyalty", "Information", "Partnership Offer")
# have only a single example each.
training_data = [
    ("Big Sale on all flights!", "Sale"),
    ("Priceless Journeys", "Sale"),
    ("Incredible India at incredible prices", "Sale"),
    ("Enjoy Special Fares this Onam starting at AED 315	Booking Period: 06th Sept 2024 – 29th Sept 2024*T&C apply", "Sale"),
    ("Get extra baggage allowance", "Ancillary"),
    ("New route launch: 3x weekly flights to Paris", "Route Launch"),
    ("Introducing our new in-flight entertainment system", "Product Launch"),
    ("Easy Visa Assistance for your Journey", "Product Launch"),
    ("Lock Fares ToUnLock Joy", "Product Launch"),
    ("Special bank offer: 10% cashback on tickets", "Bank Offer"),
    ("ENJOY 10%* DISCOUNT ON DOMESTIC FLIGHTS Valid on HDFC Credit and Debit CardsUse Promo Code HDFC10 *T&C Apply", "Bank Offer"),
    ("10% INSTANT DISCOUNT on Domestic Flights with SBI Credit CardsUse Promo Code: SBITENBooking Period: 05th Sep to 15th Sep 2024*T&C Apply", "Bank Offer"),
    ("Join our FLYING RETURNS loyalty program", "Loyalty"),
    ("Our brand campaign: Fly with the best", "Brand Campaign"),
    ("WANT TO DOWNLOAD BOARDING PASS?	Give AI.g your PNR and name to easily download your boarding pass in seconds", "Brand Campaign"),
    ("Important information about flight schedules", "Information"),
    ("Exclusive partnership offer with XYZ hotels", "Partnership Offer")
]

# Pull the (text, label) pairs apart into two parallel tuples, which is the
# shape scikit-learn's fit() expects for samples and targets.
texts = tuple(sample for sample, _ in training_data)
labels = tuple(category for _, category in training_data)

# Build the TF-IDF -> Multinomial Naive Bayes pipeline and train it in one
# expression; Pipeline.fit() returns the fitted pipeline itself.
model = make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(texts, labels)

# Function to classify banner text using AI model
def classify_banner(text):
    """Return the category label the trained module-level ``model`` predicts for ``text``.

    The pipeline's ``predict`` works on a batch, so the single banner text is
    wrapped in a one-element list and the sole prediction is unwrapped again.
    """
    predictions = model.predict([text])
    return predictions[0]

# URL of the website whose promotional banners are scraped
url = "https://www.airindia.com/"

# Keyword rules for the hand-written classifier. Rules are checked in order and
# the first category with a matching keyword wins; matching is a
# case-insensitive substring test against the title and the description.
_KEYWORD_RULES = (
    ("Sale", ("sale",)),
    ("Route Launch", ("x weekly",)),
    ("Loyalty", ("flying returns", "membership")),
)


def _rule_based_category(title, description):
    """Return the first keyword-rule category matching either text, else "Other"."""
    # Lower-case each text once instead of once per rule.
    haystacks = (title.lower(), description.lower())
    for category, keywords in _KEYWORD_RULES:
        if any(keyword in haystack for keyword in keywords for haystack in haystacks):
            return category
    return "Other"


# Send a GET request to the website. The timeout keeps the script from hanging
# forever on an unresponsive server, and raise_for_status() turns an HTTP error
# (4xx/5xx) into an exception instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the HTML content of the page
soup = BeautifulSoup(response.content, "html.parser")

# Banner titles are <h2 class="promotionTitle">, descriptions are
# <p class="promotionDescription">
promotion_titles = soup.find_all("h2", class_="promotionTitle")
promotion_descriptions = soup.find_all("p", class_="promotionDescription")

# Extract the visible text of every title and description
titles = [element.get_text(strip=True) for element in promotion_titles]
descriptions = [element.get_text(strip=True) for element in promotion_descriptions]

# Titles and descriptions are paired by index (the i-th title is assumed to
# belong with the i-th description). If the page yields a different number of
# each, pad the shorter list with empty strings so that every column below has
# the same length; otherwise pd.DataFrame raises
# "All arrays must be of the same length" and unmatched banners go unclassified.
banner_count = max(len(titles), len(descriptions))
titles += [""] * (banner_count - len(titles))
descriptions += [""] * (banner_count - len(descriptions))

# 1-based position of each banner on the page
positions = list(range(1, banner_count + 1))

# Keyword-rule category and AI-model category for every banner
categories = [
    _rule_based_category(banner_title, banner_description)
    for banner_title, banner_description in zip(titles, descriptions)
]
ai_categories = [
    classify_banner(banner_title + " " + banner_description)
    for banner_title, banner_description in zip(titles, descriptions)
]

# Create a DataFrame with the fetched information
data = {
    "Banner Position": positions,
    "Banner Title": titles,
    "Banner Description": descriptions,
    "Banner Category": categories,
    "AI Category": ai_categories,
}
df = pd.DataFrame(data)

# Save the DataFrame to an Excel file
df.to_excel("banner_info.xlsx", index=False)

# Display the DataFrame twice: as a psql-style grid and as a Markdown table
print(tabulate(df, headers="keys", tablefmt="psql"))
print(df.to_markdown())


+----+-------------------+------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------+----------------+
|    |   Banner Position | Banner Title                                                                 | Banner Description                                                                                                                         | Banner Category   | AI Category    |
|----+-------------------+------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------+----------------|
|  0 |                 1 | UNVEILING A WORLD OF POSSIBILITIES                                           | Vistara is joining wings with Air India to