In [None]:
# Downloading the Required Libraries
!pip -q install openai langchain huggingface_hub

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.3/220.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.1/46.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the follo

In [None]:
# Importing pandas for data manipulation and analysis.
# Pandas is typically used for working with structured data (like CSV, Excel files) in tabular form.
import pandas as pd

# Importing os module to interact with the operating system.
# Commonly used for file and directory operations.
import os

# Importing getpass to securely prompt the user for a password without echoing.
from getpass import getpass

# Importing OpenAI from langchain.llms.
# This is typically used to interface with OpenAI's language models like GPT-3.
from langchain.llms import OpenAI

# Importing PromptTemplate from langchain.
# This is used for defining templates for generating prompts for language models.
from langchain import PromptTemplate

# Importing LLMChain from langchain.chains.
# An LLMChain pairs a prompt template with a large language model (LLM) so that the formatted prompt can be run against the model in a single call.
from langchain.chains import LLMChain

# Importing SimpleSequentialChain from langchain.chains.
# This links several single-input/single-output chains so that the output of each step becomes the input of the next.
from langchain.chains import SimpleSequentialChain

# Importing BaseModel from pydantic for creating data models.
# Pydantic is used for data validation and settings management using Python type annotations.
from pydantic import BaseModel, ValidationError, validator

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# force_remount=True is passed so the mount is redone when the cell is re-run.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [None]:
# Location of the airline reviews CSV inside the mounted Google Drive folder.
file_path = "/content/drive/My Drive/ISB-TERM-5/NLP/airlines_reviews.csv"

# Load the CSV into the DataFrame `df`.
# A later cell reads the 'review' column of this frame.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Read the OpenAI API key interactively so that it is never stored in the notebook.
# A key must not be written into code, comments, or saved cell outputs; any key that
# has been saved in a notebook should be treated as compromised and revoked.
# getpass() does not echo what is typed.
os.environ['OPENAI_API_KEY'] = getpass("OpenAI API key: ")

··········


In [None]:
# Pydantic model used to validate the labels produced for one airline review.
class ReviewAnalysisValidation(BaseModel):
    """Schema for the analysis of a single customer review.

    Every label field is checked against a fixed list of allowed values.
    A value outside its list makes the validator raise ``ValueError``, which
    pydantic reports to the caller as a ``ValidationError``.
    """

    # Fields of the model; each one is a string.
    review_text: str  # Text of the customer review
    sentiment: str  # Sentiment expressed in the review
    complaint_category: str  # Category of the complaint, if any
    urgency_level: str  # Level of urgency of the issue raised in the review
    risk_type: str  # Type of risk associated with the complaint

    # The validators raise explicitly instead of using `assert`: assert
    # statements are removed when Python runs with -O, which would silently
    # turn every check below into a no-op.

    @validator('sentiment')
    def check_sentiment(cls, value):
        """Return ``value`` unchanged if it is "Positive" or "Negative".

        Raises:
            ValueError: if ``value`` is any other string.
        """
        if value not in ["Positive", "Negative"]:
            raise ValueError('Invalid sentiment')
        return value

    @validator('complaint_category')
    def check_complaint_category(cls, value):
        """Return ``value`` unchanged if it is a recognized complaint category.

        Raises:
            ValueError: if ``value`` is not in the list of valid categories.
        """
        # List of valid complaint categories
        valid_categories = [
            "Ticketing and Booking", "In-Flight Experience", "Baggage Handling",
            "Customer Service", "Delays and Cancellations", "Safety and Security",
            "Boarding Process", "Loyalty Programs", "No Complaint"
        ]
        if value not in valid_categories:
            raise ValueError('Invalid complaint category')
        return value

    @validator('urgency_level')
    def check_urgency_level(cls, value):
        """Return ``value`` unchanged if it is a recognized urgency level.

        Raises:
            ValueError: if ``value`` is not in the list of valid urgency levels.
        """
        # List of valid urgency levels
        valid_urgencies = ["High Urgency", "Medium Urgency", "Low Urgency", "No Urgency"]
        if value not in valid_urgencies:
            raise ValueError('Invalid urgency level')
        return value

    @validator('risk_type')
    def check_risk_type(cls, value):
        """Return ``value`` unchanged if it is a recognized risk type.

        Raises:
            ValueError: if ``value`` is not in the list of valid risk types.
        """
        # List of valid risk types
        valid_risks = [
            "Reputational Risk", "Operational Risk", "Financial Risk",
            "Safety Risk", "Regulatory Risk", "No Risk"
        ]
        if value not in valid_risks:
            raise ValueError('Invalid risk type')
        return value

In [None]:
# Runs four separate LLM classifications over a single airline review.
class AirlineReviewAnalysis:
    """Label a review with sentiment, complaint category, urgency and risk.

    One LLMChain is built per prompt template; all chains share the same
    language model.
    """

    def __init__(self, llm, sentiment_prompt, complaint_prompt, urgency_prompt, risk_prompt):
        """Store the model and the four prompts, and build one chain per prompt.

        Args:
            llm: Language model handed to every LLMChain.
            sentiment_prompt: Prompt template for the sentiment question.
            complaint_prompt: Prompt template for the complaint-category question.
            urgency_prompt: Prompt template for the urgency-level question.
            risk_prompt: Prompt template for the risk-type question.
        """
        self.llm = llm
        self.sentiment_prompt = sentiment_prompt
        self.complaint_prompt = complaint_prompt
        self.urgency_prompt = urgency_prompt
        self.risk_prompt = risk_prompt

        # One chain per question, each binding the shared model to its prompt.
        self.sentiment_chain = LLMChain(llm=self.llm, prompt=self.sentiment_prompt)
        self.complaint_chain = LLMChain(llm=self.llm, prompt=self.complaint_prompt)
        self.urgency_chain = LLMChain(llm=self.llm, prompt=self.urgency_prompt)
        self.risk_chain = LLMChain(llm=self.llm, prompt=self.risk_prompt)

    @staticmethod
    def _ask(chain, review_text):
        """Run ``chain`` on the review; strip outer whitespace and drop every period."""
        raw_answer = chain.run(review_text)
        return raw_answer.strip().replace(".", "")

    def analyze_review(self, review_text):
        """Classify one review and return the validated result as a dictionary.

        The chains are run in the order sentiment, complaint, urgency, risk.
        The cleaned answers are passed to ReviewAnalysisValidation together
        with the review text, and the model's ``dict()`` is returned.
        """
        # The dict literal is evaluated top to bottom, which fixes the call order.
        labels = {
            "sentiment": self._ask(self.sentiment_chain, review_text),
            "complaint_category": self._ask(self.complaint_chain, review_text),
            "urgency_level": self._ask(self.urgency_chain, review_text),
            "risk_type": self._ask(self.risk_chain, review_text),
        }

        validated = ReviewAnalysisValidation(review_text=review_text, **labels)
        return validated.dict()

In [None]:
# Template asking the model to classify the sentiment of a customer review.
# The two labels are spelled exactly as the sentiment validator accepts them
# ("Positive" / "Negative"), because the answer is compared to those strings verbatim.
sentiment_template = """
What is the sentiment of the customer review given below? It should be either Positive or Negative.

review: {review_text}
Sentiment:
"""

# PromptTemplate that fills {review_text} into the sentiment template.
sentiment_prompt = PromptTemplate(
    input_variables=["review_text"],
    template=sentiment_template,
)

# Template asking the model for the complaint category of a customer review.
# The category names in the opening sentence are spelled exactly as in the numbered
# definitions and in the complaint-category validator, because the answer is compared
# to those strings verbatim; a singular/plural or capitalization difference would
# make an otherwise correct answer fail validation.
complaint_template = """
What is the category of complaint of the customer review given below? It can be Ticketing and Booking, In-Flight Experience, Baggage Handling, Customer Service, Delays and Cancellations, Safety and Security, Boarding Process, Loyalty Programs or No Complaint
To give you more context on each of the complaints following are the definitions:
1. Ticketing and Booking: Issues related to ticket purchasing, booking processes, pricing, and refunds.
2. In-Flight Experience: Quality of in-flight services, seat comfort, cabin cleanliness, food and beverages, entertainment options.
3. Baggage Handling: Lost, damaged, or delayed baggage; baggage fees.
4. Customer Service: Quality of service at check-in, gate, and customer support (including call centers and online support).
5. Delays and Cancellations: Issues related to flight delays, cancellations, and the handling of these situations.
6. Safety and Security: Concerns regarding safety procedures, security checks, and overall safety perceptions.
7. Boarding Process: Efficiency and organization of the boarding process.
8. Loyalty Programs: Issues or feedback related to frequent flyer programs and other loyalty incentives.
9. No Complaint: There was no complaint from the passenger. The sentiment of the review was positive.

review: {review_text}
Category of Complaint:

"""
# PromptTemplate that fills {review_text} into the complaint-category template.
complaint_prompt = PromptTemplate(
    input_variables=["review_text"],
    template=complaint_template,
)

# Template asking the model for the urgency level of a customer review.
# Each level is spelled exactly as the urgency validator accepts it (including
# "No Urgency" with a capital U), because the answer is compared to those strings verbatim.
urgency_template = """
What is the level of urgency of the customer review given below? It can be High Urgency, Medium Urgency, Low Urgency or No Urgency
To give more context on each of the levels of urgency following are the definitions:
1. High Urgency: Immediate action required; issues that significantly impact customer safety or satisfaction.
2. Medium Urgency: Requires timely attention; important but not critical.
3. Low Urgency: Can be addressed in a routine manner; less impact on immediate customer experience or safety.
4. No Urgency: No urgency.

review: {review_text}
Level of Urgency:

"""
# PromptTemplate that fills {review_text} into the urgency template.
urgency_prompt = PromptTemplate(
    input_variables=["review_text"],
    template=urgency_template
)

# Template asking the model which type of risk a customer review indicates.
# It lists the six allowed risk types and gives a one-line definition of each.
risk_template = """
Evaluate the customer review provided below and determine the type of risk it indicates. The risk can be Reputational Risk, Operational Risk, Financial Risk, Safety Risk, Regulatory Risk, or No Risk. Below are the definitions for each risk type:

1. Reputational Risk: Potential damage to the airline's brand and public image.
2. Operational Risk: Impacts on the efficiency and effectiveness of airline operations.
3. Financial Risk: Direct or indirect financial losses, including compensation claims.
4. Safety Risk: Any risk that can affect the safety and security of passengers and staff.
5. Regulatory Risk: Non-compliance with aviation regulations and standards.
6. No Risk: The review does not indicate any risk.

review: {review_text}
Identified Risk Type:

"""
# PromptTemplate that fills {review_text} into the risk template.
risk_prompt = PromptTemplate(
    template=risk_template,
    input_variables=["review_text"],
)

# Initialize the OpenAI language model used for all four classification prompts.
# temperature=0 keeps sampling as close to deterministic as the API allows. Every
# prompt here is a classification task whose answer must match a fixed label list
# exactly, so a high temperature only adds off-label answers and run-to-run drift.
# max_tokens caps the length of each completion.
# NOTE(review): 'text-davinci-003' is a legacy completion model — confirm it is still
# available before a full run.
llm = OpenAI(model_name='text-davinci-003',
             temperature=0,
             max_tokens = 256)

In [None]:
# Build the analyzer used to label every review.
# It receives the shared language model plus one prompt template per question
# (sentiment, complaint category, urgency level, risk type); the constructor
# creates one LLMChain for each of these prompts.
airlinereviewanalysis_obj = AirlineReviewAnalysis(
    llm=llm,
    sentiment_prompt=sentiment_prompt,
    complaint_prompt=complaint_prompt,
    urgency_prompt=urgency_prompt,
    risk_prompt=risk_prompt,
)

In [None]:
# Results for reviews whose labels passed validation, one dictionary per review.
review_analysis_list = []

# Reviews whose model answers failed validation, kept as
# (position in `reviews`, review text, ValidationError) so they can be inspected or retried.
failed_reviews = []

# Extract the reviews from the 'review' column of the DataFrame 'df'.
reviews = list(df['review'])

# Analyze each review in turn.
for position, review in enumerate(reviews):
    try:
        # analyze_review runs the four chains and validates their answers.
        review_analysis = airlinereviewanalysis_obj.analyze_review(review)
    except ValidationError as error:
        # One off-label answer from the model must not abort the whole run and
        # discard the results already collected: record the failure and move on.
        failed_reviews.append((position, review, error))
        continue

    review_analysis_list.append(review_analysis)

# Report failures explicitly so skipped reviews never go unnoticed.
if failed_reviews:
    print(f"{len(failed_reviews)} of {len(reviews)} reviews failed validation; see `failed_reviews`.")

In [None]:
# Build the results table: each dictionary in 'review_analysis_list' becomes one row
# and the dictionary keys become the column headers.
output_analysis_df = pd.DataFrame(data=review_analysis_list)

# Preview the first five rows as the cell's output.
output_analysis_df.head(n=5)

Unnamed: 0,review_text,sentiment,complaint_category,urgency_level,risk_type
0,The service was excellent. The cabin staff we...,Positive,No Complaint,No Urgency,No Risk
1,We have had some torrid experiences with BA -...,Positive,No Complaint,No Urgency,No Risk
2,We had a flight from ZRH to SFO via LHR. The l...,Positive,No Complaint,No Urgency,No Risk
3,London to Paris. I wish that they would updat...,Negative,In-Flight Experience,Low Urgency,Operational Risk


In [None]:
# Write the analysis results to a CSV file on Google Drive, without the row index.
output_analysis_df.to_csv(path_or_buf='/content/drive/My Drive/ISB-TERM-5/NLP/output_analysis_df.csv', index=False)