# Importing Required Libraries and Reading Data

In [1]:
import pandas as pd

In [3]:
# Location of the airline reviews CSV on the local machine
reviews_csv_path = "/Users/srijanagella/Documents/Airline_review.csv"
# Load the reviews into a DataFrame
df = pd.read_csv(reviews_csv_path)
# Preview 20 randomly chosen rows as the cell output
df.sample(n=20)

Unnamed: 0.1,Unnamed: 0,Airline Name,Overall_Rating,Review_Title,Review Date,Verified,Review,Aircraft,Type Of Traveller,Seat Type,Route,Date Flown,Seat Comfort,Cabin Staff Service,Food & Beverages,Ground Service,Inflight Entertainment,Wifi & Connectivity,Value For Money,Recommended
15552,15552,Onur Air,4,"""otherwise I would avoid""",2nd September 2015,False,Very small seats and drinks were not free. Boa...,,Couple Leisure,Economy Class,Dalaman to Istanbul,August 2015,1.0,3.0,1.0,1.0,,,3.0,no
15082,15082,Nok Air,1,"""flight cancelled the night before""",17th February 2019,True,We purchased 4 tickets from Bangkok to Phuke...,,Couple Leisure,Economy Class,Bangkok to Phuket,November 2018,1.0,1.0,,1.0,,,1.0,no
1760,1760,Air France,6,"""The in flight experience was fine""",9th August 2022,True,We were able to fit everything into carry on...,Boeing 777,Family Leisure,Economy Class,New York to Paris CDG,July 2022,3.0,3.0,3.0,2.0,4.0,,4.0,yes
4472,4472,AnadoluJet,7,AnadoluJet customer review,21st July 2013,False,Flight Antalya to Ankara flight on time pleasa...,,,Economy Class,,,4.0,3.0,1.0,,1.0,,4.0,yes
2750,2750,Air Serbia,1,"""twice the flights were cancelled""",13th January 2022,True,Air Serbia show flights which have already b...,,Business,Business Class,Zurich to Sofia via Belgrade,January 2022,,,,,,,1.0,no
15422,15422,Olympic Air,8,Olympic Air customer review,31st December 2009,False,AMS-ATH return. New Airbus aircraft friendly a...,,,Economy Class,,,,,,,,,4.0,yes
4440,4440,ANA All Nippon Airways,1,"""best ANA flight attendant service""",19th June 2019,True,"Jakarta to Tokyo Narita. I love ANA, is the ...",Boeing 787-8,Family Leisure,Economy Class,Jakarta to Tokyo Narita,March 2019,5.0,5.0,5.0,5.0,5.0,4.0,4.0,yes
12316,12316,Jetstar Airways,1,"""left my 15 year old daughter stranded""",2nd May 2023,False,If I could give no stars I would. Seriously ...,,Solo Leisure,Economy Class,Adelaide to Brisbane,May 2023,1.0,1.0,1.0,1.0,1.0,1.0,1.0,no
3965,3965,Alitalia,2,"""the rudest staff""",24th May 2019,False,Montenegro to Cairo via Rome. Worst airline ...,,Family Leisure,Economy Class,Montenegro to Cairo via Rome,May 2019,4.0,2.0,3.0,1.0,,,2.0,no
6133,6133,Brussels Airlines,1,"""such an arrogant company""",6th April 2023,True,"Brussels Airlines sells you a seat and then,...",A320,Family Leisure,Economy Class,Geneva to Brussels,April 2023,,,,,,,1.0,no


In [4]:
#!pip install scikit-learn

# Preprocessing and Sentiment Analysis

In [5]:
# scikit-learn's preprocessing module
from sklearn import preprocessing
# Natural Language Toolkit (NLTK)
import nltk
# Fetch the opinion lexicon corpus (no re-download if it is already up to date)
nltk.download('opinion_lexicon')
# Corpus reader for the opinion lexicon
from nltk.corpus import opinion_lexicon
# Word-level tokenizer
from nltk.tokenize import word_tokenize

# Report how many words the lexicon contains in total
lexicon_size = len(opinion_lexicon.words())
print('Total number of words in opinion lexicon', lexicon_size)

# Show the first ten positive entries
positive_examples = opinion_lexicon.positive()[:10]
print('Examples of positive words in opinion lexicon', positive_examples)

# Show the first ten negative entries
negative_examples = opinion_lexicon.negative()[:10]
print('Examples of negative words in opinion lexicon', negative_examples)

Total number of words in opinion lexicon 6789
Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     /Users/srijanagella/nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


# Data Preparation and Scoring

In [6]:
# Build the word -> score lookup used to score review text

# Fetch the punkt tokenizer data from NLTK
nltk.download('punkt')

# Rename the column 'reviewText' to 'Modules' in the DataFrame.
# NOTE(review): the sample displayed after loading shows no 'reviewText' column;
# if it is absent this rename leaves the frame unchanged - confirm it is still needed.
df.rename(columns={"reviewText": "Modules"}, inplace=True)

# Score assigned to each positive / negative lexicon word
pos_score = 1
neg_score = -1

# Start with every positive word mapped to the positive score ...
word_dict = dict.fromkeys(opinion_lexicon.positive(), pos_score)

# ... then map every negative word to the negative score
# (a word present in both lists ends up with the negative score, as before)
word_dict.update(dict.fromkeys(opinion_lexicon.negative(), neg_score))

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/srijanagella/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [7]:
# Function that scores a text with the Bing Liu opinion lexicon
def bing_liu_score(Modules):
    """Return the lexicon-based sentiment score of a text.

    The text is lower-cased and split into tokens with ``word_tokenize``.
    Every token found in ``word_dict`` contributes its stored score, and the
    contributions are summed.

    Parameters
    ----------
    Modules : str
        The text to score. Non-string values (for example ``None`` or a
        missing-value NaN) and blank strings are scored as 0 instead of
        raising an error.

    Returns
    -------
    int
        Sum of the ``word_dict`` scores of the tokens in the text; 0 when no
        token is in the dictionary or there is no text to score.
    """
    # Missing reviews arrive as None/NaN, which have no .lower(); blank text
    # has no tokens. Both carry no sentiment, so score them as neutral.
    if not isinstance(Modules, str) or not Modules.strip():
        return 0
    # Tokenize the lower-cased text into words
    bag_of_words = word_tokenize(Modules.lower())
    # Words missing from the dictionary contribute nothing to the total
    return sum(word_dict.get(word, 0) for word in bag_of_words)

In [10]:
# Replace missing reviews with a placeholder so every row holds a string.
# The result is assigned back to the column: calling fillna(..., inplace=True)
# on the selection df['Review'] acts on an intermediate object, which triggers a
# chained-assignment warning in recent pandas and does not update df at all
# when Copy-on-Write is enabled.
df['Review'] = df['Review'].fillna('no review')
# Create the 'Bing_Liu_Score' column by applying bing_liu_score to each review
df['Bing_Liu_Score'] = df['Review'].apply(bing_liu_score)

# Analysis and Visualization

In [17]:
# Show the first five rows, restricted to the airline, rating, review text and score
df.loc[:, ['Airline Name', 'Overall_Rating', 'Review', 'Bing_Liu_Score']].head()

Unnamed: 0,Airline Name,Overall_Rating,Review,Bing_Liu_Score
0,AB Aviation,9,Moroni to Moheli. Turned out to be a pretty ...,7
1,AB Aviation,1,Moroni to Anjouan. It is a very small airline...,-3
2,AB Aviation,1,Anjouan to Dzaoudzi. A very small airline an...,2
3,Adria Airways,1,Please do a favor yourself and do not fly wi...,-6
4,Adria Airways,1,Do not book a flight with this airline! My fr...,-2


In [18]:
# Average 'Bing_Liu_Score' per airline (one row per 'Airline Name')
df.groupby('Airline Name')[['Bing_Liu_Score']].mean()

Unnamed: 0_level_0,Bing_Liu_Score
Airline Name,Unnamed: 1_level_1
AB Aviation,2.000000
ANA All Nippon Airways,5.290000
ASKY Airlines,-1.956522
ATA Airlines,-0.500000
Adria Airways,2.648352
...,...
euroAtlantic Airways,-0.187500
fastjet,0.030303
flyadeal,-2.684211
flybe,0.750000


In [19]:
# Average 'Bing_Liu_Score' per 'Overall_Rating' value.
# NOTE(review): the displayed result includes a group labelled 'n', so the rating
# column appears to hold non-numeric values - confirm whether those rows should be cleaned.
df.groupby('Overall_Rating')[['Bing_Liu_Score']].mean()

Unnamed: 0_level_0,Bing_Liu_Score
Overall_Rating,Unnamed: 1_level_1
1,-0.881501
2,-1.891551
3,-1.196903
4,0.038417
5,1.137349
6,1.90963
7,3.670578
8,4.628913
9,5.16233
n,0.606888
