# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords,wordnet
import string
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer

# Dataset: Airline sentiment

A dataset for US airlines comments analysis,Tweets analysis on Kaggle (<a href="https://www.kaggle.com/datasets/welkin10/airline-sentiment">See dataset page</a>)

### About Dataset
#### Context:
    - This is US airlines data which contain comments of passengers on basis of service provided by airlines.
--------------------------------------------------------------------------------
#### Inspiration:
    -you can use it for sentiment analysis .
---------------------------------------------------------------------------------

# Importing Dataset

In [2]:
dataset=pd.read_csv('Dataset/airline data.csv')
dataset.head()

Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,570306133677760513,neutral,1.0,,,Virgin America,,cairdin,,0,@VirginAmerica What @dhepburn said.,,2015-02-24 11:35:52 -0800,,Eastern Time (US & Canada)
1,570301130888122368,positive,0.3486,,0.0,Virgin America,,jnardino,,0,@VirginAmerica plus you've added commercials t...,,2015-02-24 11:15:59 -0800,,Pacific Time (US & Canada)
2,570301083672813571,neutral,0.6837,,,Virgin America,,yvonnalynn,,0,@VirginAmerica I didn't today... Must mean I n...,,2015-02-24 11:15:48 -0800,Lets Play,Central Time (US & Canada)
3,570301031407624196,negative,1.0,Bad Flight,0.7033,Virgin America,,jnardino,,0,@VirginAmerica it's really aggressive to blast...,,2015-02-24 11:15:36 -0800,,Pacific Time (US & Canada)
4,570300817074462722,negative,1.0,Can't Tell,1.0,Virgin America,,jnardino,,0,@VirginAmerica and it's a really big bad thing...,,2015-02-24 11:14:45 -0800,,Pacific Time (US & Canada)


In [3]:
print(len(dataset.columns))
dataset.columns

15


Index(['tweet_id', 'airline_sentiment', 'airline_sentiment_confidence',
       'negativereason', 'negativereason_confidence', 'airline',
       'airline_sentiment_gold', 'name', 'negativereason_gold',
       'retweet_count', 'text', 'tweet_coord', 'tweet_created',
       'tweet_location', 'user_timezone'],
      dtype='object')

In [4]:
text=dataset['text'].values
text=[word_tokenize(doc) for doc in text]

In [5]:
text[0]

['@', 'VirginAmerica', 'What', '@', 'dhepburn', 'said', '.']

In [6]:
sentiment=dataset['airline_sentiment'].values #'neutral' 'positive' 'negative'

In [7]:
for col in ['airline_sentiment']:
    print(col,dataset[col].unique())

airline_sentiment ['neutral' 'positive' 'negative']


In [8]:
documents=[(text[i],sentiment[i]) for i in range(dataset.shape[0])]

In [9]:
dataset.shape[0]

14640

In [10]:
documents[0]

(['@', 'VirginAmerica', 'What', '@', 'dhepburn', 'said', '.'], 'neutral')

# Dataset Preprocessing
1. TOKENIZING
2. LEMMATIZING
3. REMOVING STOPWORDS
4. PUNCTUATIONS

In [11]:
lemmatizer=WordNetLemmatizer()

In [12]:
def get_simple_pos(tag):   
    
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN

In [13]:
stops=set(stopwords.words('english'))
punctuations = list(string.punctuation)
stops.update(punctuations)
stops

{'!',
 '"',
 '#',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '/',
 ':',
 ';',
 '<',
 '=',
 '>',
 '?',
 '@',
 '[',
 '\\',
 ']',
 '^',
 '_',
 '`',
 'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'need

In [14]:
def clean_review(words):
    output_words = []
    for w in words:
        if w.lower() not in stops:
            pos = pos_tag([w])
            clean_word = lemmatizer.lemmatize(w, pos = get_simple_pos(pos[0][1]))
            output_words.append(clean_word.lower())
    return output_words

In [15]:
document = [(clean_review(doc), category) for doc, category in documents]

In [17]:
print("The clean dataset is \n")
for i in document:
    print(i)

The clean dataset is 

(['virginamerica', 'dhepburn', 'say'], 'neutral')
(['virginamerica', 'plus', "'ve", 'add', 'commercial', 'experience', '...', 'tacky'], 'positive')
(['virginamerica', "n't", 'today', '...', 'must', 'mean', 'need', 'take', 'another', 'trip'], 'neutral')
(['virginamerica', "'s", 'really', 'aggressive', 'blast', 'obnoxious', '``', 'entertainment', "''", 'guest', 'face', 'amp', 'little', 'recourse'], 'negative')
(['virginamerica', "'s", 'really', 'big', 'bad', 'thing'], 'negative')
(['virginamerica', 'seriously', 'would', 'pay', '30', 'flight', 'seat', "n't", 'play', "'s", 'really', 'bad', 'thing', 'fly', 'va'], 'negative')
(['virginamerica', 'yes', 'nearly', 'every', 'time', 'fly', 'vx', '“', 'ear', 'worm', '”', '’', 'go', 'away'], 'positive')
(['virginamerica', 'really', 'miss', 'prime', 'opportunity', 'men', 'without', 'hats', 'parody', 'http', '//t.co/mwpg7grezp'], 'neutral')
(['virginamerica', 'well', "didn't…but", '-d'], 'positive')
(['virginamerica', 'amaze', 

(['united', 'ask', 'lot', 'customer', 'routinely', 'screw', 'know', 'right'], 'negative')
(['united', "'m", 'first', 'time', 'solo', 'female', 'traveller', "'s", 'pretty', 'scary', 'foreign', 'place', 'know', 'belonging', '...'], 'negative')
(['united', 'know', 'bag', 'wait', 'around', 'hope', 'arrive', 'one', 'answer', 'plan', 'answer', '....'], 'negative')
(['united', 'somehow', 'knew', "'d", 'wait', 'airborne', 'respond', 'hack', 'joker', 'neveragain'], 'negative')
(['united', 'missing', 'flight', 'attendant', 'delayed', 'flight', 'prime', 'example', '1', 'person', 'impact', '100', "'s", 'people', 'hnl', 'gt', 'iah', 'ua252', 'wastedtime'], 'negative')
(['united', 'strand', 'texas', 'hang', "'m", 'try', 'figure', 'get', 'home', 'cool'], 'negative')
(['united', 'try', 'change', 'flight', 'three', 'time', 'phone', 'get', 'disconnect', 'time'], 'negative')
(['united', 'poor', 'show', 'today', 'assistance', 'passenger', 'mi', 'connect', 'due', 'maintenance', 'wait', 'baggage'], 'negativ

(['southwestair', 'would', 'love', '+1', 'redcarpet', 'treatment'], 'neutral')
(['southwestair', 'get', 'companion', 'pas'], 'neutral')
(['southwestair', 'tell', 'secret', 'fly', 'high', 'redcarpet'], 'neutral')
(['southwestair', 'would', 'brighter', 'star', 'combine', 'red', 'carpet', 'tonight'], 'positive')
(['southwestair', 'auto', 'check', 'cp', 'holder', 'come'], 'negative')
(['southwestair', 'get', 'mine', 'time', 'wife', '15th', 'anniversary'], 'positive')
(['southwestair', 'love', 'companion', 'pass', 'qualify', '4th', 'year', 'hollymais'], 'positive')
(['southwestair', 'mine'], 'neutral')
(['southwestair', 'continue', 'amaze', 'amaze', 'customer', 'service', 'thank', 'swa'], 'positive')
(['southwestair', '...', 'one', 'obtain', 'companion', 'pas'], 'neutral')
(['southwestair', 'yes', 'please'], 'positive')
(['southwestair', 'http', '//t.co/oqukso3s2o', 'subscribe', 'please', 'http', '//t.co/oqukso3s2o'], 'neutral')
(['southwestair', 'sign'], 'positive')
(['southwestair', 'get'

(['southwestair', 'trying', 'get', 'phone', 'confirm', 'fund', 'cancelled', 'flightled', 'reservation', 'still', 'use', 'future', 'u', 'help'], 'negative')
(['southwestair', 'consider', 'add', '``', "'ll", 'call', 'back', 'someone', 'free', "''", 'feature', 'support', 'line'], 'negative')
(['southwestair', "'ve", 'hold', 'customer', 'service', 'hour', 'help'], 'negative')
(['southwestair', "'ve", 'hold', 'hour', '59:57', 'type', 'ridiculous', 'need', 'link', 'chart', 'route', 'time'], 'negative')
(['southwestair', 'sent'], 'neutral')
(['southwestair', 'flight', 'cancelled', 'flightled', 'reflight', 'booking', 'problems', 'online', 'work', 'second', 'round', 'hold', '3', 'hr', 'option'], 'negative')
(['southwestair', 'kudos', 'rsw', 'cs', 'crew', 're-routing', 'pax', 'alleviate', 'sale', 'due', 'grade', 'eqp', '800', '500'], 'negative')
(['southwestair', 'try', 'fly', 'nashville', 'tomorrow', 'look'], 'neutral')
(['southwestair', 'think', 'flight', 'nashville', 'cancelled', 'flighted', 

(['usairways', 'question', 'need', 'talk', 'someone', 'email', 'give', 'email', 'address', 'thanks'], 'neutral')
(['usairways', 'gregm528', 'well', 'certainly', 'nothing', 'happen', '``', 'front', "''", 'scene', 'noaccountability', 'disappoint'], 'negative')
(['usairways', '4', 'hour', 'tarmac', 'charleston', 'still', 'ca', "n't", 'get', 'response', 'week', 'unacceptable'], 'negative')
(['usairways', 'customer', 'service', 'best', 'rachel', 's.', 'take', 'great', 'care', 'u', 'phx', 'airport', 'http', '//t.co/hg7veqhghy'], 'positive')
(['usairways', 'amp', 'americanair', 'plane', 'grandcayman', 'http', '//t.co/gx7qbtckbr'], 'neutral')
(['usairways', 'never', 'fly', 'u', 'airway'], 'negative')
(['usairways', 'strike', '--', 'late', 'flight', 'crew', '3/4', 'trip', 'maintenance', '2/4', 'worth', 'extra', '200/trip', 'less', 'hassle', 'few', 'delay'], 'negative')
(['usairways', 'follow', 'dm'], 'neutral')
(['usairways', 'get', 'bumped', '6pm', 'birmingham', 'delayed', 'pilot', 'schedule',

(['usairways', 'phone', 'look', 'like', 'global'], 'neutral')
(['usairways', 'already', 'file', 'report', 'personally', 'airport', 'however', 'idea', 'even', 'track', 'number'], 'negative')
(['usairways', 'already', 'call', 'option', 'flight', 'reimburse', 'never', 'unreliable', 'business', 'traveler'], 'negative')
(['usairways', 'save', 'grace', 'flight', 'attendant', 'dallas', 'amaze', 'wish', 'would', 'transfer', 'delta', 'would', 'see'], 'negative')
(['usairways', 'link', 'lead', 'website', 'wo', "n't", 'open', 'cell', 'phone', 'good', 'job', 'shock', 'airline', 'fold'], 'negative')
(['usairways', 'flight', 'us558', 'get', 'cancelled', 'flightled', '2/16', 'due', 'lack', 'flight', 'crew.i', 'told', "'d", 'reimburse', 'hotel', 'contact'], 'negative')
(['usairways', 'need', 'apology', 'unfortunate', 'situation', 'date', "'ve", 'land', 'suppose', 'land', '5'], 'negative')
(['usairways', '~45', 'minute', 'mean', 'public', 'transit', 'home', 'expensive', 'cab', 'promises', "n't", 'make'

(['americanair', 'sell', 'mce', 'post', 'door-close', 'shuffle', 'way', 'u', 'sell', 'drinks/snacks', 'people', 'flight', 'self-upgraded', 'w/o', 'pay'], 'negative')
(['americanair', '2396', 'cancelled', 'flightled', 'tonight'], 'neutral')
(['americanair', 'good', 'enough', 'info', 'communicate', 'point', 'silence', 'hour', 'sat', 'oh', 'seat', 'broken', 'bad'], 'negative')
(['americanair', 'ca', "n't", 'get', 'operator', 'hour', "'s", 'worth', 'call', 'cancelled', 'flight', 'delayed', 'flight', 'automated', 'system', "n't", 'go', 'beyond', 'message'], 'negative')
(['americanair', 'provide', 'u', 'alternative', 'flight', '36', 'hour', 'late', 'flightr', 'ruin', 'trip', 'cancelled', 'flightled', 'angry', 'problem', 'ruin'], 'negative')
(['americanair', 'stand', 'baggage', 'claim', 'hour', 'wait', 'bag', 'knew', 'never', 'make', 'plane'], 'negative')
(['americanair', 'love', 'travel', 'plane', 'people', 'nice', "'s", 'amaze', 'please', 'follow', 'back', '😋i', 'love', 'company'], 'positiv

In [18]:
categories = [category for document, category in documents]

In [19]:
categories[:10]

['neutral',
 'positive',
 'neutral',
 'negative',
 'negative',
 'negative',
 'positive',
 'neutral',
 'positive',
 'positive']

In [20]:
text_documents = [" ".join(document) for document, category in documents]

In [23]:
print('not clean dataset:\n')
for i in text_documents:
    print(i)

not clean dataset:

@ VirginAmerica What @ dhepburn said .
@ VirginAmerica plus you 've added commercials to the experience ... tacky .
@ VirginAmerica I did n't today ... Must mean I need to take another trip !
@ VirginAmerica it 's really aggressive to blast obnoxious `` entertainment '' in your guests ' faces & amp ; they have little recourse
@ VirginAmerica and it 's a really big bad thing about it
@ VirginAmerica seriously would pay $ 30 a flight for seats that did n't have this playing . it 's really the only bad thing about flying VA
@ VirginAmerica yes , nearly every time I fly VX this “ ear worm ” won ’ t go away : )
@ VirginAmerica Really missed a prime opportunity for Men Without Hats parody , there . https : //t.co/mWpG7grEZP
@ virginamerica Well , I didn't…but NOW I DO ! : -D
@ VirginAmerica it was amazing , and arrived an hour early . You 're too good to me .
@ VirginAmerica did you know that suicide is the second leading cause of death among teens 10-24
@ VirginAmerica I

@ united what a nightmare ! ! Both sides of my flights are a disaster ! At Houston getting attitude cuz I was sent to ticket counter
@ united you 're terrible .
@ united it was eventually explained that weather conditions too extreme to get luggage off planes . Will be sent on . But I wo n't be for days !
@ united Anyone there ? Did you read my DM ?
@ united 732 from Denver . We just boarded ! Fingers crossed we get into the air ! ! !
@ united EWR TO MCO made unplanned landing because of pressurization failure . Worst pain I 've EVER felt , I thought I was going to pass out
@ united How does that make a flight takeoff on time ? And regardless it makes me Late Flight because now I have to wait for my bag at baggage claim .
@ united it 's been over 3 hours ... at what point do you let people off of the plane ? @ FoxNews @ CNN @ msnbc
@ united trying to get a customer service agent . Just landed in SFO . Ca n't fly with 3 layovers with 3 kids ! !
@ united why are there no early morning fl

@ SouthwestAir has n't evennotified us that theflight isdelayed via email/text/phone call.If we wererunning Late Flight I would be pissed # unreliable
@ SouthwestAir can tweet through the weekend/ bad weather , but closes down customer relations center to process refunds for Cancelled Flighted flights .
@ SouthwestAir stop Cancelled Flighting my flight I have US history tests to take and chemistry things to learn do you not understand ! ! ! ! ! ! ! ! !
@ SouthwestAir 2/22-MDW 2 SAN flt 1687 attendant Melissa was awesome ! Fast , smiling , great . After weather Cancelled Flight day b4 , it was welcome
@ SouthwestAir been all up and down the area where the pic was taken and do n't see any albums .
@ SouthwestAir I enjoyed a call from my good friend he 's Flight Booking Problems his flights elsewhere as I tweet one at a time I will tell as many as I can
@ SouthwestAir thank you . Great customer service so far . Accidents happen I understand . Hopefully everything works out .
@ SouthwestAi

@ SouthwestAir yeah they told me it would be on the next flight . I drove down and it wasnt . I just drove down for a second time and they were
@ SouthwestAir much respect !
@ SouthwestAir it 's not letting me DM you ! !
@ SouthwestAir @ Imaginedragons when are we gon na know I have a math test tomoro and I ca n't concentrate😭😭 # DestinationDragons
@ SouthwestAir Thank you ! # thankful # feelingtheluv
@ SouthwestAir Im just praying you get me home alive
@ SouthwestAir I hope you 're happy ! You have officially become the next @ AmericanAir # ProfitBeforePeople IMO you will be bankrupt by 2020
@ SouthwestAir what 's up with these delays ? ! Throw some priority boarding my way & amp ; I 'll forgive you ! ! 👍 # southwest # southwestairlines
@ SouthwestAir I got it added thank you ! : )
@ SouthwestAir Great , thank you . Best of luck dealing with this horrible winter .
@ SouthwestAir thank you : - )
@ SouthwestAir needed my flight info so I can add my rapid rewards to my flight ... First t

@ JetBlue 1951 BOS to ORD
@ JetBlue thanks . Line moved quick . Already done .
@ JetBlue I do follow you !
@ JetBlue nope . None to be found
@ JetBlue I may need to . I had a 40 minute layover and now have a 15 min delay on my first flight .
@ JetBlue I may not make my connection and need to find out my options , yet no one is here .
@ JetBlue nope . Currently sitting at my gate .
@ JetBlue their names are both Angel ( seriously - how cool is that ! ) . Truly FANTASTIC service !
@ JetBlue extra-speed line closed in Tampa . What gives ?
@ JetBlue I sent you an email
@ JetBlue thank you : - )
@ JetBlue thanks !
@ JetBlue also . Emergency exit seats . 6 ' 2 '' and that 's a huge win .
@ JetBlue friendly , engaging , personable , handled clarifying questions about baggage fees well , and took an interest in what I was doing .
@ JetBlue so I do n't know confirmation number or the names of the flight attendants and supervisor
@ JetBlue would love to respond to link but supervisor tore ticket

@ USAirways Plus a US Airways - you need to do something about this ! I left Philly to thaw out ! ! ! ! http : //t.co/pKy7ZhnNRH
@ USAirways ordered a Scotch & amp ; water and F/A asked if I wanted full glass or half ? 60 yrs old and first time ever asked that ? ? ? ?
@ USAirways yeah , the mark was like a mile back . Also we 're an hour Late Flight . So thanks a ton for the great service today .
@ USAirways we got 1 drink then F/A sat in jump seat doing crosswords . Glasses picked up at landing . Just a very lazy service 4 First class .
@ USAirways Paid for a Choice Seat my Choice is to NOT have big man next to me in my seat , too . # oldseatnocushion # worstflight # nohelponboard
@ USAirways First class no snack basket was catered so drinks only No preflight drink or coathanging 1st drink service 50 min after take off
@ USAirways you told us last night that there was a ground stop at O'Hare FOUR HOURS AFTER Obama landed , stop blaming someone else
@ USAirways # 428 delayed due to # f

@ AmericanAir the issue was n't a long wait . It was an infinite do loop . Your system did n't let me ( or any customer ) wait or leave a message .
@ AmericanAir Did the policy change ? I 've seen people with hamsters and rabbits before .
@ AmericanAir thanks to your attendant on flight for advising connect flight held . Ran with kids and flight departed . Poor service at Miami
@ AmericanAir an hour now waiting on the phone for US Air help . 10 hours waiting at the airport yesterday . Love the service guys .
@ AmericanAir I 'm here standing at baggage claim waiting for bags FOR OVER AN HOUR at DFW . The gate is 100 feet from here ! # nothappy
@ AmericanAir right so I missed my connection / had a three hour Kay over / then you lost my bag .
@ AmericanAir obviously we did see an agent-booked us for tomorrow morning . No hotel , no transportation . # pregnantwithtwins # stranded # angry
@ AmericanAir I was suppose to be in FL 4 hours ago . And I 'm not . I 've been waiting for hours this 