Importing the required libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import plotly as py
from plotly.graph_objs import graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
import cufflinks as cf

In [None]:
# Initialise plotly's notebook mode and switch cufflinks to offline plotting.
# cufflinks supplies the `.iplot` accessor used on pandas objects further down.
init_notebook_mode(connected = True)
cf.go_offline(connected = True)

Importing the DATA

In [None]:
# Load the airline-tweet CSV from the Kaggle input directory into a DataFrame.
tweet_data = pd.read_csv("/kaggle/input/twitter-airline-sentiment/Tweets.csv")

In [None]:
# Column names, dtypes and non-null counts of the freshly loaded frame.
tweet_data.info()

Dropping three columns that contain NULL values (other columns, such as negativereason, still contain NULLs)

In [None]:
# Remove three columns that are not used in the rest of the analysis.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the columns are already gone no longer
# raises a KeyError.
tweet_data = tweet_data.drop(
    columns=['airline_sentiment_gold', 'negativereason_gold', 'tweet_coord'],
    errors='ignore',
)

In [None]:
# Re-inspect the frame after the column drop.
tweet_data.info()

Finding and Plotting the count of various Airlines

In [None]:
# Donut chart of the number of tweets per airline.
# The counts are computed once and reused for both labels and values.
airline_counts = tweet_data['airline'].value_counts()
data = go.Pie(
    labels=airline_counts.index.values,
    values=airline_counts.values,
    hole=0.3,
)

map_airlinecount = go.Figure(data=data)

In [None]:
# Bare last expression: renders the airline-count figure as the cell output.
map_airlinecount

Plotting the count of Sentiments {Positive, Negative, Neutral}

In [None]:
# Histogram of the sentiment labels, drawn through the cufflinks `.iplot` accessor.
tweet_data['airline_sentiment'].iplot(kind = 'histogram')

Plotting the reasons for Negative Sentiments

In [None]:
# Bar chart of how often each negative reason occurs across all airlines.
# value_counts() skips missing reasons, so only labelled tweets are counted.
reason_totals = tweet_data['negativereason'].value_counts()
data1 = go.Bar(
    x=reason_totals.index.values,
    y=reason_totals.values,
)

map_negativereason1 = go.Figure(data=data1)

In [None]:
# Bare last expression: renders the negative-reason bar chart.
map_negativereason1

In [None]:
# Same negative-reason counts as a donut chart, to show relative shares.
reason_share = tweet_data['negativereason'].value_counts()
data2 = go.Pie(
    labels=reason_share.index.values,
    values=reason_share.values,
    hole=0.3,
)

map_negativereason2 = go.Figure(data=data2)

In [None]:
# Bare last expression: renders the negative-reason donut chart.
map_negativereason2

Function to find the negative sentiment reasons by AIRLINES

In [None]:
def negative_sentiment_plot(airline, data=None):
    """Tabulate how often each negative reason occurs for one airline.

    Despite its name, this function does not draw anything; it returns the
    table a plot could be built from.

    Parameters
    ----------
    airline : str
        Value to match exactly against the ``airline`` column.
    data : pandas.DataFrame, optional
        Frame with ``airline`` and ``negativereason`` columns. When omitted,
        the notebook-level ``tweet_data`` frame is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``Reasons`` (distinct non-null reasons, in order of first
        appearance) and ``count`` (number of tweets with that reason).
        Empty when the airline has no tweets with a reason.
    """
    source = tweet_data if data is None else data
    airline_reasons = source.loc[source['airline'] == airline, 'negativereason']

    # value_counts() already ignores missing values; dropna() keeps the list of
    # reasons consistent with it instead of filtering on the string 'nan'.
    counts = airline_reasons.value_counts()
    reasons = list(airline_reasons.dropna().unique())

    return pd.DataFrame({
        'Reasons': reasons,
        'count': [counts[reason] for reason in reasons],
    })

In [None]:
# Example: table of negative reasons and their counts for a single airline.
negative_sentiment_plot('United')

Importing the Libraries for NLP

In [None]:
import nltk
import re
from nltk.corpus import stopwords
import string

Removing the Stopwords and cleaning the messages

In [None]:
_ENGLISH_STOPWORDS = None  # filled on first use by _english_stopwords()


def _english_stopwords():
    """Return NLTK's English stop words as a frozenset, loading them only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def clean_tweet(raw_tweet, stop_words=None):
    """Normalise a tweet to lower-case words with stop words removed.

    Every character that is not an ASCII letter (digits, punctuation, '@',
    '#', emoji, ...) is replaced by a space, the text is lower-cased and split
    on whitespace, stop words are dropped, and the remaining tokens are joined
    with single spaces.

    Parameters
    ----------
    raw_tweet : str
        Original tweet text.
    stop_words : collection of str, optional
        Lower-case words to remove. Defaults to NLTK's English stop-word list,
        which is loaded once and cached rather than re-read for every token.

    Returns
    -------
    str
        The cleaned tweet; an empty string if nothing survives the filtering.
    """
    if stop_words is None:
        stop_words = _english_stopwords()
    tokens = re.sub('[^a-zA-Z]', ' ', raw_tweet).lower().split()
    return ' '.join(token for token in tokens if token not in stop_words)

Applying the Function to DataSet

In [None]:
# Clean every tweet; the function is passed directly, no lambda wrapper needed.
tweet_data['clean_tweet'] = tweet_data['text'].apply(clean_tweet)

Creating a New Column that marks Negative Sentiments as 0 and all other Sentiments (Positive and Neutral) as 1

In [None]:
# Binary label: 0 for 'negative', 1 for every other sentiment value
# (i.e. neutral tweets end up in class 1 together with positive ones).
is_not_negative = tweet_data['airline_sentiment'] != 'negative'
tweet_data['rating'] = is_not_negative.astype('int64')

In [None]:
# Keep only the cleaned text (model input) and the binary label (target).
tweet_ML_data = tweet_data[['clean_tweet', 'rating']]

In [None]:
# Confirm the modelling frame has the two expected columns and check for nulls.
tweet_ML_data.info()

Importing the Multinomial Naive Bayes for Classification of Tweets 

In [None]:
from sklearn.naive_bayes import MultinomialNB

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

In [None]:
# Feature: cleaned tweet text. Target: binary rating (0 = negative, 1 = otherwise).
x = tweet_ML_data['clean_tweet']
y = tweet_ML_data['rating']

In [None]:
# Bag-of-words vectorizer with default settings.
countvectorizer = CountVectorizer()

In [None]:
# Learn the vocabulary and build the token-count matrix for all cleaned tweets.
# NOTE(review): the vocabulary is fitted on every tweet, i.e. before any
# train/test split has been made.
countvectorizermatrix = countvectorizer.fit_transform(x)

In [None]:
# TF-IDF re-weighting step with default settings.
tfidf = TfidfTransformer()

In [None]:
# Re-weight the raw token counts by TF-IDF.
# NOTE(review): the IDF weights are fitted on the full count matrix, so they
# are computed from all tweets rather than from a training subset.
tfidfmatrix = tfidf.fit_transform(countvectorizermatrix)

Dividing the Dataset into Training and Testing Data

In [None]:
# Split the raw text first, then fit the vectorizer and TF-IDF weights on the
# training portion only. Fitting them on all tweets would let vocabulary and
# IDF statistics from the evaluation tweets leak into the features.
x_train_text, x_test_text, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=101
)

# `countvectorizer` and `tfidf` are re-fitted here, so from this point on they
# reflect the training tweets only.
x_train = tfidf.fit_transform(countvectorizer.fit_transform(x_train_text))

# The held-out tweets are only transformed with the train-fitted vocabulary/IDF.
x_test = tfidf.transform(countvectorizer.transform(x_test_text))

In [None]:
# Multinomial naive Bayes classifier with default hyper-parameters.
model = MultinomialNB()

In [None]:
# Train the naive Bayes model on the training split.
model.fit(x_train, y_train)

In [None]:
# Predicted ratings for the held-out test split.
predictions = model.predict(x_test)

Measuring the Accuracy of the Model

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Per-class precision/recall/F1 followed by the confusion matrix for naive Bayes.
for metric in (classification_report, confusion_matrix):
    print(metric(y_test, predictions))

Importing the Random Forest Classifier for Classification

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with 200 trees. The seed is fixed (same value as the train/test
# split) so that the reported metrics are reproducible across kernel restarts.
model1 = RandomForestClassifier(n_estimators=200, random_state=101)

In [None]:
# Train the random forest on the same training split as the naive Bayes model.
model1.fit(x_train, y_train)

In [None]:
# Predicted ratings from the random forest for the held-out test split.
predictions1 = model1.predict(x_test)

In [None]:
# Per-class precision/recall/F1 followed by the confusion matrix for the random forest.
for metric in (classification_report, confusion_matrix):
    print(metric(y_test, predictions1))