**ULMFiT Model for Twitter US Airline Sentiment**

In [None]:
# importing libraries 
from fastai.text import * 

1. Data Wrangling

In [None]:
# read the raw tweets from the CSV file and preview the first five rows
dataset = pd.read_csv("Tweets.csv")
dataset.head(n=5)

2. Data Exploration

In [None]:
# count of non-null tweet_id values for each airline
tweetsCount = dataset.groupby("airline")["tweet_id"].count()
tweetsCount

In [None]:
# horizontal bar chart of the per-airline tweet counts
tweetsCount.plot(kind="barh")

In [None]:
# count of non-null tweet_id values for each sentiment
sentimentCount = dataset.groupby("airline_sentiment")["tweet_id"].count()
sentimentCount

In [None]:
# vertical bar chart of the per-sentiment tweet counts
sentimentCount.plot(kind="bar")

In [None]:
# tweet counts broken down by airline and, within each airline, by sentiment
tweetTypeCount = (
    dataset
    .groupby(["airline", "airline_sentiment"])["tweet_id"]
    .count()
)
tweetTypeCount

In [None]:
# grouped bar chart: the sentiment level is moved into columns, so every
# airline gets one colored bar per sentiment
tweetTypeCount.unstack(level=-1).plot(kind="bar")

3. Data Cleansing

In [None]:
# keep only the sentiment and the tweet text for modeling, exposing the
# sentiment column under the name "label"
df = dataset[["airline_sentiment", "text"]].rename(
    columns={"airline_sentiment": "label"}
)
df.head()

In [None]:
# helper functions that clean tweet text: Twitter handles, runs of spaces and '#' characters
 
def removeHandleID(text):
  """Remove Twitter handles from a tweet.

  Every "@" followed by one or more non-whitespace characters is deleted;
  the whitespace around the handle is left untouched.

  Args:
    text: tweet text as a string.

  Returns:
    The text with all "@..." tokens removed.
  """
  # raw string: "\s" inside a plain literal is an invalid escape sequence
  # that Python reports with a warning
  return re.sub(r'@[^\s]+', '', text)

def removeExtraSpace(text):
  """Collapse every run of two or more spaces into a single space.

  Single spaces and other whitespace characters (tabs, newlines) are left
  as they are.

  Args:
    text: tweet text as a string.

  Returns:
    The text with each run of consecutive spaces reduced to one space.
  """
  # substitute ' ' rather than '': deleting the whole run would glue the
  # neighbouring words together ("late  flight" -> "lateflight")
  return re.sub(r' {2,}', ' ', text)

def removeHashTags(text):
  """Surround every '#' in the text with one space on each side.

  Despite the name, nothing is removed: each '#' is kept and padded so it
  stands apart from the characters next to it.

  Args:
    text: tweet text as a string.

  Returns:
    The text with every '#' replaced by ' # '.
  """
  padded = re.sub('#', ' # ', text)
  return padded

In [None]:
# clean every tweet by running the three helpers in order:
# handle removal, extra-space cleanup, then '#' padding
df["text"] = (
    df["text"]
    .apply(removeHandleID)
    .apply(removeExtraSpace)
    .apply(removeHashTags)
)
df.head()

In [None]:
# splitting dataset into training (80%) and validation (20%) sets
# a fixed random_state makes the sampled validation rows identical on every
# run, so results can be reproduced after a kernel restart
validDF = df.sample(frac = 0.2, random_state = 42)
# the training set is every row whose index label was not sampled above
trainDF = df.drop(validDF.index)

4. Data Modeling

In [None]:
# build the fastai data bunches used by the two training stages
path = ""

# language-model data bunch
data_lm = TextLMDataBunch.from_df(path = path, train_df = trainDF, valid_df = validDF)

# classifier data bunch: same frames, reusing the language model's
# vocabulary, with a batch size of 32
data_clas = TextClasDataBunch.from_df(
    path = path,
    train_df = trainDF,
    valid_df = validDF,
    vocab = data_lm.train_ds.vocab,
    bs = 32,
)

In [None]:
# saving both data bunches (language model and classifier) through their
# save() method so they can be reloaded by file name later
# NOTE(review): presumably the files are written relative to `path` — confirm
data_lm.save('data_lm_export.pkl')
data_clas.save('data_clas_export.pkl')

In [None]:
# reloading both data bunches (language model and classifier) from the
# files saved in the previous cell
# NOTE: the classifier data is reloaded with bs=16, not the bs=32 it was
# originally built with
data_lm = load_data(path, 'data_lm_export.pkl')
data_clas = load_data(path, 'data_clas_export.pkl', bs=16)

In [None]:
# fine tuning language model: build an AWD_LSTM language-model learner on
# data_lm (drop_mult = 0.5) and train it for one cycle
# NOTE(review): the second argument (1e-2) is presumably the maximum
# learning rate of the cycle — confirm against the fastai docs
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult = 0.5)
learn.fit_one_cycle(1, 1e-2)

In [None]:
# unfreezing the language model and running a second one-cycle pass,
# this time with 1e-3 (ten times smaller than in the previous cell)
learn.unfreeze()
learn.fit_one_cycle(1, 1e-3)

In [None]:
# text completion prediction: ask the language model to continue the prompt
# "missing baggage" (n_words = 10) and display the result
# tempText is reused by the sample test at the end of the notebook
tempText = learn.predict("missing baggage", n_words = 10)
tempText

In [None]:
# saving the fine-tuned encoder under the name 'ft_enc' for transfer
# learning; the classifier loads it by the same name in the next cell
learn.save_encoder('ft_enc')

In [None]:
# building an AWD_LSTM text classifier on data_clas (drop_mult = 0.5) and
# loading the encoder that was saved as 'ft_enc'
# NOTE: this rebinds `learn`; from here on it is the classifier, not the
# language-model learner
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult = 0.5)
learn.load_encoder('ft_enc')

In [None]:
# display a sample batch taken from the classifier data bunch
data_clas.show_batch()

In [None]:
# first training pass of the classifier: one cycle with 1e-2
# NOTE(review): this trains rather than searches for a learning rate;
# presumably only the not-yet-unfrozen default layers are updated — confirm
learn.fit_one_cycle(1, 1e-2)

In [None]:
# partial unfreezing before the next one-cycle pass
# NOTE(review): freeze_to(-2) presumably leaves only the last two layer
# groups trainable (gradual unfreezing) — confirm against the fastai docs
learn.freeze_to(-2)
# the learning-rate slice runs from 5e-3/2 up to 5e-3
learn.fit_one_cycle(1, slice(5e-3/2., 5e-3))

In [None]:
# unfreezing the whole classifier for a final one-cycle pass
# the learning-rate slice runs from 2e-3/100 up to 2e-3
# NOTE(review): presumably spread across layer groups as discriminative
# learning rates — confirm against the fastai docs
learn.unfreeze()
learn.fit_one_cycle(1, slice(2e-3/100, 2e-3))

5. Results

In [None]:
# calculating accuracy from classification interpretation
interpret = TextClassificationInterpretation.from_learner(learn)
# store the result under its own name: assigning it to `accuracy` would
# rebind the metric function called on this line, so running the cell a
# second time would try to call the previous result instead
accuracyScore = accuracy(interpret.preds, interpret.y_true)
print("Accuracy: {0:.3f}%".format(accuracyScore*100))

In [None]:
# plotting a confusion matrix from the classification interpretation
interpret.plot_confusion_matrix()
# must run after the plot call: plt.title applies to the current axes
plt.title("Confusion Matrix")

In [None]:
# sample test: classify the text generated earlier by the language model
# print the input first; a bare expression that is not the last statement
# of a cell is evaluated but never displayed
print(tempText)
learn.predict(tempText)