# Using FastAI ULMFiT to complete your assignment


## 0. Setup

In [None]:
# Display the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Installing and importing the necessary libraries 
!pip install fastai --quiet
!pip install kaggle --quiet

from fastai.text.all import *

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Show full cell contents (no truncation) when displaying DataFrames.
# `None` is the supported "no limit" value; the legacy `-1` is deprecated and
# rejected by recent pandas versions.
pd.set_option('display.max_colwidth', None)

# Mount Google Drive at /content/gdrive; the dataset paths used in the next
# section point into this mount.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


## 1. Import the data

In [None]:
# Load the titles and labels from the dataset folder on the mounted Google Drive.
# NOTE(review): these paths are relative, so they presumably rely on the working
# directory being /content (the parent of the mount point) — confirm.
text_path = 'gdrive/My Drive/Developer/Datasets/stackoverflow-dataset/title_StackOverflow.txt'
label_path = 'gdrive/My Drive/Developer/Datasets/stackoverflow-dataset/label_StackOverflow.txt'

# Neither file has a header row; each is read into a single named column.
df_text = pd.read_csv(text_path, sep='\t', names=['text'], header=None)
df_label = pd.read_csv(label_path, sep='\t', names=['label'], header=None)

# The two files are paired purely by row position. Without this check a length
# mismatch would be silently padded with NaN by the column-wise concat below.
assert len(df_text) == len(df_label), (
    f'text/label row count mismatch: {len(df_text)} vs {len(df_label)}'
)

df = pd.concat([df_label, df_text], axis=1, sort=False)
print('Length of dataset: '+str(len(df.index)))
df.head()

Length of dataset: 20000


Unnamed: 0,label,text
0,18,How do I fill a DataSet or a DataTable from a LINQ query resultset ?
1,18,How do you page a collection with LINQ?
2,3,Best Subversion clients for Windows Vista (64bit)
3,3,"Best Practice: Collaborative Environment, Bin Directory, SVN"
4,7,Visual Studio Setup Project - Per User Registry Settings


In [None]:
# Numeric class id -> StackOverflow tag name.
# Ids are 1-based and consecutive, so the dict is built by enumerating the tag
# names in id order.
mapping = dict(enumerate([
    'wordpress',      # 1
    'oracle',         # 2
    'svn',            # 3
    'apache',         # 4
    'excel',          # 5
    'matlab',         # 6
    'visual-studio',  # 7
    'cocoa',          # 8
    'osx',            # 9
    'bash',           # 10
    'spring',         # 11
    'hibernate',      # 12
    'scala',          # 13
    'sharepoint',     # 14
    'ajax',           # 15
    'qt',             # 16
    'drupal',         # 17
    'linq',           # 18
    'haskell',        # 19
    'magento',        # 20
], start=1))

# Replace the numeric class ids with their tag names.
# Mapping from the untouched `df_label` (instead of from `df['label']` itself)
# keeps this cell idempotent: re-running it once the column already holds
# strings would otherwise turn every label into NaN.
df['label'] = df_label['label'].map(mapping)

# `Series.map` yields NaN for ids that are not keys of `mapping`; fail loudly
# rather than carrying missing labels into the split and training.
assert df['label'].notna().all(), 'found label ids missing from `mapping`'

df.head()

Unnamed: 0,label,text
0,linq,How do I fill a DataSet or a DataTable from a LINQ query resultset ?
1,linq,How do you page a collection with LINQ?
2,svn,Best Subversion clients for Windows Vista (64bit)
3,svn,"Best Practice: Collaborative Environment, Bin Directory, SVN"
4,visual-studio,Visual Studio Setup Project - Per User Registry Settings


## 2. Create train & validation datasets and FastAI DataLoaders

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 70/30 split on the label column so both subsets keep the same
# class proportions. A fixed `random_state` makes the split reproducible
# across kernel restarts.
df_trn, df_val = train_test_split(
    df, stratify=df['label'], test_size=0.3, random_state=42
)

df_trn.shape, df_val.shape

((14000, 2), (6000, 2))

In [None]:
# Peek at the training split.
# NOTE(review): the saved output of this cell shows numeric labels even though
# `df['label']` is mapped to tag names earlier in the notebook — the cells appear
# to have been executed out of order; restart and run all to confirm.
df_trn.head()

Unnamed: 0,label,text
8369,3,Should I add a new SVN repository or a new folder ?
4096,14,Random errors while downloading files from sharepoint
1315,8,What's the best way to validate a user-entered URL in a Cocoa application?
18099,13,Difference between method and function in Scala
7423,10,How to program using cat


In [None]:
# Preview the raw data (numeric labels next to the titles).
# The result is kept under its own name so that the label-mapped `df` built
# earlier in the notebook is not overwritten when this cell runs.
df_raw = pd.concat([df_label, df_text], axis=1, sort=False)
print('Length of dataset: '+str(len(df_raw.index)))
df_raw.head()

Length of dataset: 20000


Unnamed: 0,label,text
0,18,How do I fill a DataSet or a DataTable from a LINQ query resultset ?
1,18,How do you page a collection with LINQ?
2,3,Best Subversion clients for Windows Vista (64bit)
3,3,"Best Practice: Collaborative Environment, Bin Directory, SVN"
4,7,Visual Studio Setup Project - Per User Registry Settings


In [None]:
# Language-model corpus: the titles of both splits stacked row-wise, with the
# label column left out.
df_lm = pd.concat([split[['text']] for split in (df_trn, df_val)], axis=0)
df_lm.head()

Unnamed: 0,text
8369,Should I add a new SVN repository or a new folder ?
4096,Random errors while downloading files from sharepoint
1315,What's the best way to validate a user-entered URL in a Cocoa application?
18099,Difference between method and function in Scala
7423,How to program using cat


In [None]:
# Creating a dataloader for self-supervised learning task
dls_lm = DataBlock(
    blocks=TextBlock.from_df('text', is_lm=True),
    get_x=ColReader('text'),
    splitter=RandomSplitter(0.1))

In [None]:
dls_lm = dls_lm.dataloaders(df_lm, bs=64, seq_len=72)

In [None]:
# Show three sample rows from the language-model DataLoaders.
dls_lm.show_batch(max_n=3)

Unnamed: 0,text,text_
0,"xxbos xxmaj xxunk web services connections and xxunk certificates xxbos sharepoint xxunk column names not match field names xxbos xxmaj how to configure term on macosx with color xxbos nspopupbutton , xxmaj bindings and a xxunk xxunk xxbos xxmaj is asp.net xxmaj ajax only used at presentation layer , or also at xxmaj business xxmaj logic layer ? xxbos xxmaj insert a xxmaj newline in xxmaj excel xxmaj formula ( macos )","xxmaj xxunk web services connections and xxunk certificates xxbos sharepoint xxunk column names not match field names xxbos xxmaj how to configure term on macosx with color xxbos nspopupbutton , xxmaj bindings and a xxunk xxunk xxbos xxmaj is asp.net xxmaj ajax only used at presentation layer , or also at xxmaj business xxmaj logic layer ? xxbos xxmaj insert a xxmaj newline in xxmaj excel xxmaj formula ( macos ) xxbos"
1,"get ? xxbos xxmaj how can i configure xxmaj spring to save as much memory as possible ? xxbos xxmaj xxunk between “ mock ” xxmaj database and “ real ” database xxunk . xxbos xxup vs2008 files and "" start debugging "" xxbos xxmaj trying to upgrade xxmaj python to 3.0 on xxmaj mac xxup os 10.5.8 xxbos stop apache injecting it 's own 404 not found page to the custom","? xxbos xxmaj how can i configure xxmaj spring to save as much memory as possible ? xxbos xxmaj xxunk between “ mock ” xxmaj database and “ real ” database xxunk . xxbos xxup vs2008 files and "" start debugging "" xxbos xxmaj trying to upgrade xxmaj python to 3.0 on xxmaj mac xxup os 10.5.8 xxbos stop apache injecting it 's own 404 not found page to the custom 404"
2,possible to test the return value of xxmaj haskell i / o functions ? xxbos xxmaj sharepoint web part xxunk working because of xxunk file xxbos nsimage to xxmaj base64 xxbos xxmaj how do i roll out a xxmaj matlab app for others to use ? xxbos xxmaj how could i get my svn - only host to pull from a git repository ? xxbos xxmaj how to cache a collection in,to test the return value of xxmaj haskell i / o functions ? xxbos xxmaj sharepoint web part xxunk working because of xxunk file xxbos nsimage to xxmaj base64 xxbos xxmaj how do i roll out a xxmaj matlab app for others to use ? xxbos xxmaj how could i get my svn - only host to pull from a git repository ? xxbos xxmaj how to cache a collection in xxmaj


## 3. Create and Train the Language Model

In [None]:
# Saving the encoder


## 4. Using the Language Model to Train the Classifier

In [None]:
# Saving the encoder


In [None]:
# Defining the learner


In [None]:
# Applying gradual unfreezing of one layer after another


## 5. Analyzing our results

## 6. Predictions

## 7. Export the model