In [0]:
# fastai lib for NLP
from fastai.text import *
# Colab library to upload files to notebook
from google.colab import files

In [0]:
# Install Kaggle library
!pip install -q kaggle

## Download Data

In [25]:
# Upload kaggle API key file
uploaded = files.upload()

Saving kaggle.json to kaggle.json


In [0]:
# Move kaggle API key file to the root folder
!mv kaggle.json /root/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

In [31]:
# Downlaod data for the videogamesales
!kaggle datasets download -d gregorut/videogamesales --unzip

Downloading videogamesales.zip to /content
  0% 0.00/381k [00:00<?, ?B/s]
100% 381k/381k [00:00<00:00, 57.3MB/s]


## Pre-processing

In [0]:
df = pd.read_csv("vgsales.csv")

In [33]:
df.head(5)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [0]:
unwanted_cols = ['Rank','Platform','Publisher', 'Year' ,'NA_Sales','EU_Sales','JP_Sales','Other_Sales','Global_Sales']
df.drop(unwanted_cols, inplace=True, axis=1)

In [37]:
df.head(5)

Unnamed: 0,Name,Genre
0,Wii Sports,Sports
1,Super Mario Bros.,Platform
2,Mario Kart Wii,Racing
3,Wii Sports Resort,Sports
4,Pokemon Red/Pokemon Blue,Role-Playing


In [0]:
path = ""
target_col = 'Genre'
input_col = ['Name']
train_df = df.iloc[0:12000]
valid_df = df.iloc[12000:16000]

## Language Model

In [0]:
data_lm = TextLMDataBunch.from_df(path=path,
                                  text_cols=input_col,
                                  label_cols=target_col,
                                  train_df=train_df,
                                  valid_df=valid_df)

In [48]:
data_lm.show_batch(5)

idx,text
0,xxmaj nintendogs xxbos xxmaj mario xxmaj kart xxup ds xxbos xxmaj pokemon xxmaj gold / xxmaj pokemon xxmaj silver xxbos xxmaj wii xxmaj fit xxbos xxmaj wii xxmaj fit xxmaj plus xxbos xxmaj kinect xxmaj adventures ! xxbos xxmaj grand xxmaj theft xxmaj auto v xxbos xxmaj grand xxmaj theft xxmaj auto : xxmaj san xxmaj andreas xxbos xxmaj super xxmaj mario xxmaj world xxbos xxmaj brain xxmaj age :
1,xxbos xxmaj fallout 3 xxbos xxmaj pokemon xxmaj mystery xxmaj dungeon : xxmaj explorers of xxmaj time / xxmaj explorers of xxmaj darkness xxbos xxmaj uncharted : xxmaj drake 's xxmaj fortune xxbos xxmaj madden xxup nfl 06 xxbos xxup lego xxmaj star xxmaj wars : xxmaj the xxmaj complete xxmaj saga xxbos xxmaj diddy xxmaj kong xxmaj racing xxbos xxmaj monster xxmaj hunter xxmaj freedom 3 xxbos xxmaj dr.
2,creed xxup iv : xxmaj black xxmaj flag xxbos xxmaj english xxmaj training : xxmaj have xxmaj fun xxmaj xxunk xxmaj your xxmaj xxunk ! xxbos xxmaj james xxmaj bond 007 : xxmaj nightfire xxbos xxmaj ratchet & xxmaj clank xxbos xxmaj mario xxmaj party 9 xxbos xxmaj star xxmaj wars xxmaj episode xxup iii : xxmaj revenge of the xxmaj sith xxbos xxmaj max xxmaj payne xxbos xxmaj the
3,xxmaj invaders xxbos xxmaj crazy xxmaj taxi xxbos xxmaj perfect xxmaj dark xxbos xxmaj game xxmaj party xxbos xxmaj dragon xxmaj warrior xxbos xxmaj tomb xxmaj raider ( 2013 ) xxbos xxmaj marvel : xxmaj ultimate xxmaj alliance xxbos xxup pes 2009 : xxmaj pro xxmaj evolution xxmaj soccer xxbos xxmaj mario xxmaj party 2 xxbos xxmaj saints xxmaj row : xxmaj the xxmaj third xxbos xxmaj rockstar xxmaj games
4,airborne xxmaj commando xxbos xxmaj kingdom xxmaj hearts : xxmaj birth by xxmaj sleep xxbos xxup ufc 2009 xxmaj undisputed xxbos xxmaj dark xxmaj souls xxbos xxmaj yu - xxmaj gi - xxmaj oh ! xxmaj the xxmaj eternal xxmaj duelist xxmaj soul xxbos xxmaj need for xxmaj speed : prostreet xxbos xxmaj the xxmaj sims 3 xxbos xxmaj tom xxmaj clancy 's xxmaj splinter xxmaj cell : xxmaj conviction


In [50]:
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5)
learn.fit_one_cycle(1, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,3.701308,3.005414,0.472396,00:08


In [74]:
learn.unfreeze()
learn.fit_one_cycle(1, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.120106,1.636049,0.45725,00:16


In [57]:
learn.predict("Dragon ", n_words=7)

'Dragon  Ball : Dora Zero'

In [0]:
# Save the encoder
learn.save_encoder('ft_enc')

## Classifier

In [0]:
data_class = TextClasDataBunch.from_df(path=path,
                                       train_df= train_df,
                                       valid_df=valid_df,
                                       label_cols=target_col,
                                       text_cols=input_col,
                                       vocab=data_lm.train_ds.vocab)

In [0]:
learn = text_classifier_learner(data_class, AWD_LSTM, drop_mult=0.5)
learn.load_encoder('ft_enc')

In [62]:
data_class.show_batch()

text,target
xxbos xxup ds xxmaj xxunk xxmaj xxunk xxmaj suspense : xxmaj xxunk xxmaj xxunk - xxmaj xxunk xxmaj xxunk - xxmaj xxunk xxmaj xxunk xxmaj xxunk - xxmaj koto ni xxmaj maru xxmaj xxunk xxmaj xxunk : xxmaj kyoto xxmaj xxunk xxmaj xxunk xxmaj file,Adventure
xxbos xxmaj xxunk xxmaj daigaku xxmaj xxunk xxmaj xxunk xxmaj xxunk - xxmaj kawashima xxmaj ryuuta xxmaj kyouju xxmaj kanshuu - xxmaj mono xxmaj xxunk xxmaj nou o xxmaj xxunk xxunk no xxmaj oni xxmaj training,Action
xxbos xxup ds xxmaj xxunk xxmaj xxunk xxmaj suspense xxmaj shin xxmaj tantei xxmaj series : xxmaj kyoto xxmaj xxunk xxmaj xxunk no xxmaj xxunk - xxmaj xxunk no xxmaj wana,Adventure
xxbos xxmaj johnny xxmaj bravo in xxmaj the xxmaj xxunk - xxmaj mega - xxmaj mighty - xxmaj ultra - xxmaj extreme xxmaj date - o - xxmaj rama,Action
xxbos xxmaj dragon xxmaj quest 25 xxmaj xxunk xxmaj xxunk : xxmaj famicom & xxmaj super xxmaj famicom xxmaj dragon xxmaj quest i - xxup ii - xxup iii,Role-Playing


In [63]:
learn.fit_one_cycle(1, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,1.849294,1.986713,0.319,00:06


In [64]:
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(5e-3/2., 5e-3))

epoch,train_loss,valid_loss,accuracy,time
0,1.671223,1.900727,0.35425,00:08


In [65]:
learn.unfreeze()
learn.fit_one_cycle(1, slice(2e-3/100, 2e-3))

epoch,train_loss,valid_loss,accuracy,time
0,1.42283,1.797472,0.39775,00:15


In [73]:
learn.predict("Genital Jousting")

(Category Action,
 tensor(0),
 tensor([0.1625, 0.0817, 0.0283, 0.1539, 0.0572, 0.0750, 0.0654, 0.1193, 0.0659,
         0.0779, 0.0700, 0.0428]))