# RNN Architecture - Vanilla RNN

## Setup - Libraries, Packages, Embeddings, Paths

### Libraries 

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os 
import urllib.request
import zipfile 
from tqdm import tqdm 

### GloVe Embeddings

In [None]:
# Local directory in which the pre-trained GloVe embedding files are stored.
embeddings_path = "./Embeddings"
def download_progress(block_num, block_size, total_size):
    """Report hook for ``urllib.request.urlretrieve`` that drives a tqdm bar.

    The bar is positioned from the absolute number of bytes transferred
    (``block_num * block_size``) instead of adding ``block_size`` on every
    call, so the initial ``block_num == 0`` call and a short final block no
    longer push the counter past the file size.

    Args:
        block_num: Number of blocks transferred so far (0 on the first call).
        block_size: Size of one block in bytes.
        total_size: Total size of the file in bytes; values <= 0 mean the
            size is unknown and the bar is shown without a total.

    Returns:
        None. The active bar is kept on ``download_progress.pbar`` while a
        download is running and removed once the download completes.
    """
    known_total = total_size if total_size and total_size > 0 else None

    pbar = getattr(download_progress, "pbar", None)
    if pbar is None or block_num == 0:
        # block_num == 0 marks the start of a new transfer: discard any bar
        # left behind by an earlier, interrupted download.
        if pbar is not None:
            pbar.close()
        pbar = tqdm(total=known_total, unit="B", unit_scale=True)
        download_progress.pbar = pbar

    downloaded = block_num * block_size
    if known_total is not None:
        # The last block is usually shorter than block_size; clamp to the total.
        downloaded = min(downloaded, known_total)
    pbar.update(downloaded - pbar.n)

    if known_total is not None and downloaded >= known_total:
        # Transfer finished: flush the bar and forget it so that the next
        # download starts with a fresh one.
        pbar.close()
        del download_progress.pbar

# Fetch and unpack the pre-trained GloVe 6B vectors, unless they are already
# present. The guard checks for the extracted 50d file rather than for the
# directory: an interrupted download leaves the directory behind, and a
# directory-only check would then skip the download on every later run.
glove_zip_path = os.path.join(embeddings_path, "glove.6B.zip")
glove_50d_file = os.path.join(embeddings_path, "glove.6B.50d.txt")

if not os.path.exists(glove_50d_file):
    print("create directory to store pre-trained glove embeddings")
    os.makedirs(embeddings_path, exist_ok=True)
    print("download pre-trained Glove Embeddings")
    try:
        urllib.request.urlretrieve(
            "http://nlp.stanford.edu/data/glove.6B.zip",
            glove_zip_path,
            download_progress,
        )
        print("unpack embeddings")
        with zipfile.ZipFile(glove_zip_path, "r") as zip_ref:
            zip_ref.extractall(embeddings_path)
    finally:
        # Remove the archive on success and on failure alike, so that a
        # partial or corrupt zip is never left on disk.
        if os.path.exists(glove_zip_path):
            os.remove(glove_zip_path)

    print("embeddings download complete")

create directory to store pre-trained glove embeddings
download pre-trained Glove Embeddings


100%|█████████▉| 862M/862M [06:57<00:00, 2.54MB/s]   

unpack embeddings
embeddings download complete


862MB [07:10, 2.54MB/s]                           

### Paths 

In [6]:
# Model-ready train / test splits (CSV).
train_data_path = "./Datasets/model_data/train_data.csv"
test_data_path = "./Datasets/model_data/test_data.csv"

# GloVe 6B vectors, 50-dimensional variant.
glove_6b_50_path = "./Embeddings/glove.6B.50d.txt"

## Data

### Train Data

In [20]:
# Read the training split from CSV, then preview its first ten rows.
train_df = pd.read_csv(filepath_or_buffer=train_data_path)
train_df.head(n=10)

Unnamed: 0,text,decade,decade_label,book_title,book_id,paragraph_id,word_count
0,Produced by Gary R. Young THE SCHOOL FOR SCAND...,1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_000,210
1,"the works of Sheridan as he wrote them, I may ...",1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_001,210
2,he had been nineteen years endeavouring to sat...,1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_002,210
3,"That even you assist her fame to raise, Approv...",1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_003,210
4,and face-- Poets would study the immortal line...,1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_004,210
5,who the peril of her lips shall paint? Strip t...,1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_005,210
6,"might well be thought Prerogative in her, and ...",1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_006,210
7,th' acknowledged praise Has spread conviction ...,1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_007,210
8,LAST NIGHT LORD L. [Sips] WAS CAUGHT WITH LADY...,1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_008,210
9,he would through-- He'll fight--that's write--...,1770,0,The School for Scandal,1770_The_School_for_Scand,1770_1770_The_School_for_Scand_009,210


In [21]:
# Print the column dtypes, non-null counts and memory usage of the training frame.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84860 entries, 0 to 84859
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   text          84860 non-null  object
 1   decade        84860 non-null  int64 
 2   decade_label  84860 non-null  int64 
 3   book_title    84860 non-null  object
 4   book_id       84860 non-null  object
 5   paragraph_id  84860 non-null  object
 6   word_count    84860 non-null  int64 
dtypes: int64(3), object(4)
memory usage: 4.5+ MB


### Test Data

In [22]:
# Read the test split from CSV, then preview its first ten rows.
test_df = pd.read_csv(filepath_or_buffer=test_data_path)
test_df.head(n=10)

Unnamed: 0,text,decade,decade_label,book_title,book_id,paragraph_id,word_count
0,An Inquiry into the Nature and Causes of the W...,1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__000,210
1,THE EXPENSE OF MAINTAINING THE NATIONAL CAPITA...,1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__001,210
2,"PRODUCE OF LAND, AS EITHER THE SOLE OR THE PRI...",1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__002,210
3,"Whatever be the soil, climate, or extent of te...",1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__003,210
4,of those who work; yet the produce of the whol...,1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__004,210
5,"of capital stock, of the manner in which it is...",1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__005,210
6,which some magnify the importance of that indu...,1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__006,210
7,"the expenses incumbent on the whole society, a...",1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__007,210
8,trifling manufactures which are destined to su...,1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__008,210
9,machinery employed in it (to the invention of ...,1770,0,An Inquiry into the Nature and Causes of the W...,1770_An_Inquiry_into_the_,1770_1770_An_Inquiry_into_the__009,210


In [23]:
# Print the column dtypes, non-null counts and memory usage of the test frame.
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25538 entries, 0 to 25537
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   text          25538 non-null  object
 1   decade        25538 non-null  int64 
 2   decade_label  25538 non-null  int64 
 3   book_title    25538 non-null  object
 4   book_id       25538 non-null  object
 5   paragraph_id  25538 non-null  object
 6   word_count    25538 non-null  int64 
dtypes: int64(3), object(4)
memory usage: 1.4+ MB


## Tokenization

### Train Data - First Paragraph

In [24]:
# First paragraph (row position 0) of the training set.
# The column is selected by label: a positional column index would silently
# return a different field if the CSV column order ever changed.
train_data_first = train_df["text"].iloc[0]
train_data_first

'Produced by Gary R. Young THE SCHOOL FOR SCANDAL A COMEDY A PORTRAIT<1> BY R. B. SHERIDAN, ESQ. Transcriber\'s Comments on the preparation of this E-Text: SQUARE BRACKETS: The square brackets, i.e. [ ] are copied from the printed book, without change, except that a closing bracket "]" has been added to the stage directions. FOOTNOTES: For this E-Text version of the book, the footnotes have been consolidated at the end of the play. Numbering of the footnotes has been changed, and each footnote is given a unique identity in the form <X>. CHANGES TO THE TEXT: Character names have been expanded. For Example, SIR BENJAMIN was SIR BEN. THE TEXT OF THE SCHOOL FOR SCANDAL The text of THE SCHOOL FOR SCANDAL in this edition is taken, by Mr. Fraser Rae\'s generous permission, from his SHERIDAN\'S PLAYS NOW PRINTED AS HE WROTE THEM. In his Prefatory Notes (xxxvii), Mr. Rae writes: "The manuscript of it [THE SCHOOL FOR SCANDAL] in Sheridan\'s own handwriting is preserved at Frampton Court and is n

### Test Data - First Paragraph

In [25]:
# First paragraph (row position 0) of the test set.
# The column is selected by label: a positional column index would silently
# return a different field if the CSV column order ever changed.
test_data_first = test_df["text"].iloc[0]
test_data_first

'An Inquiry into the Nature and Causes of the Wealth of Nations by Adam Smith Contents INTRODUCTION AND PLAN OF THE WORK. BOOK I. OF THE CAUSES OF IMPROVEMENT IN THE PRODUCTIVE POWERS OF LABOUR, AND OF THE ORDER ACCORDING TO WHICH ITS PRODUCE IS NATURALLY DISTRIBUTED AMONG THE DIFFERENT RANKS OF THE PEOPLE. CHAPTER I. OF THE DIVISION OF LABOUR. CHAPTER II. OF THE PRINCIPLE WHICH GIVES OCCASION TO THE DIVISION OF LABOUR. CHAPTER III. THAT THE DIVISION OF LABOUR IS LIMITED BY THE EXTENT OF THE MARKET. CHAPTER IV. OF THE ORIGIN AND USE OF MONEY. CHAPTER V. OF THE REAL AND NOMINAL PRICE OF COMMODITIES, OR OF THEIR PRICE IN LABOUR, AND THEIR PRICE IN MONEY. CHAPTER VI. OF THE COMPONENT PART OF THE PRICE OF COMMODITIES. CHAPTER VII. OF THE NATURAL AND MARKET PRICE OF COMMODITIES. CHAPTER VIII. OF THE WAGES OF LABOUR. CHAPTER IX. OF THE PROFITS OF STOCK. CHAPTER X. OF WAGES AND PROFIT IN THE DIFFERENT EMPLOYMENTS OF LABOUR AND STOCK. CHAPTER XI. OF THE RENT OF LAND. BOOK II. OF THE NATURE, AC

##