In [None]:
#word level one hot encoding example(custom code)
import numpy as np

samples = ['The cat sat on the mat.', 'The dog ate my homework.']

# Build the vocabulary: every distinct whitespace-separated token receives the
# next free integer id. Ids start at 1, so index 0 is never assigned to a token.
token_index = {}
for sentence in samples:
    for token in sentence.split():
        token_index.setdefault(token, len(token_index) + 1)

# Only the first `max_length` tokens of each sample are encoded.
max_length = 10

# Shape: (number of samples, max_length, highest token id + 1).
results = np.zeros((len(samples), max_length, max(token_index.values()) + 1))

print(results.shape)

# Write a single 1. per (sample, position) in the column of that token's id.
for row, sentence in enumerate(samples):
    for col, token in enumerate(sentence.split()[:max_length]):
        results[row, col, token_index[token]] = 1.

print(samples)
print(token_index)
print(results)

In [None]:
#using keras for doing word level one hot encoding
from keras.preprocessing.text import Tokenizer
samples = ['The cat sat on the mat.', 'The dog ate my homework.']
# Tokenizer is configured with num_words=1000 and then fitted on the samples.
tokenizer=Tokenizer(num_words=1000)
tokenizer.fit_on_texts(samples)

# Integer-sequence form of the samples, produced by the fitted tokenizer.
sequences=tokenizer.texts_to_sequences(samples)

# Matrix form of the same samples, requested in 'binary' mode.
one_hot_results=tokenizer.texts_to_matrix(samples,mode='binary')

# Index built by the tokenizer during fit_on_texts.
word_index=tokenizer.word_index

# Last expression of the cell, so the notebook displays word_index.
word_index

In [None]:
#word embedding for imdb dataset
from tensorflow.keras.datasets import imdb
from keras import preprocessing
from keras.models import Sequential
from keras.layers import Flatten,Dense,Embedding

max_features=10000   # passed to imdb.load_data as num_words and used as the Embedding input size
maxlen=20            # every review is padded/truncated to this many tokens

train_split,test_split=imdb.load_data(num_words=max_features)
x_train,y_train=train_split
x_test,y_test=test_split

pad=preprocessing.sequence.pad_sequences
x_train=pad(x_train,maxlen=maxlen)
x_test=pad(x_test,maxlen=maxlen)

print(x_train.shape) #samples,maxlen

#using the embedding layer & classifier on the IMDB dataset
model=Sequential([
    Embedding(max_features,8,input_length=maxlen),  #no_of_possible_tokens,size of embedding
    Flatten(),
    Dense(1,activation='sigmoid'),
])
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['acc'])
model.summary()

history=model.fit(x_train,y_train,epochs=10,batch_size=32,validation_split=0.2)

In [None]:
#using imdb dataset with pretrained embedding 
import os
imdb_dir='/kaggle/input/imdb-raw/aclimdb/aclImdb/'

# The texts collected here feed the training/validation split built later, so
# they must come from the 'train' split. The original pointed `train_dir` at
# 'test', which would train on the held-out reviews.
train_dir = os.path.join(imdb_dir, 'train')

labels = []   # 0 for a review found under 'neg', 1 for one found under 'pos'
texts = []    # raw review text, kept in the same order as `labels`

for label_type in ['neg', 'pos']:
    dir_name = os.path.join(train_dir, label_type)
    label = 0 if label_type == 'neg' else 1
    for fname in os.listdir(dir_name):
        if not fname.endswith('.txt'):
            continue
        # Context manager closes the file even if read() fails; the explicit
        # encoding avoids depending on the platform's default codec.
        with open(os.path.join(dir_name, fname), encoding='utf-8') as f:
            texts.append(f.read())
        labels.append(label)

In [None]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np

# Tunables for this experiment.
maxlen=100                 # reviews are padded/truncated to this many tokens
training_samples=200       # number of shuffled samples used for training
validation_samples=10000   # number of shuffled samples used for validation
max_words=10000            # passed to Tokenizer as num_words

tokenizer=Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences=tokenizer.texts_to_sequences(texts)

word_index=tokenizer.word_index
print('found %s unique tokens.'%len(word_index))

data=pad_sequences(sequences,maxlen=maxlen)
labels=np.asarray(labels)

print(data.shape)
print(labels.shape)

# `texts`/`labels` were collected class by class, so apply one random
# permutation to both arrays before slicing out the splits.
indices=np.arange(len(data))
np.random.shuffle(indices)
data,labels=data[indices],labels[indices]

# Training takes the first `training_samples` rows; validation the next
# `validation_samples` rows.
val_end=training_samples+validation_samples
x_train,y_train=data[:training_samples],labels[:training_samples]
x_val,y_val=data[training_samples:val_end],labels[training_samples:val_end]






In [None]:
glove_dir='/kaggle/input/glove6b'

# Maps each word (first field of a line) to its coefficient vector (remaining
# fields parsed as float32).
embeddings_index={}
# `with` guarantees the file is closed even if a line fails to parse, and the
# explicit encoding avoids depending on the platform's default codec.
with open(os.path.join(glove_dir,'glove.6B.100d.txt'),encoding='utf-8') as f:
    for line in f:
        values=line.split()
        if not values:
            # Blank line: nothing to index (values[0] would raise IndexError).
            continue
        word=values[0]
        coefs=np.asarray(values[1:],dtype='float32')
        embeddings_index[word]=coefs

print(len(embeddings_index))
    

In [None]:
embedding_dim = 100

# Row r holds the pretrained vector of the word whose tokenizer index is r.
# Rows stay all-zero when the index is >= max_words or the word has no entry
# in embeddings_index.
embedding_matrix = np.zeros((max_words, embedding_dim))
for token, row in word_index.items():
    if row >= max_words:
        continue
    vector = embeddings_index.get(token)
    if vector is None:
        continue
    embedding_matrix[row] = vector

In [None]:
from keras.models import Sequential
from keras.layers import Embedding,Flatten,Dense
# Embedding -> Flatten -> Dense(32, relu) -> Dense(1, sigmoid) classifier.
model=Sequential()
model.add(Embedding(max_words,embedding_dim,input_length=maxlen))
model.add(Flatten())
model.add(Dense(32,activation='relu'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

# Load the precomputed embedding_matrix into the first layer (the Embedding)
# and mark that layer as non-trainable. Both steps happen before compile().
model.layers[0].set_weights([embedding_matrix])
model.layers[0].trainable=False

model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['acc'])
# Train on the small training split and validate on (x_val, y_val).
history = model.fit(x_train, y_train,
                    epochs=10,
                    batch_size=32,
                    validation_data=(x_val, y_val))
# Persist the trained weights next to the notebook.
model.save_weights('pretrained_glovemodel.h5')

In [None]:
def plotCurves(history):
    """Plot training/validation accuracy and loss curves from a fit history.

    Draws two figures: accuracy per epoch, then loss per epoch. Training
    values are drawn with the 'bo' format and validation values with 'b'.

    Args:
        history: object exposing a ``history`` dict with the keys ``'loss'``
            and ``'val_loss'`` plus an accuracy pair, either
            ``'acc'``/``'val_acc'`` or ``'accuracy'``/``'val_accuracy'``.

    Raises:
        KeyError: if a required key is missing from ``history.history``.
    """
    import matplotlib.pyplot as plt

    metrics = history.history
    # The accuracy key follows the metric name given at compile time, so
    # accept the long spelling when the short one is absent.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    acc = metrics[acc_key]
    val_acc = metrics['val_' + acc_key]
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(acc) + 1)

    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()

    # Start a second figure so the loss curves do not share the accuracy axes.
    plt.figure()

    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()

    plt.show()
# Plot the curves for the most recent `history` returned by model.fit.
plotCurves(history)

In [None]:
#numpy implementation of RNN
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding,SimpleRNN
timesteps=100         # number of steps in the input sequence
input_features=32     # size of each per-step input vector
output_features=64    # size of the state / per-step output vector


# Random input sequence, one row per timestep; the state starts at all zeros.
inputs=np.random.random((timesteps,input_features))
state_t=np.zeros((output_features,))


# Random parameters: W maps the input, U maps the previous state, b is the bias.
W=np.random.random((output_features,input_features))
U=np.random.random((output_features,output_features))
b=np.random.random((output_features,))

successive_outputs=[]
for input_t in inputs:
    # Combine the current input with the previous state to get this step's output.
    output_t=np.tanh(np.dot(W,input_t)+np.dot(U,state_t)+b)

    successive_outputs.append(output_t)

    # This step's output becomes the state seen by the next step.
    state_t=output_t

# Stack the per-step vectors along a new leading axis so the result has shape
# (timesteps, output_features). np.concatenate on axis 0 would instead join the
# 1-D vectors end to end into a flat (timesteps*output_features,) array.
final_output_sequence=np.stack(successive_outputs,axis=0)


#inbuilt RNN in keras

# Embedding, then three SimpleRNN layers created with return_sequences=True,
# then a final SimpleRNN left at its default return_sequences setting.
recurrent_stack=[SimpleRNN(32,return_sequences=True) for _ in range(3)]
model=Sequential([Embedding(10000,32),*recurrent_stack,SimpleRNN(32)])
model.summary()

In [None]:
#imdb datasets with RNN
from tensorflow.keras.datasets import imdb
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Flatten,Dense,Embedding,SimpleRNN

max_features=10000   # passed to imdb.load_data as num_words and used as the Embedding input size
maxlen=500           # reviews are padded/truncated to this many tokens
batch_size=32        # NOTE(review): not used by the fit call below, which passes batch_size=128

train_split,test_split=imdb.load_data(num_words=max_features)
input_train,y_train=train_split
input_test,y_test=test_split

print(len(input_train),'train sequences')
print(len(input_test),'test sequences')

print('pad sequences(samples*time)')
input_train1=sequence.pad_sequences(input_train,maxlen=maxlen)
input_test1=sequence.pad_sequences(input_test,maxlen=maxlen)
print(input_train1.shape,'train sequences')
print(input_test1.shape,'test sequences')


#modelling
model=Sequential([
    Embedding(max_features,32),
    SimpleRNN(32),
    Dense(1,activation='sigmoid'),
])
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['acc'])
history=model.fit(input_train1,y_train,epochs=10,batch_size=128,validation_split=0.2)

plotCurves(history)


In [None]:
#using the LSTM layers in keras
from keras.layers import Flatten,Dense,Embedding,SimpleRNN,LSTM

# Same pipeline as the SimpleRNN model, with an LSTM as the recurrent layer.
model=Sequential([
    Embedding(max_features,32),
    LSTM(32),
    Dense(1,activation='sigmoid'),
])
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['acc'])
history=model.fit(input_train1,y_train,epochs=10,batch_size=128,validation_split=0.2)
plotCurves(history)

In [None]:
#using RNN for time series data
!wget https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip
!unzip jena_climate_2009_2016.csv.zip
import os
os.listdir('/kaggle/working')
data_dir='/kaggle/working/'
fname=os.path.join(data_dir,'jena_climate_2009_2016.csv')
f=open(fname)
data=f.read()
f.close()

In [None]:
# Split the raw CSV text into rows. Blank rows (e.g. the empty string left by
# a trailing newline) are dropped so that per-row float parsing never meets an
# empty record.
lines=[row for row in data.split('\n') if row.strip()]
# The first row carries the column names; the rest are data records.
header=lines[0].split(',')
print(header)
lines=lines[1:]
print(lines[1:2])
print(len(lines))

In [None]:
import numpy as np

# One row per record and one column per header field except the first: the
# leading field of every record is skipped by the [1:] slice.
float_data=np.zeros((len(lines),len(header)-1))
for row_no,record in enumerate(lines):
    float_data[row_no,:]=[float(field) for field in record.split(',')[1:]]

In [None]:
#plotting temp time series
from matplotlib import pyplot as plt
# Column 1 of float_data is used as the temperature series throughout the notebook.
temp=float_data[:,1]
# Plot the full series against its row index.
plt.plot(range(len(temp)),temp)


In [None]:
#144 points per day (6 per hour) for 10 days
# Plot only the first 1440 values of the temperature series.
plt.plot(range(1440),temp[:1440])

In [None]:
# Forecasting problem (one timestep = 10 minutes): given data reaching back
# `lookback` timesteps and sampled every `step` timesteps, predict the
# temperature `delay` timesteps ahead.
#   lookback = 1440 -> observations go back 10 days
#   step     = 6    -> observations are sampled at one data point per hour
#   delay    = 144  -> targets are 24 hours in the future
# Normalise the data first: per-column mean and standard deviation are taken
# from the first 200000 rows only, then applied in place to every row.
train_rows=float_data[:200000]
mean=train_rows.mean(axis=0)
std=train_rows.std(axis=0)
float_data-=mean
float_data/=std

In [None]:
def generator(data,lookback,delay,min_index,max_index,shuffle=False,batch_size=128,step=6):
    """Yield (samples, targets) batches from a 2-D time series, forever.

    Args:
        data: 2-D array indexed as data[row][column].
        lookback: how many rows back each input window reaches.
        delay: how many rows after the window's end row the target is read.
        min_index: first row of `data` this generator may draw from.
        max_index: upper bound (exclusive) for window end rows; None means
            ``len(data) - delay - 1``.
        shuffle: if True, window end rows are drawn at random with
            np.random.randint; otherwise they advance in order and wrap back
            to ``min_index + lookback`` once a full batch no longer fits.
        batch_size: number of windows per batch (an in-order batch may be
            shorter when it is clipped at `max_index`).
        step: stride, in rows, used when sampling inside a window.

    Yields:
        (samples, targets) where samples has shape
        ``(len(rows), lookback // step, data.shape[-1])`` and targets has
        shape ``(len(rows),)``; each target is column 1 of the row `delay`
        rows after the window's end row.
    """
    if max_index is None:
        max_index=len(data)-delay-1
    i=min_index+lookback
    while True:
        if shuffle:
            rows=np.random.randint(min_index+lookback,max_index,size=batch_size)
        else:
            if i+batch_size>=max_index:
                # Not enough rows left for a full batch: restart from the beginning.
                i=min_index+lookback
            rows=np.arange(i,min(i+batch_size,max_index))
            i+=len(rows)

        samples=np.zeros((len(rows),lookback//step,data.shape[-1]))
        targets=np.zeros((len(rows),))

        for j,row in enumerate(rows):
            # Every `step`-th row of the `lookback` rows that end just before `row`.
            indices=range(row-lookback,row,step)
            samples[j]=data[indices]
            targets[j]=data[row+delay][1]
        yield samples,targets

In [None]:
lookback=1440    # rows each input window reaches back
step=6           # stride used when sampling inside a window
delay=144        # rows between a window's end row and its target
batch_size=128

# Training draws shuffled windows from rows [0, 200000).
train_gen=generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=0,
                    max_index=200000,
                    shuffle=True,
                    step=step,
                    batch_size=batch_size)
# Validation walks rows [200001, 300000) in order.
val_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=200001,
                    max_index=300000,
                    step=step,
                    batch_size=batch_size)
# Test walks the remaining rows in order; max_index=None lets the generator
# choose its own upper bound.
test_gen = generator(float_data,
                     lookback=lookback,
                     delay=delay,
                     min_index=300001,
                     max_index=None,
                     step=step,
                     batch_size=batch_size)
# Number of batches to draw from each in-order generator.
val_steps=(300000-200001-lookback)//batch_size
test_steps=(len(float_data)-300001-lookback)//batch_size
# Backward-compatible alias: the original cell misspelled this name.
tes_steps=test_steps


In [None]:
#naive baseline
def evaluate_naive_method():
    """Score the baseline that predicts each target from the window's last value.

    Draws `val_steps` batches from `val_gen`. For every window the prediction
    is column 1 of the window's last sampled row, and the batch score is the
    mean absolute error against the targets.

    Returns:
        The mean of the per-batch MAEs, which is also printed.
    """
    batch_maes=[]
    for _ in range(val_steps):
        samples,targets=next(val_gen)
        preds=samples[:,-1,1]
        batch_maes.append(np.mean(np.abs(preds-targets)))
    naive_mae=np.mean(batch_maes)
    print(naive_mae)
    return naive_mae

naive_mae=evaluate_naive_method()
# The MAE is measured on normalised data, so scale it by the column-1 standard
# deviation. The measured value replaces the previously hard-coded 0.29.
celsius_mae=naive_mae*std[1]
celsius_mae

In [None]:
#first training with a cheap model
#input is of the form lookback//step,data.shape[-1]
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop

# Flatten each window, then a small fully connected regressor with one output.
window_shape=(lookback//step,float_data.shape[-1])
model=Sequential([
    layers.Flatten(input_shape=window_shape),
    layers.Dense(32,activation='relu'),
    layers.Dense(1),
])
model.compile(optimizer=RMSprop(),loss='mae')
# NOTE(review): fit_generator is reported as deprecated in newer tf.keras
# releases in favour of fit() - confirm against the installed version.
history=model.fit_generator(
    train_gen,
    steps_per_epoch=500,
    epochs=20,
    validation_data=val_gen,
    validation_steps=val_steps,
)

In [None]:
def plotloss(history):
    """Plot training and validation loss per epoch in a new figure.

    Args:
        history: object whose ``history`` dict provides the ``'loss'`` and
            ``'val_loss'`` sequences.
    """
    import matplotlib.pyplot as plt

    record = history.history
    train_curve, val_curve = record['loss'], record['val_loss']

    # Epochs are numbered from 1 on the x axis.
    epoch_axis = range(1, len(train_curve) + 1)

    plt.figure()

    for series, fmt, legend_label in (
        (train_curve, 'bo', 'Training loss'),
        (val_curve, 'b', 'Validation loss'),
    ):
        plt.plot(epoch_axis, series, fmt, label=legend_label)
    plt.title('Training and validation loss')
    plt.legend()

    plt.show()
# Plot the loss curves for the most recent `history` returned by training.
plotloss(history)

In [None]:
#using GRU
# A single GRU layer over windows of any length, followed by one linear output.
model=Sequential([
    layers.GRU(32,input_shape=(None,float_data.shape[-1])),
    layers.Dense(1),
])
model.compile(optimizer=RMSprop(),loss='mae')
# NOTE(review): fit_generator is reported as deprecated in newer tf.keras
# releases in favour of fit() - confirm against the installed version.
fit_options=dict(steps_per_epoch=500,
                 epochs=20,
                 validation_data=val_gen,
                 validation_steps=val_steps)
history=model.fit_generator(train_gen,**fit_options)
plotloss(history)

In [None]:
#training and evaluating dropout stacked GRU model
# Both GRU layers share the same dropout settings; the first one is created
# with return_sequences=True so that the second GRU can be stacked on it.
dropout_options=dict(dropout=0.1,recurrent_dropout=0.5)
model=Sequential([
    layers.GRU(32,return_sequences=True,input_shape=(None,float_data.shape[-1]),**dropout_options),
    layers.GRU(64,activation='relu',**dropout_options),
    layers.Dense(1),
])

model.compile(optimizer=RMSprop(), loss='mae')
# NOTE(review): fit_generator is reported as deprecated in newer tf.keras
# releases in favour of fit() - confirm against the installed version.
fit_options=dict(steps_per_epoch=500,
                 epochs=10,
                 validation_data=val_gen,
                 validation_steps=val_steps)
history=model.fit_generator(train_gen,**fit_options)
plotloss(history)

In [None]:
#bidirectional LSTM
# Embedding -> LSTM wrapped in Bidirectional -> single sigmoid output.
# Only the summary is printed here; the model is neither compiled nor trained.
model=Sequential([
    layers.Embedding(100,32),
    layers.Bidirectional(layers.LSTM(32)),
    layers.Dense(1,activation='sigmoid'),
])
model.summary()