In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Input, GlobalMaxPooling1D, Conv1D, MaxPooling1D
from tensorflow.keras.layers import LSTM, Embedding, GRU
import tensorflow as tf


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
def plot(history, info_type='loss'):
    """
    Plot a training metric and, when recorded, its validation counterpart.

    history: the history callback from a model.fit
    info_type: what you want to show. (e.g. 'loss', 'acc', 'accuracy')

    Draws on the current pyplot figure and returns None.
    Raises KeyError if `info_type` is not a key of `history.history`.
    """
    metrics = history.history
    val_key = 'val_' + info_type

    # The label must be a plain string: a one-element list shows up in the
    # legend as "['loss']" and is deprecated for single datasets in newer
    # matplotlib releases.
    plt.plot(metrics[info_type], label=info_type)

    # Test for the validation series explicitly instead of catching every
    # exception, so real plotting errors are not reported as "no val_...".
    if val_key in metrics:
        plt.plot(metrics[val_key], label=val_key)
    else:
        print(f'no val_{info_type}')

    plt.title(info_type)
    plt.xlabel('epoch')
    plt.legend()

In [None]:
# Read the subjects/questions CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='../input/iitjee-neet-aims-students-questions-data/subjects-questions.csv',
)

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Encode the subject name as an integer class id (0-3).
# NOTE(review): Series.map yields NaN for any subject not listed here —
# confirm the column only contains these four values.
df['ca_labels'] = df['Subject'].map(
    dict(Biology=0, Chemistry=1, Maths=2, Physics=3)
)

In [None]:
# Target vector: the integer-encoded subject labels as a NumPy array.
y = df['ca_labels'].to_numpy()

In [None]:
# Number of labels.
np.shape(y)

In [None]:
# Question texts as an independent, writable object array.
# `.values` can hand back a view onto the DataFrame column (read-only when
# pandas Copy-on-Write is active); the following cell assigns into `x`
# element by element, so take an explicit copy instead.
x = df['eng'].to_numpy(dtype=object, copy=True)

In [None]:
# Replace embedded newlines with spaces so each question is one line of text.
# A new array is built instead of assigning element-by-element: `x` may be a
# (possibly read-only) view onto the DataFrame column, so writing into it can
# either silently modify `df` or raise.
x = np.array([question.replace('\n', ' ') for question in x], dtype=object)

In [None]:
# Hold out 33% of the samples for evaluation. A fixed random_state makes the
# shuffled split repeatable, so the models trained below are compared on the
# same data every time the notebook is re-run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, shuffle=True, random_state=42
)

In [None]:
# Text-preprocessing and embedding hyperparameters.
vocab_size = 1000    # Tokenizer num_words and Embedding input size
maxlen = 50          # sequence length used when padding/truncating
trunc = 'post'       # truncation side passed to pad_sequences
oov_token = '<oov>'  # Tokenizer token for out-of-vocabulary words
embedded_dim = 8     # Embedding output dimension

In [None]:
# Tokenizer capped at vocab_size word indices, with an explicit OOV token.
tokenizer = Tokenizer(
    oov_token=oov_token,
    num_words=vocab_size,
)

In [None]:
# Build the vocabulary from the training texts only, then keep a reference
# to the resulting word -> index mapping.
tokenizer.fit_on_texts(texts=x_train)
word_index = tokenizer.word_index

In [None]:
# Show only the first 20 entries; displaying the whole vocabulary dict
# floods the cell output.
dict(list(word_index.items())[:20])

In [None]:
# Convert each training text into its list of word indices.
sequences = tokenizer.texts_to_sequences(texts=x_train)

In [None]:
# Number of distinct words in the fitted vocabulary.
len(word_index.keys())

In [None]:
# Pad/truncate every training sequence to exactly maxlen tokens.
padded = pad_sequences(
    sequences,
    truncating=trunc,
    maxlen=maxlen,
)

In [None]:
# Shape of the padded training matrix.
np.shape(padded)

In [None]:
# Encode the held-out texts with the tokenizer fitted on the training set.
testing_sequence = tokenizer.texts_to_sequences(x_test)
# Use the same truncation side as the training data; without `truncating`
# pad_sequences falls back to its default ('pre'), so long test questions
# would keep a different part of the text than long training questions.
test_padded = pad_sequences(testing_sequence, maxlen=maxlen, truncating=trunc)

In [None]:
# Shape of the padded evaluation matrix.
np.shape(test_padded)

In [None]:
# Baseline: embedding -> global max pooling -> dense head with 4 softmax outputs.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedded_dim, input_length=maxlen))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(30, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [None]:
# Integer class labels -> sparse categorical cross-entropy; track accuracy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train for 10 epochs, validating on the held-out split after each epoch.
hist = model.fit(
    x=padded,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(test_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Loss curves for the baseline model.
plot(hist, info_type='loss')

In [None]:
# Accuracy curves for the baseline model.
plot(hist, info_type='acc')

In [None]:
# LSTM variant: per-timestep LSTM outputs are max-pooled before the dense head.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedded_dim, input_length=maxlen))
model.add(tf.keras.layers.LSTM(15, return_sequences=True))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(30, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [None]:
# Same loss/optimizer/metric setup, applied to the LSTM model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the LSTM model for 10 epochs with per-epoch validation.
hist = model.fit(
    x=padded,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(test_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Loss curves for the LSTM model.
plot(hist, info_type='loss')

In [None]:
# Accuracy curves for the LSTM model.
plot(hist, info_type='acc')

In [None]:
# Bidirectional LSTM variant; sequence outputs are max-pooled over time.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedded_dim, input_length=maxlen))
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(15, return_sequences=True)
    )
)
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(30, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [None]:
# Same loss/optimizer/metric setup, applied to the bidirectional LSTM model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the bidirectional LSTM model for 10 epochs with per-epoch validation.
hist = model.fit(
    x=padded,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(test_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Loss curves for the bidirectional LSTM model.
plot(hist, info_type='loss')

In [None]:
# Accuracy curves for the bidirectional LSTM model.
plot(hist, info_type='acc')

In [None]:
# GRU variant: per-timestep GRU outputs are max-pooled before the dense head.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedded_dim, input_length=maxlen))
model.add(tf.keras.layers.GRU(15, return_sequences=True))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(30, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [None]:
# Same loss/optimizer/metric setup, applied to the GRU model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the GRU model for 10 epochs with per-epoch validation.
hist = model.fit(
    x=padded,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(test_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Loss curves for the GRU model.
plot(hist, info_type='loss')

In [None]:
# Accuracy curves for the GRU model.
plot(hist, info_type='acc')

In [None]:
# Bidirectional GRU variant; sequence outputs are max-pooled over time.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedded_dim, input_length=maxlen))
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.GRU(15, return_sequences=True)
    )
)
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(30, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [None]:
# Same loss/optimizer/metric setup, applied to the bidirectional GRU model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the bidirectional GRU model for 10 epochs with per-epoch validation.
hist = model.fit(
    x=padded,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(test_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Loss curves for the bidirectional GRU model.
plot(hist, info_type='loss')

In [None]:
# Accuracy curves for the bidirectional GRU model.
plot(hist, info_type='acc')

In [None]:
# Conv1D variant (64 filters, kernel size 3) with global average pooling.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedded_dim, input_length=maxlen))
model.add(tf.keras.layers.Conv1D(64, 3, activation='relu'))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(30, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [None]:
# Same loss/optimizer/metric setup, applied to the Conv1D + average-pooling model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the Conv1D + average-pooling model for 10 epochs with per-epoch validation.
hist = model.fit(
    x=padded,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(test_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Loss curves for the Conv1D + average-pooling model.
plot(hist, info_type='loss')

In [None]:
# Accuracy curves for the Conv1D + average-pooling model.
plot(hist, info_type='acc')

In [None]:
# Conv1D variant (64 filters, kernel size 3) with global max pooling.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedded_dim, input_length=maxlen))
model.add(tf.keras.layers.Conv1D(64, 3, activation='relu'))
model.add(tf.keras.layers.GlobalMaxPooling1D())
model.add(tf.keras.layers.Dense(30, activation='relu'))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [None]:
# Same loss/optimizer/metric setup, applied to the Conv1D + max-pooling model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the Conv1D + max-pooling model for 10 epochs with per-epoch validation.
hist = model.fit(
    x=padded,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(test_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Loss curves for the Conv1D + max-pooling model.
plot(hist, info_type='loss')

In [None]:
# Accuracy curves for the Conv1D + max-pooling model.
plot(hist, info_type='acc')

In [None]:
# Regularised Conv1D variant: average pooling followed by two 50-unit dense
# layers, each with an L2 activity penalty, and dropout (rate 0.2) between stages.
# NOTE(review): activity_regularizer penalises the layer outputs; if a weight
# penalty was intended, kernel_regularizer is the argument to use — confirm.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedded_dim, input_length=maxlen))
model.add(tf.keras.layers.Conv1D(64, 3, activation='relu'))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dropout(0.2))
model.add(
    tf.keras.layers.Dense(
        50,
        activation='relu',
        activity_regularizer=tf.keras.regularizers.L2(0.01),
    )
)
model.add(tf.keras.layers.Dropout(0.2))
model.add(
    tf.keras.layers.Dense(
        50,
        activation='relu',
        activity_regularizer=tf.keras.regularizers.L2(0.01),
    )
)
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(4, activation='softmax'))

In [None]:
# Same loss/optimizer/metric setup, applied to the regularised Conv1D model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the regularised Conv1D model for 50 epochs with per-epoch validation.
hist = model.fit(
    x=padded,
    y=y_train,
    batch_size=64,
    epochs=50,
    validation_data=(test_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Loss curves for the regularised Conv1D model.
plot(hist, info_type='loss')

In [None]:
# Accuracy curves for the regularised Conv1D model.
plot(hist, info_type='acc')