In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, LSTM, Flatten, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
data=pd.read_csv('mcq.csv')

In [3]:
data.head()

Unnamed: 0,id,prompt,A,B,C,D,E,answer
0,0,Accounting is sometimes called the “language o...,Wall Street,business,Main Street,financial statements,financial statements,B
1,1,What is a characteristic of Financial accounti...,should be incomplete in order to confuse compe...,should be prepared differently by each company,provides investors guarantees about the future,summarizes what has already occurred,should be incomplete in order to confuse compe...,D
2,2,Which of the following is not included in exte...,lenders such as bankers,governmental agencies such as the IRS,employees of a business,potential investors,governmental agencies such as the IRS,C
3,3,Which of the following groups would have acces...,bankers,investors,competitors of the business,managers,bankers,D
4,4,All of the following are examples of manageria...,preparing external financial statements in com...,deciding whether or not to use automation,making equipment repair or replacement decisions,measuring costs of production for each product...,deciding whether or not to use automation,A


In [4]:
data.tail()

Unnamed: 0,id,prompt,A,B,C,D,E,answer
3364,3364,During which is prothrombin converted to throm...,intrinsic pathway,extrinsic pathway,common pathway,formation of the platelet plug,formation of the platelet plug,C
3365,3365,What is hemophilia characterized by?,inadequate production of heparin,inadequate production of clotting factors,excessive production of fibrinogen,excessive production of platelets,excessive production of platelets,B
3366,3366,What is the process called in which antibodies...,sensitization,coagulation,agglutination,hemolysis,coagulation,C
3367,3367,Which of the following is correct about people...,have both antigens A and B on their erythrocytes,lack both antigens A and B on their erythrocytes,have neither anti-A nor anti-B antibodies circ...,are considered universal recipients,have both antigens A and B on their erythrocytes,B
3368,3368,Which of the following in hemolytic disease of...,a type AB mother is carrying a type O fetus,a type O mother is carrying a type AB fetus,an Rh+ mother is carrying an Rh− fetus,an Rh− mother is carrying a second Rh+ fetus,an Rh− mother is carrying a second Rh+ fetus,C


In [5]:
data.describe()

Unnamed: 0,id
count,3369.0
mean,1684.0
std,972.690855
min,0.0
25%,842.0
50%,1684.0
75%,2526.0
max,3368.0


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3369 entries, 0 to 3368
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      3369 non-null   int64 
 1   prompt  3369 non-null   object
 2   A       3369 non-null   object
 3   B       3369 non-null   object
 4   C       3369 non-null   object
 5   D       3369 non-null   object
 6   E       3369 non-null   object
 7   answer  3369 non-null   object
dtypes: int64(1), object(7)
memory usage: 210.7+ KB


In [7]:
questions = data["prompt"].astype(str).tolist()
choices = data[["A", "B", "C", "D", "E"]].astype(str).values.tolist()
answers = data["answer"].astype(str).tolist()

In [8]:
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(questions + [ans for ans in answers])

In [9]:
question_sequences = tokenizer.texts_to_sequences(questions)
answer_sequences = tokenizer.texts_to_sequences(answers)

In [10]:
max_length = max(len(seq) for seq in question_sequences)
X = pad_sequences(question_sequences, maxlen=max_length, padding="post")
y = pad_sequences(answer_sequences, maxlen=1, padding="post")

In [11]:
latent_dim=100
def build_generator():
    model = Sequential([
        Dense(256, activation="relu", input_dim=latent_dim),
        Dense(512, activation="relu"),
        Dense(1024, activation="relu"),
        Dense(max_length, activation="softmax") 
    ])
    return model

In [12]:
def build_discriminator():
    model = Sequential([
        Dense(1024, activation="relu", input_shape=(max_length,)),
        Dense(512, activation="relu"),
        Dense(256, activation="relu"),
        Dense(1, activation="sigmoid") 
    ])
    model.compile(loss="binary_crossentropy", optimizer=Adam(0.0002, 0.5), metrics=["accuracy"])
    return model

In [13]:
def build_gan(generator, discriminator):
    discriminator.trainable = False
    gan_input = Input(shape=(latent_dim,))
    generated_text = generator(gan_input)
    validity = discriminator(generated_text)
    gan = Model(gan_input, validity)
    gan.compile(loss="binary_crossentropy", optimizer=Adam(0.0002, 0.5))
    return gan

In [14]:
generator = build_generator()
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)
epochs = 2500
batch_size = 32
real_labels = np.ones((batch_size, 1))
fake_labels = np.zeros((batch_size, 1))

In [15]:
for epoch in range(epochs):
    real_samples = X[np.random.randint(0, X.shape[0], batch_size)]
    fake_samples = generator.predict(np.random.normal(0, 1, (batch_size, latent_dim)))
    d_loss_real = discriminator.train_on_batch(real_samples, real_labels)
    d_loss_fake = discriminator.train_on_batch(fake_samples, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, real_labels)
    if epoch % 500 == 0:
        print(f"Epoch {epoch} - D Loss: {d_loss[0]:.4f}, D Acc: {d_loss[1]*100:.2f}% | G Loss: {g_loss:.4f}")

Epoch 0 - D Loss: 0.3477, D Acc: 50.00% | G Loss: 0.6986
Epoch 500 - D Loss: 0.0049, D Acc: 100.00% | G Loss: 4.6397
Epoch 1000 - D Loss: 0.0003, D Acc: 100.00% | G Loss: 7.3623
Epoch 1500 - D Loss: 0.0000, D Acc: 100.00% | G Loss: 9.5910
Epoch 2000 - D Loss: 0.0000, D Acc: 100.00% | G Loss: 10.6649


In [16]:
generator.save("mcq_generator.h5")
print("Model saved as mcq_generator.h5")

Model saved as mcq_generator.h5
