# Individual Problems - Submission

| Date | User | Change Type | Remarks |  
| ---- | ---- | ----------- | ------- |
| 16/09/2025   | Martin | Created   | Notebook to outline steps for full prediction in Kaggle submission | 

# Content

* [Variables](#variables)
* [Load Data](#load-data)
* [Define Models](#define-models)
* [Training Workflow](#training-workflow)

# Variables 

Define the variables used in the model

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow.keras as keras

import polars as pl
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

SEED = 43




In [2]:
EMBED_DIM = 512
FFN_DIM = 256
DROPOUT_RATE = 0.1

# Load Data

- Load raw CSV files
- Simple data transformation
- Create embedding vectors

In [10]:
train = pl.read_csv("data/raw/train.csv")
test = pl.read_csv("data/raw/test.csv")

In [4]:
train.head()

row_id,QuestionId,QuestionText,MC_Answer,StudentExplanation,Category,Misconception
i64,i64,str,str,str,str,str
0,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""0ne third is equal to tree nin…","""True_Correct""","""NA"""
1,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""1 / 3 because 6 over 9 is 2 th…","""True_Correct""","""NA"""
2,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""1 3rd is half of 3 6th, so it …","""True_Neither""","""NA"""
3,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""1 goes into everything and 3 g…","""True_Neither""","""NA"""
4,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""1 out of every 3 isn't coloure…","""True_Correct""","""NA"""


In [11]:
def data_transformation(pldf: pl.DataFrame, encoder=None) -> pl.DataFrame:
  """
  Correct:
    - "True": 1
    - "False": 0
  Error:
    - "Correct": 2
    - "Neither": 0
    - "Misconception": 1
  Misconception:
    - Label_encoder
  """
  pldf = pldf.with_columns(
    pl.col('Category').str.split_exact("_", 1)
    .struct.rename_fields([ "Correct", "Error" ])
    .alias("fields")
  ).unnest("fields")

  # Remap according to above encoding
  if encoder is None:
    enc = LabelEncoder()
    pldf = pldf.with_columns(
      Correct=pl.col("Correct").replace_strict(["True", "False"], [1, 0], return_dtype=pl.Int8),
      Error=pl.col("Error").replace_strict(["Correct", "Neither", "Misconception"], [2, 0, 1], return_dtype=pl.Int8),
      Misconception=pl.col("Misconception").map_batches(enc.fit_transform)
    )
    return pldf, enc

In [12]:
train, enc = data_transformation(train)

In [13]:
train.head()

row_id,QuestionId,QuestionText,MC_Answer,StudentExplanation,Category,Misconception,Correct,Error
i64,i64,str,str,str,str,i64,i8,i8
0,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""0ne third is equal to tree nin…","""True_Correct""",21,1,2
1,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""1 / 3 because 6 over 9 is 2 th…","""True_Correct""",21,1,2
2,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""1 3rd is half of 3 6th, so it …","""True_Neither""",21,1,0
3,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""1 goes into everything and 3 g…","""True_Neither""",21,1,0
4,31772,"""What fraction of the shape is …","""\( \frac{1}{3} \)""","""1 out of every 3 isn't coloure…","""True_Correct""",21,1,2


In [14]:
# Load encoder
embed = hub.load("models/universal_sentence_encoder_v2")





In [15]:
# Create embeddings
train_qns = embed(train['QuestionText'])
train_ans = embed(train['MC_Answer'])
train_exp = embed(train['StudentExplanation'])

In [24]:
# Separate targets
train_correct = train['Correct'].to_numpy()
train_error = train['Error'].to_numpy()
train_misconception = train['Misconception'].to_numpy()

# Define Models

- Model 1: Correct answer classification
- Model 2: Error in explanation classification
- Model 3: Type of misconception classification

In [32]:
# Model 1
qns_input = keras.layers.Input(shape=(EMBED_DIM, )) # Questions
ans_input = keras.layers.Input(shape=(EMBED_DIM, )) # Answers

# Questions model
x1 = keras.layers.Dense(EMBED_DIM, activation="relu")(qns_input)
x1 = keras.layers.Dropout(DROPOUT_RATE)(x1)
x1 = keras.layers.BatchNormalization()(x1)
x1 = keras.layers.Dense(FFN_DIM, activation="relu")(x1)
x1 = keras.layers.Dropout(DROPOUT_RATE)(x1)
x1 = keras.layers.BatchNormalization()(x1)

# Answers model
x2 = keras.layers.Dense(EMBED_DIM, activation="relu")(ans_input)
x2 = keras.layers.Dropout(DROPOUT_RATE)(x2)
x2 = keras.layers.BatchNormalization()(x2)
x2 = keras.layers.Dense(FFN_DIM, activation="relu")(x2)
x2 = keras.layers.Dropout(DROPOUT_RATE)(x2)
x2 = keras.layers.BatchNormalization()(x2)

# Combined model
x3 = keras.layers.concatenate([x1, x2])
x3 = keras.layers.Dropout(DROPOUT_RATE)(x3)
x3 = keras.layers.Dense(20, activation="relu")(x3)
x3 = keras.layers.Dropout(DROPOUT_RATE)(x3)
outputs = keras.layers.Dense(1, activation="sigmoid")(x3)

model_correct = keras.Model(inputs=[qns_input, ans_input], outputs=outputs)

model_correct.compile(
  optimizer="adam",
  loss="binary_crossentropy",
  metrics=['accuracy']
)
model_correct.summary()

In [18]:
# Model 2
qns_input = keras.layers.Input(shape=(EMBED_DIM+1, )) # Questions
exp_input = keras.layers.Input(shape=(EMBED_DIM+1, )) # Explanations

# Questions model
x1 = keras.layers.Dense(EMBED_DIM, activation="relu")(qns_input)
x1 = keras.layers.Dropout(DROPOUT_RATE)(x1)
x1 = keras.layers.BatchNormalization()(x1)
x1 = keras.layers.Dense(FFN_DIM, activation="relu")(x1)
x1 = keras.layers.Dropout(DROPOUT_RATE)(x1)
x1 = keras.layers.BatchNormalization()(x1)

# Explanations model
x2 = keras.layers.Dense(EMBED_DIM, activation="relu")(exp_input)
x2 = keras.layers.Dropout(DROPOUT_RATE)(x2)
x2 = keras.layers.BatchNormalization()(x2)
x2 = keras.layers.Dense(FFN_DIM, activation="relu")(x2)
x2 = keras.layers.Dropout(DROPOUT_RATE)(x2)
x2 = keras.layers.BatchNormalization()(x2)

# Combined model
x3 = keras.layers.concatenate([x1, x2])
x3 = keras.layers.Dropout(DROPOUT_RATE)(x3)
x3 = keras.layers.Dense(20, activation="relu")(x3)
x3 = keras.layers.Dropout(DROPOUT_RATE)(x3)
outputs = keras.layers.Dense(3, activation="softmax")(x3)

model_error = keras.Model(inputs=[qns_input, exp_input], outputs=outputs)

model_error.compile(
  optimizer="adam",
  loss="sparse_categorical_crossentropy",
  metrics=['accuracy']
)
model_error.summary()

In [20]:
# Model 3
qns_input = keras.layers.Input(shape=(EMBED_DIM+2, )) # Questions
exp_input = keras.layers.Input(shape=(EMBED_DIM+2, )) # Explanations
ans_input = keras.layers.Input(shape=(EMBED_DIM+2, )) # Answers

# Questions model
x1 = keras.layers.Dense(EMBED_DIM, activation="relu")(qns_input)
x1 = keras.layers.Dropout(DROPOUT_RATE)(x1)
x1 = keras.layers.BatchNormalization()(x1)
x1 = keras.layers.Dense(FFN_DIM, activation="relu")(x1)
x1 = keras.layers.Dropout(DROPOUT_RATE)(x1)
x1 = keras.layers.BatchNormalization()(x1)

# Explanations model
x2 = keras.layers.Dense(EMBED_DIM, activation="relu")(exp_input)
x2 = keras.layers.Dropout(DROPOUT_RATE)(x2)
x2 = keras.layers.BatchNormalization()(x2)
x2 = keras.layers.Dense(FFN_DIM, activation="relu")(x2)
x2 = keras.layers.Dropout(DROPOUT_RATE)(x2)
x2 = keras.layers.BatchNormalization()(x2)

# Answers model
x3 = keras.layers.Dense(EMBED_DIM, activation="relu")(ans_input)
x3 = keras.layers.Dropout(DROPOUT_RATE)(x3)
x3 = keras.layers.BatchNormalization()(x3)
x3 = keras.layers.Dense(FFN_DIM, activation="relu")(x3)
x3 = keras.layers.Dropout(DROPOUT_RATE)(x3)
x3 = keras.layers.BatchNormalization()(x3)

# Combined model
x4 = keras.layers.concatenate([x1, x2, x3])
x4 = keras.layers.Dropout(DROPOUT_RATE)(x4)
x4 = keras.layers.Dense(20, activation="relu")(x4)
x4 = keras.layers.Dropout(DROPOUT_RATE)(x4)
outputs = keras.layers.Dense(36, activation="softmax")(x4)

model_misconception = keras.Model(inputs=[qns_input, exp_input, ans_input], outputs=outputs)

model_misconception.compile(
  optimizer="adam",
  loss="sparse_categorical_crossentropy",
  metrics=['accuracy']
)
model_misconception.summary()

# Training Workflow


In [None]:
# 1. Train Correct
model_correct.fit(
  [train_qns, train_ans],
  train_correct,
  batch_size=32,
  epochs=10,
)

Epoch 1/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9647 - loss: 0.0838
Epoch 2/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9850 - loss: 0.0366
Epoch 3/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9861 - loss: 0.0386
Epoch 4/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9864 - loss: 0.0329
Epoch 5/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9867 - loss: 0.0327
Epoch 6/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9871 - loss: 0.0321
Epoch 7/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9865 - loss: 0.0327
Epoch 8/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9856 - loss: 0.0358
Epoch 9/10
[1m1147/1147

<keras.src.callbacks.history.History at 0x1b0fe05b690>

In [34]:
correct_tensor = tf.convert_to_tensor(train_correct.reshape(-1, 1), dtype=tf.float32)
train_qns_err = tf.concat([train_qns, correct_tensor], axis=1)
train_exp_err = tf.concat([train_exp, correct_tensor], axis=1)

In [36]:
# 2. Train Error
model_error.fit(
  [train_qns_err, train_exp_err],
  train_error,
  batch_size=32,
  epochs=20,
)

Epoch 1/20
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.7210 - loss: 0.6525
Epoch 2/20
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7949 - loss: 0.4867
Epoch 3/20
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8132 - loss: 0.4458
Epoch 4/20
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8227 - loss: 0.4215
Epoch 5/20
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8353 - loss: 0.3999
Epoch 6/20
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8475 - loss: 0.3753
Epoch 7/20
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8623 - loss: 0.3393
Epoch 8/20
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8688 - loss: 0.3232
Epoch 9/20
[1m1147/1147

<keras.src.callbacks.history.History at 0x1b1017b30d0>

In [37]:
error_tensor = tf.convert_to_tensor(train_error.reshape(-1, 1), dtype=tf.float32)
train_qns_mis = tf.concat([train_qns, correct_tensor, error_tensor], axis=1)
train_exp_mis = tf.concat([train_exp, correct_tensor, error_tensor], axis=1)
train_ans_mis = tf.concat([train_ans, correct_tensor, error_tensor], axis=1)

In [38]:
# 3. Train Misconception 
model_misconception.fit(
  [train_qns_mis, train_exp_mis, train_ans_mis],
  train_misconception,
  batch_size=32,
  epochs=10,
)

Epoch 1/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8771 - loss: 0.4723
Epoch 2/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9770 - loss: 0.0810
Epoch 3/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9825 - loss: 0.0586
Epoch 4/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9862 - loss: 0.0469
Epoch 5/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9862 - loss: 0.0420
Epoch 6/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9876 - loss: 0.0382
Epoch 7/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9883 - loss: 0.0388
Epoch 8/10
[1m1147/1147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9897 - loss: 0.0342
Epoch 9/10
[1m1147/1147

<keras.src.callbacks.history.History at 0x1b1018ba290>

# Inference Workflow

1. Predict Correct
2. Feed results into Error Model
3. Predict Error
4. Feed results into Misconception Model
5. Predict Misconception
6. Concatenate results

In [40]:
test_qns = embed(test['QuestionText'])
test_exp = embed(test['StudentExplanation'])
test_ans = embed(test['MC_Answer'])
row_id = test['row_id']

In [43]:
# 1. Predict Correct
pred_correct = model_correct.predict([test_qns, test_ans])
pred_correct = np.where(pred_correct.flatten() > 0.5, 1, 0)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


In [47]:
# 2. Add results for Error model
pred_correct_tf = tf.convert_to_tensor(pred_correct.reshape(-1, 1), dtype=tf.float32)
test_qns_err = tf.concat([test_qns, pred_correct_tf], axis=1)
test_exp_err = tf.concat([test_exp, pred_correct_tf], axis=1)

In [48]:
# 3. Predict Error
pred_error = model_error.predict([test_qns_err, test_exp_err])
pred_error = np.argmax(pred_error, axis=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step


In [86]:
# 4. Add results to Misconception model
test_mis = test.with_columns(
  Correct=pred_correct,
  Error=pred_error
)

# Split into testing and rule-based
test_actual = test_mis.filter(
  ( pl.col("Correct") != 1 ) | ( pl.col("Error") != 2 )
)
test_rule = test_mis.filter(
  ( pl.col("Correct") == 1 ) & ( pl.col("Error") == 2 )
)

# Predict only those that don't meet the rule
test_qns_mis = embed(test_actual['QuestionText'])
test_exp_mis = embed(test_actual['StudentExplanation'])
test_ans_mis = embed(test_actual['MC_Answer'])

# Combine the Correct and Prediction
pred_error_mis_tf = tf.convert_to_tensor(test_actual['Error'].to_numpy().reshape(-1, 1), dtype=tf.float32)
pred_correct_mis_tf = tf.convert_to_tensor(test_actual['Correct'].to_numpy().reshape(-1, 1), dtype=tf.float32)

test_qns_mis = tf.concat([test_qns_mis, pred_correct_mis_tf, pred_error_mis_tf], axis=1)
test_exp_mis = tf.concat([test_exp_mis, pred_correct_mis_tf, pred_error_mis_tf], axis=1)
test_ans_mis = tf.concat([test_ans_mis, pred_correct_mis_tf, pred_error_mis_tf], axis=1)

In [87]:
# 5. Predict Misconception
pred_misconception = model_misconception.predict([test_qns_mis, test_exp_mis, test_ans_mis])
pred_misconception = np.argmax(pred_misconception, axis=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


In [88]:
# 6. Concatenate Results
test_rule = test_rule.with_columns(
  pl.lit(21).alias("Misconception").cast(pl.Int64)
)
test_actual = test_actual.with_columns(
  Misconception=pred_misconception
)
test_preds = pl.concat([test_actual, test_rule])

# Formatted
correct_map = {
  0: "False",
  1: "True"
}
error_map = {
  0: "Neither",
  1: "Misconception",
  2: "Correct"
}

test_preds = test_preds.with_columns(
  pl.col("Correct").replace_strict(correct_map).alias("Correct_text"),
  pl.col("Error").replace_strict(error_map).alias("Error_text"),
  pl.col("Misconception").map_batches(enc.inverse_transform).alias("Misconception_text"),
)
test_preds = test_preds.with_columns(
  pl.format("{}_{}:{}", pl.col("Correct_text"), pl.col("Error_text"), pl.col("Misconception_text")).alias("Category:Misconception")
)
test_submission = test_preds.select(
  "row_id",
  "Category:Misconception"
)

In [89]:
test_submission

row_id,Category:Misconception
i64,str
36697,"""False_Misconception:WNB"""
36698,"""True_Neither:NA"""
36696,"""True_Correct:NA"""


In [None]:
%watermark