Subjective Answer Evaluation using Machine Learning

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
df = pd.read_excel("D:\\Personal Project\\Data.csv.xlsx")

# Split into train and test
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Answer Text'], df['Score'], test_size=0.2, random_state=42
)


In [10]:
df.head()

Unnamed: 0,Answer ID,Answer Text,Score
0,1,"""The story was engaging.""",4.5
1,2,"""It was boring and slow.""",2.0
2,3,"""An excellent read overall.""",5.0


In [16]:
from transformers import AutoTokenizer

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the data
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors='pt')
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128, return_tensors='pt')


In [13]:
from transformers import AutoModelForSequenceClassification
import torch

# Load pre-trained BERT with a regression head
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)

# Use GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [17]:
from torch.utils.data import DataLoader, Dataset

class AnswerDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

train_dataset = AnswerDataset(train_encodings, train_labels.tolist())
test_dataset = AnswerDataset(test_encodings, test_labels.tolist())

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)


In [18]:
from torch.optim import AdamW
from torch.nn import MSELoss

# Define optimizer and loss function
optimizer = AdamW(model.parameters(), lr=5e-5)
loss_fn = MSELoss()

# Training loop
epochs = 3
for epoch in range(epochs):
    model.train()
    for batch in train_loader:
        # Move to GPU if available
        batch = {key: val.to(device) for key, val in batch.items()}
        
        # Forward pass
        outputs = model(**batch)
        loss = loss_fn(outputs.logits.squeeze(-1), batch['labels'])
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")


Epoch 1, Loss: 10.593071937561035
Epoch 2, Loss: 7.838629722595215
Epoch 3, Loss: 7.704768180847168


In [19]:
from sklearn.metrics import mean_squared_error

model.eval()
predictions, true_labels = [], []

with torch.no_grad():
    for batch in test_loader:
        batch = {key: val.to(device) for key, val in batch.items()}
        outputs = model(**batch)
        predictions.extend(outputs.logits.squeeze(-1).cpu().numpy())
        true_labels.extend(batch['labels'].cpu().numpy())

# Calculate Mean Squared Error
mse = mean_squared_error(true_labels, predictions)
print(f"Mean Squared Error on Test Set: {mse}")


Mean Squared Error on Test Set: 8.801578521728516


In [20]:
# Save model
model.save_pretrained('./subjective_eval_model')
tokenizer.save_pretrained('./subjective_eval_model')

# Load for inference
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained('./subjective_eval_model')
tokenizer = AutoTokenizer.from_pretrained('./subjective_eval_model')


In [22]:
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the pre-trained model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained('./subjective_eval_model')
tokenizer = AutoTokenizer.from_pretrained('./subjective_eval_model')

# Function to make predictions
def evaluate_answer(answer):
    inputs = tokenizer(answer, return_tensors="pt", truncation=True, padding=True, max_length=128)
    model.eval()
    with torch.no_grad():
        inputs = {key: val.to('cpu') for key, val in inputs.items()}  # Use CPU for inference
        output = model(**inputs)
        score = output.logits.squeeze().item()  # Extract score
    return score

# Streamlit UI
st.title("Subjective Answer Evaluation")
st.write("This app evaluates subjective answers and assigns a score.")

# Text input for user's answer
user_answer = st.text_area("Enter the subjective answer:", "")

if st.button("Evaluate Answer"):
    if user_answer.strip():
        score = evaluate_answer(user_answer)
        st.success(f"Predicted Score: {score:.2f}")
    else:
        st.warning("Please enter an answer to evaluate.")

# Footer
st.write("---")
st.write("Powered by BERT and Streamlit")


2024-11-23 15:28:58.160 
  command:

    streamlit run C:\Users\pintu kumar sah\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-11-23 15:28:58.170 Session state does not function when running a script without `streamlit run`
