In [1]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.34.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m52.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m37.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m102.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m83.3 MB/s[0m eta [36m0:00:00[0m
Co

In [2]:
!pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [3]:
!pip install accelerate

Collecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/258.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/258.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.23.0


In [7]:
import re

import pandas as pd
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the essays to grade; the essay text is read from the 'Essay' column.
data_df = pd.read_excel("StudentEssays.xlsx")

# Initialize T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

# Choose the device once and reuse it for the model and every input tensor.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Concepts whose definitions are graded in every essay
concepts_to_predict = ["potential energy", "kinetic energy", "Law of Conservation of Energy"]

# Labels the model is asked to answer with
outcome_labels = ["Acceptable", "Unacceptable", "Insufficient", "Not Found"]


def match_outcome_label(generated_text, labels, default="Unknown"):
    """Return the first label in `labels` that appears in `generated_text`.

    Matching is case-insensitive and anchored on word boundaries. A plain
    substring test is not enough here: "acceptable" is a substring of
    "unacceptable", so an "Unacceptable" answer would be reported as
    "Acceptable".

    Args:
        generated_text: Decoded text produced by the model.
        labels: Candidate labels, checked in order.
        default: Value returned when no label is found.

    Returns:
        The matching label (spelled as in `labels`), or `default`.
    """
    for label in labels:
        if re.search(rf"\b{re.escape(label)}\b", generated_text, flags=re.IGNORECASE):
            return label
    return default


# One dict of {concept: label} per essay, in row order
predictions_list = []

# Model outputs that matched no label, kept so they can all be inspected later
unmatched_outputs = []

for row_index, essay in data_df["Essay"].items():
    # A missing cell would otherwise be formatted into the prompt as the text
    # "nan"; an empty essay cannot contain a definition, so record "Not Found"
    # for every concept without calling the model.
    if pd.isna(essay) or not str(essay).strip():
        predictions_list.append({concept: "Not Found" for concept in concepts_to_predict})
        continue

    text = str(essay)
    predictions = {}

    for concept in concepts_to_predict:
        prompt = f"According to the following essay, is the student's definition of {concept} Acceptable, Unacceptable, Insufficient, or Not Found? Only use one of these labels for outputs\n{text}"

        # Tokenize (truncating to the tokenizer's maximum length) and generate the answer
        input_ids = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).input_ids.to(device)
        outputs = model.generate(input_ids, max_length=128)
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)  # Remove special tokens

        predictions[concept] = match_outcome_label(decoded_output, outcome_labels)

        if predictions[concept] == "Unknown":
            print(len(decoded_output))
            unmatched_outputs.append(f"{row_index}\t{concept}\t{decoded_output}")

    predictions_list.append(predictions)

# Write all unmatched outputs in one go. Opening the file in 'w' mode inside
# the loop would keep only the last one.
if unmatched_outputs:
    with open('output.txt', 'w', encoding='utf-8') as f:
        f.write("\n".join(unmatched_outputs) + "\n")

# Build the result table on data_df's index so that later column assignments
# align row-for-row; explicit columns keep the frame well-formed even when
# there are no essays.
predictions_df = pd.DataFrame(predictions_list, index=data_df.index, columns=concepts_to_predict)

# Show every row and column for this printout only, without changing the
# notebook-wide display options.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(predictions_df)

   potential energy kinetic energy Law of Conservation of Energy
0         Not Found      Not Found                    Acceptable
1         Not Found      Not Found                     Not Found
2      Insufficient   Insufficient                    Acceptable
3      Insufficient   Insufficient                  Insufficient
4      Insufficient   Insufficient                    Acceptable
5      Insufficient   Insufficient                     Not Found
6         Not Found      Not Found                    Acceptable
7      Insufficient   Insufficient                     Not Found
8      Insufficient      Not Found                     Not Found
9      Insufficient      Not Found                    Acceptable
10     Insufficient   Insufficient                     Not Found
11        Not Found      Not Found                     Not Found
12     Insufficient   Insufficient                     Not Found
13     Insufficient   Insufficient                    Acceptable
14     Insufficient   Ins

In [5]:
# Copy the per-concept predictions into short-named columns of the essay table.
# predictions_df holds one row per essay in the same order as data_df, so the
# values are assigned positionally: .to_numpy() bypasses index alignment, which
# would silently insert NaN if the two indexes ever differed. A row-count
# mismatch raises ValueError instead of producing a partially filled column.
data_df["PE"] = predictions_df["potential energy"].to_numpy()
data_df["KE"] = predictions_df["kinetic energy"].to_numpy()
data_df["LCE"] = predictions_df["Law of Conservation of Energy"].to_numpy()

# Save the augmented table to a separate workbook (Result.xlsx); the input
# workbook is not overwritten.
data_df.to_excel("Result.xlsx", index=False)