In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.33.3-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m89.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m59.9 MB/s[0m eta [36m0:00:0

In [3]:
!pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/1.3 MB[0m [31m5.4 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/1.3 MB[0m [31m9.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.3/1.3 MB[0m [31m12.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [4]:
!pip install accelerate

Collecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/258.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━[0m [32m194.6/258.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.23.0


In [10]:
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# Load the student essays; the essay text is read from the 'Essay' column below
data_df = pd.read_excel("StudentEssays.xlsx")

# Initialize T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

# Resolve the device once and reuse it for both the model and every input tensor
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Concepts to grade in every essay (these become the columns of predictions_df)
concepts_to_predict = ["potential energy", "kinetic energy", "Law of Conservation of Energy"]

# Possible outcome labels the model is asked to choose between
outcome_labels = ["Acceptable", "Unacceptable", "Insufficient"]

# Label recorded when the model output names none of the outcome labels,
# or when there is no essay text to grade
UNKNOWN_LABEL = "Unknown"

# Upper bound passed to model.generate(max_length=...)
MAX_OUTPUT_LENGTH = 128


def match_outcome_label(decoded_output, labels, default=UNKNOWN_LABEL):
    """Map raw generated text onto one of ``labels``.

    Args:
        decoded_output: Text decoded from the model output.
        labels: Candidate labels, in priority order for the exact-case pass.
        default: Value returned when no label is found.

    Returns:
        The first label contained verbatim in ``decoded_output``; otherwise the
        longest label contained case-insensitively; otherwise ``default``.
    """
    # Pass 1: exact-case substring match, in the order the labels are listed.
    for label in labels:
        if label in decoded_output:
            return label

    # Pass 2: case-insensitive fallback so answers such as "acceptable" are not
    # lost. Longer labels are tried first because "unacceptable" contains
    # "acceptable" once case is ignored.
    lowered_output = decoded_output.lower()
    for label in sorted(labels, key=len, reverse=True):
        if label.lower() in lowered_output:
            return label

    return default


def classify_concept(text, concept):
    """Ask the model to grade the essay's definition of ``concept``.

    Args:
        text: Essay text to grade.
        concept: Name of the concept whose definition is being graded.

    Returns:
        One of ``outcome_labels``, or ``UNKNOWN_LABEL`` if the generated text
        names none of them.
    """
    # The question comes first and the essay last, so with truncation=True an
    # over-long essay is expected to lose its tail rather than the question.
    prompt = f"According to the following essay, is the student's definition of {concept} Acceptable, Unacceptable, or Insufficient?\n{text}"
    input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.to(device)
    outputs = model.generate(input_ids, max_length=MAX_OUTPUT_LENGTH)
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)  # Remove special tokens
    return match_outcome_label(decoded_output, outcome_labels)


# One dictionary of {concept: label} per essay, in row order
predictions_list = []

for text in data_df["Essay"]:
    if pd.isna(text) or not str(text).strip():
        # An empty cell would otherwise be formatted into the prompt as "nan"
        # and graded as if it were an essay; record it as unknown instead.
        predictions = {concept: UNKNOWN_LABEL for concept in concepts_to_predict}
    else:
        predictions = {concept: classify_concept(text, concept) for concept in concepts_to_predict}
    predictions_list.append(predictions)

# Build the result on data_df's own index (so later column assignment lines up
# row for row) and with explicit columns (so they exist even with zero essays).
predictions_df = pd.DataFrame(predictions_list, index=data_df.index, columns=concepts_to_predict)

# Set options to display all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Print the predictions
print(predictions_df)

   potential energy kinetic energy Law of Conservation of Energy
0        Acceptable   Insufficient                    Acceptable
1      Insufficient   Insufficient                    Acceptable
2      Insufficient   Insufficient                    Acceptable
3      Insufficient   Insufficient                    Acceptable
4      Insufficient   Insufficient                    Acceptable
5      Insufficient   Insufficient                    Acceptable
6        Acceptable     Acceptable                    Acceptable
7      Unacceptable   Unacceptable                    Acceptable
8      Insufficient   Insufficient                  Insufficient
9        Acceptable     Acceptable                    Acceptable
10     Insufficient   Insufficient                    Acceptable
11     Insufficient   Insufficient                    Acceptable
12     Insufficient   Insufficient                    Acceptable
13       Acceptable     Acceptable                    Acceptable
14     Insufficient     A

In [5]:
# NOTE: this cell needs `data_df` and `predictions_df` from the classification
# cell; running it first raises NameError.

# Short column name to add to data_df for each predicted concept
concept_columns = {
    "potential energy": "PE",
    "kinetic energy": "KE",
    "Law of Conservation of Energy": "LCE",
}

# Predictions are produced one per essay row, so copy them across by position;
# this avoids NaNs or shifted rows if the two frames' indexes ever differ.
assert len(predictions_df) == len(data_df), "expected exactly one prediction row per essay"
for concept, short_name in concept_columns.items():
    data_df[short_name] = predictions_df[concept].to_numpy()

# Save the augmented DataFrame to a new Excel file; the original
# StudentEssays.xlsx is left untouched
data_df.to_excel("StudentEssays2.xlsx", index=False)

NameError: ignored