In [1]:
import nest_asyncio
# Patch asyncio so that event loops can be nested. Presumably required because the
# notebook kernel already runs an event loop while later cells make blocking calls
# into async-based libraries (the guardrails `generate` calls) — TODO confirm.
nest_asyncio.apply()

In [None]:
import os
from getpass import getpass

# Never hardcode the key in the notebook. Reuse a key that is already exported in
# the environment, and only prompt for one (without echoing it) when none is set.
# The previous unconditional assignment of "" clobbered any existing key.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass("Enter your OpenAI API key: ")

In [3]:
import transformers  # to install: conda install conda-forge::transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import time
from importlib.metadata import version
import torch

In [4]:
# Report whether a CUDA GPU is visible and pick the device used for inference
has_cuda = torch.cuda.is_available()
print("CUDA Available:", has_cuda)

if has_cuda:
    print("Using CUDA device:", torch.cuda.get_device_name(0))
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Using:", device)

CUDA Available: False
Using: cpu


In [5]:
import os

# Ensure both guardrails config folders exist (no error if they already do)
for config_dir in ('textconfig', 'imageconfig'):
    os.makedirs(config_dir, exist_ok=True)

In [6]:
# The notebook is expected to be launched from the project root, so the current
# working directory doubles as the project directory.
project_dir = os.getcwd()

# Fine-tuned classifier checkpoint, stored under <project>/models/
model_path = os.path.join(
    project_dir,
    "models",
    "roBERTa_Large_Prompt_Classification_Model",
)

# Load the tokenizer and the classification model from the same checkpoint,
# then move the model onto the selected device.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model = model.to(device)

In [7]:
%%writefile textconfig/config.yml

# Main LLM used by the text rails: OpenAI gpt-3.5-turbo.
models:
 - type: main
   engine: openai
   model: gpt-3.5-turbo

# General instructions describing the bot (DemoBot) and its purpose.
instructions:
  - type: general
    content: |
      Below is a conversation between a user and a bot called the DemoBot.
      The bot is designed to aid the user in their requests.
      The bot is to be used in a demo for a classification and guardrails demonstration.

# Input rails: user messages go through the "self check input" flow.
rails:
  input:
    flows:
      - self check input

# Prompt for the self_check_input task. It lists the DemoBot guidelines and asks a
# Yes/No question on whether the message should be blocked.
# NOTE(review): {{ user_input }} is presumably replaced with the user's message at runtime — confirm.
prompts:
  - task: self_check_input
    content: |
      Your task is to check if the user message below complies with the guidelines for DemoBot.

      Guidelines for the user messages:
      -Should not allow any NSFW prompts
      -Should not allow any gore
      -Should not allow any fruits
      -Should not draw cats

      User message: "{{ user_input }}"

      Question: Should the user message be blocked (Yes or No)?
      Answer:

Overwriting textconfig/config.yml


In [8]:
%%writefile imageconfig/config.yml

# Main LLM used by the image rails: OpenAI gpt-3.5-turbo.
models:
 - type: main
   engine: openai
   model: gpt-3.5-turbo

# General instructions describing the bot (ImageBot) and its purpose.
instructions:
  - type: general
    content: |
      Below is a conversation between a user and a bot called the ImageBot.
      The bot is designed to identify if the image the user is requesting meets guidelines.

# Input rails: user messages go through the "self check input" flow.
rails:
  input:
    flows:
      - self check input

prompts:
  - task: self_check_input
    content: |
      Your task is to check if the user message below complies with the guidelines for ImageBot.

      Guidelines for the user messages:
      -Should not allow any NSFW prompts
      -Should not allow any gore
      -Should not allow any apples
      -Should not draw or allow any types of cats, including big cats and house cats

      User message: "{{ user_input }}"

      Question: Should the user message be blocked (Yes or No)?
      Answer:

Overwriting imageconfig/config.yml


In [9]:
# Global holding the most recently classified prompt (whitespace-stripped)
current_prompt = ''


def classify_prompt(prompt: str) -> "tuple[str, str]":
    """Classify the input prompt as text or image.

    Side effect: stores the stripped prompt in the module-level
    ``current_prompt`` variable.

    Args:
        prompt: Raw user prompt. It is tokenized as given (not stripped),
            padded/truncated to 128 tokens.

    Returns:
        A ``(category, score)`` pair where ``category`` is ``"text"`` or
        ``"image"`` and ``score`` is a string of the form
        ``"score: [p_image p_text]"`` containing the softmax probabilities.
    """
    global current_prompt
    current_prompt = prompt.strip()

    inputs = tokenizer(
        prompt,
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).to(device)

    # Inference only: no gradients are needed
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).cpu().numpy().flatten()

    prediction = probabilities.argmax()  # binary classification: 0=image, 1=text
    category = "text" if prediction == 1 else "image"
    score = f"score: {probabilities}"
    return category, score

In [10]:
# ======= handle misclassified prompts ========
FAILED_PROMPTS_FILE = os.path.join(project_dir, "datasets", "failed_prompts_dataset.csv")


def save_failed_prompt(prompt: str, correct_label: str) -> None:
    """Append the misclassified prompt to a CSV file.

    The row is written to ``FAILED_PROMPTS_FILE`` with the columns
    ``prompt`` and ``class``. The header row is written only when the file
    does not exist yet or is empty.

    Args:
        prompt: The prompt that the classifier got wrong.
        correct_label: The label the prompt should have received.
    """
    new_entry = pd.DataFrame([[prompt, correct_label]], columns=['prompt', 'class'])

    # Create the target folder on first use; to_csv does not create directories
    target_dir = os.path.dirname(FAILED_PROMPTS_FILE)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # A missing or zero-byte file still needs the header row
    needs_header = (
        not os.path.exists(FAILED_PROMPTS_FILE)
        or os.path.getsize(FAILED_PROMPTS_FILE) == 0
    )
    new_entry.to_csv(FAILED_PROMPTS_FILE, mode='a', header=needs_header, index=False)

    print(f"Saved misclassified prompt to {FAILED_PROMPTS_FILE}\n")


def ask_feedback(prompt: str, predicted_label: str) -> str:
    """Ask the user if the classification was correct and record feedback automatically.

    Args:
        prompt: The prompt that was classified.
        predicted_label: The classifier's label (expected to start with
            ``"text"`` or ``"image"``).

    Returns:
        ``predicted_label`` unchanged when the user answers ``y`` or gives an
        invalid answer; otherwise the flipped label (``"image"`` for a
        ``"text"`` prediction, ``"text"`` for anything else), which is also
        saved through ``save_failed_prompt``.
    """
    response = input("Was this classification correct? (y/n): ").strip().lower()

    if response == 'y':
        return predicted_label

    if response != 'n':
        print("Invalid input. Skipping feedback.")
        return predicted_label

    print(f"label before swap logic: {predicted_label}")
    # Flip the label: if 'text' -> 'image', if 'image' -> 'text'
    correct_label = 'image' if predicted_label.startswith("text") else 'text'
    print(f"label after swap logic: {correct_label}")
    save_failed_prompt(prompt, correct_label)
    return correct_label

In [11]:
import openai
import os
from nemoguardrails import RailsConfig
from nemoguardrails import LLMRails
from IPython.display import Image


# Load one guardrails configuration per prompt category from its config folder
textconfig = RailsConfig.from_path("./textconfig")
imageconfig = RailsConfig.from_path("./imageconfig")

# Build the rails objects used by the prompt handlers
textrails = LLMRails(textconfig)
imagerails = LLMRails(imageconfig)

# Hand the OpenAI client the key taken from the environment
openai.api_key = os.getenv("OPENAI_API_KEY")

def generate_image_with_dalle(prompt):
    """Generate one image for an approved prompt with DALL·E 3 and show it inline.

    Args:
        prompt: The user prompt that already passed the guardrails check.

    Returns:
        The raw response object returned by ``openai.images.generate``.
    """
    print("Prompt allowed. Sending to DALL·E...")
    response = openai.images.generate(
        model="dall-e-3",      # You can also try "dall-e-2"
        prompt=prompt,
        n=1,
        # dall-e-3 accepts "1024x1024", "1792x1024" or "1024x1792";
        # the smaller "512x512" / "256x256" sizes are only valid for dall-e-2.
        size="1024x1024",
        quality="standard",
    )
    image_url = response.data[0].url
    print(f"Generated image URL: {image_url}")
    # The URL can be missing (None), e.g. if the API returns base64 data instead,
    # so guard before calling a string method on it.
    if image_url and image_url.startswith("https"):
        display(Image(url=image_url))
    else:
        print(image_url)

    return response

def handle_image_prompt(user_prompt):
    """Run the prompt through the image rails and, if allowed, send it to DALL·E.

    Args:
        user_prompt: The raw prompt entered by the user.

    Returns:
        The rails' response when exactly one LLM call was made (treated as
        "the self check stopped the prompt"); otherwise the result of
        ``generate_image_with_dalle(user_prompt)``.
    """
    response = imagerails.generate(prompt=user_prompt)
    info = imagerails.explain()

    # Exactly one recorded LLM call means only the self check ran, i.e. the
    # prompt did not make it past the input rail.
    if len(info.llm_calls) == 1:
        return response

    return generate_image_with_dalle(user_prompt)

def handle_text_prompt(user_prompt):
    """Run a text prompt through the text rails.

    Args:
        user_prompt: The raw prompt entered by the user.

    Returns:
        The response produced by ``textrails.generate``. It is returned rather
        than printed here so that a caller printing the result shows the
        response instead of ``None``.
    """
    return textrails.generate(prompt=user_prompt)



Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [12]:
# Main demo loop: classify each prompt, confirm the label with the user, then
# route the prompt to the matching guardrailed handler. Type EXIT to stop.
if __name__ == "__main__":
    while True:
        user_input = input("Enter a prompt: ")
        # Exit the loop if the user types 'EXIT' (case-insensitive)
        if user_input.strip().upper() == 'EXIT':
            print("Exiting the program.")
            break

        # Time the classification only, reported in milliseconds
        start_time = time.perf_counter()
        classification, score = classify_prompt(user_input)
        class_time = (time.perf_counter() - start_time) * 1000
        print(f"The model classified the prompt as: {classification}, with score, {score}")
        print(f"That prompt took: {class_time:.2f} ms to classify\n")

        # The user may overrule the classifier; the confirmed label decides the route
        classification = ask_feedback(user_input, classification)
        print (f"confirmed classification: {classification}")
        if classification.startswith("text"):
            result = handle_text_prompt(user_input)
        elif classification.startswith("image"):
            result = handle_image_prompt(user_input)
        else:
            # Avoid printing a stale or undefined result for an unknown label
            print(f"Unrecognized classification: {classification!r}. Skipping prompt.")
            continue

        # A handler that already printed its output may return None
        if result is not None:
            print(result)

Enter a prompt:  can you draw a cat?


The model classified the prompt as: text, with score, score: [3.3161300e-06 9.9999666e-01]
That prompt took: 307.24 ms to classify



Was this classification correct? (y/n):  n


label before swap logic: text
label after swap logic: image
Saved misclassified prompt to D:\senior design\PROJECT_FOLDER_Final\datasets\failed_prompts_dataset.csv

confirmed classification: image
I'm sorry, I can't respond to that.


Enter a prompt:  can you draw a cat


The model classified the prompt as: image, with score, score: [9.9999964e-01 3.6654393e-07]
That prompt took: 181.72 ms to classify



Was this classification correct? (y/n):  y


confirmed classification: image
I'm sorry, I can't respond to that.


Enter a prompt:  draw me an image 


The model classified the prompt as: text, with score, score: [3.3161332e-06 9.9999666e-01]
That prompt took: 206.61 ms to classify



Was this classification correct? (y/n):  n


label before swap logic: text
label after swap logic: image
Saved misclassified prompt to D:\senior design\PROJECT_FOLDER_Final\datasets\failed_prompts_dataset.csv

confirmed classification: image
Prompt allowed. Sending to DALL·E...
Generated image URL: https://oaidalleapiprodscus.blob.core.windows.net/private/org-FIOasVCr6Rb1ZFHGNE0zT8T3/user-WWmX3P1pDbCmhgMtQ3kQtBSH/img-AETDAZwdBlNPzLhtlUR5S8ji.png?st=2025-05-02T20%3A39%3A17Z&se=2025-05-02T22%3A39%3A17Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=8b33a531-2df9-46a3-bc02-d4b1430a422c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-05-01T23%3A16%3A54Z&ske=2025-05-02T23%3A16%3A54Z&sks=b&skv=2024-08-04&sig=tx4thi1nAVOE0ebyICDKnqyuX9vyO8N%2BrhRTKWli/Wg%3D


ImagesResponse(created=1746221957, data=[Image(b64_json=None, revised_prompt='Create an abstract image that is full of bright, vivid colors. It should be a mixture of many colors, with dynamic shapes and patterns scattered across the canvas. The image should evoke a sense of vibrancy and creativity, and it should be highly intricate, with attention paid to every detail. Be sure to use a wide variety of different hues, from cool blues and greens to warm reds and yellows, and everything in between. The final artwork should look like a beautiful kaleidoscope, with every color imaginable represented in some way.', url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-FIOasVCr6Rb1ZFHGNE0zT8T3/user-WWmX3P1pDbCmhgMtQ3kQtBSH/img-AETDAZwdBlNPzLhtlUR5S8ji.png?st=2025-05-02T20%3A39%3A17Z&se=2025-05-02T22%3A39%3A17Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=8b33a531-2df9-46a3-bc02-d4b1430a422c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-05-01T23%3A16%3A54Z&ske=2

Enter a prompt:  draw an image of a dog


The model classified the prompt as: image, with score, score: [9.9999964e-01 3.6654393e-07]
That prompt took: 249.48 ms to classify



Was this classification correct? (y/n):  y


confirmed classification: image
Prompt allowed. Sending to DALL·E...
Generated image URL: https://oaidalleapiprodscus.blob.core.windows.net/private/org-FIOasVCr6Rb1ZFHGNE0zT8T3/user-WWmX3P1pDbCmhgMtQ3kQtBSH/img-Qv2eV5lGErNBEjjPQSTR7Po7.png?st=2025-05-02T20%3A41%3A55Z&se=2025-05-02T22%3A41%3A55Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=8b33a531-2df9-46a3-bc02-d4b1430a422c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-05-01T23%3A19%3A17Z&ske=2025-05-02T23%3A19%3A17Z&sks=b&skv=2024-08-04&sig=NdZiFbd2sjcjyhwID1sPXmLjo8m8KGjGhpkH%2Bkgmny8%3D


ImagesResponse(created=1746222115, data=[Image(b64_json=None, revised_prompt="Generate an image of a domestic dog, displaying its unique and distinctive attributes. The image should show the dog in a candid moment, showcasing its friendly and welcoming demeanor. The dog could be of any breed or color and should be seen in a comfortable, familiar environment like a house or a yard. Additional elements like the dog's favorite toys, a collar, or a leash could be present to add context and depth to the image.", url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-FIOasVCr6Rb1ZFHGNE0zT8T3/user-WWmX3P1pDbCmhgMtQ3kQtBSH/img-Qv2eV5lGErNBEjjPQSTR7Po7.png?st=2025-05-02T20%3A41%3A55Z&se=2025-05-02T22%3A41%3A55Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=8b33a531-2df9-46a3-bc02-d4b1430a422c&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-05-01T23%3A19%3A17Z&ske=2025-05-02T23%3A19%3A17Z&sks=b&skv=2024-08-04&sig=NdZiFbd2sjcjyhwID1sPXmLjo8m8KGjGhpkH%2Bkgmny8%3D')], usa

Enter a prompt:  exit


Exiting the program.
