In [1]:
import os
import json
import time
import pandas as pd
from datetime import datetime
from ollama import Client # Import Ollama client
from concurrent.futures import ThreadPoolExecutor, as_completed # For concurrent processing

## Connecting to OpenAI (or Ollama in case of open source)

The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI.  

In [2]:
# Constants
OLLAMA_HOST = "http://localhost:11434" # Default Ollama host
HEADERS = {"Content-Type": "application/json"}
#MODEL = "deepseek-r1:8b"
MODEL = "gpt-oss:20b"

## Create system and user prompts to input to the model
Most models have been trained to receive instructions in a particular way. They expect to receive:

**A system prompt** that tells them what task they are performing and what tone they should use

**A user prompt** -- the conversation starter that they should reply to

In [3]:
# Define our system prompt

# system_prompt = """You are an expert topic and sentiment classifier for student feedback from a vocational education and training institution.
# Your task is to analyse student verbatims and, for each identified topic from a predefined list, assign a sentiment label.

# You must adhere to the following rules:
# 1. Match the verbatim to the most relevant topics from the provided topic list.
# 2. If the verbatim is not relevant to any topic on the list, return 'No Match'.
# 3. You can assign more than one topic if the verbatim covers multiple subjects.
# 4. For each topic identified, you must assign one of the following sentiment labels: 'Positive', 'Negative', 'Neutral'.
# 5. If a topic is mentioned but no clear sentiment is expressed (e.g., a statement of fact), classify it as 'Neutral'.
# 6. Your output must be a single JSON object.
# 7. The JSON object must have two keys: "topics" and "verbatim_text".
# 8. The value for "topics" should be a list of objects. Each object in the list must have two keys: "topic" and "sentiment".
# 9. The value for "topic" should be a string from the provided topic list or the string 'No Match'.
# 10. The value for "sentiment" should be a string from the sentiment labels: 'Positive', 'Negative', 'Neutral'. If the topic is 'No Match', the sentiment should be 'N/A'.
# 11. The value for "verbatim_text" should be the original verbatim you are analysing.

# You need to classify a verbatim from a student from the list of topics and sentiments mentioned below. 

# **Topic List:**
# - Enrolment Process
# - Student Support Services
# - Course Content and Relevance
# - Trainer Quality and Engagement
# - Facilities and Campus Environment
# - Timetable and Scheduling
# - Online Learning Platform
# - Assessment and Feedback
# - Career and Employment Services
# - Technology and Equipment
# - Communication and Information
# - Student Welfare and Wellbeing
# - Course Fees and Payments
# - Recognition of Prior Learning (RPL)
# - Work Placement
# - Graduation and Completion

# **Sentiment Labels:**
# - Positive
# - Negative
# - Neutral"""


system_prompt = """You are an expert topic and sentiment classifier for student feedback.

Your task is to analyze student verbatims and assign topics with sentiment from the predefined lists.

RULES:
1. Match verbatim to relevant topics from the topic list
2. Assign sentiment: Positive, Negative, or Neutral
3. If no topics match, use "No Match" with sentiment "N/A"
4. Output must be valid JSON with this structure:

{
  "topics": [
    {"topic": "Topic Name", "sentiment": "Positive"},
    {"topic": "Topic Name 2", "sentiment": "Negative"}
  ],
  "verbatim_text": "original verbatim text"
}

TOPICS:
- Enrolment Process
- Student Support Services
- Course Content and Relevance
- Trainer Quality and Engagement
- Facilities and Campus Environment
- Timetable and Scheduling
- Online Learning Platform
- Assessment and Feedback
- Career and Employment Services
- Technology and Equipment
- Communication and Information
- Student Welfare and Wellbeing
- Course Fees and Payments
- Recognition of Prior Learning (RPL)
- Work Placement
- Graduation and Completion

SENTIMENT: Positive, Negative, Neutral"""

In [4]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(verbatim):
    user_prompt = f"""
    You are looking at a verbatim from a student. Based on the list of topics and sentiment labels provided, below are five examples of how to classify a verbatim.

    **Examples for Few-Shot Classification:**

    1. **Verbatim:** "My trainer is great, but the course content is outdated."
       **Topics:** [{{"topic": "Trainer Quality and Engagement", "sentiment": "Positive"}}, {{"topic": "Course Content and Relevance", "sentiment": "Negative"}}]

    2. **Verbatim:** "I'm having trouble with the login for the online portal, and the Wi-Fi on campus is really slow."
       **Topics:** [{{"topic": "Online Learning Platform", "sentiment": "Negative"}}, {{"topic": "Technology and Equipment", "sentiment": "Negative"}}]

    3. **Verbatim:** "The new library on campus is fantastic, and the resources are excellent."
       **Topics:** [{{"topic": "Facilities and Campus Environment", "sentiment": "Positive"}}]

    4. **Verbatim:** "I received an email about my enrolment but it is useless information. I just want to know the status of my application. "
       **Topics:** [{{"topic": "Enrolment Process", "sentiment": "Neutral"}}, {{"topic": "Communication and Information", "sentiment": "Negative"}}]

    5. **Verbatim:** "I've been working as a mechanic for 10 years, and I want to see if I can get credit for my experience towards this course."
       **Topics:** [{{"topic": "Recognition of Prior Learning (RPL)", "sentiment": "Neutral"}}]

    **New Verbatim to Classify:**
    {verbatim}
    """
    
    return user_prompt

**Create Message for the model** : The API from OpenAI expects to receive messages in a particular structure.

```python
[
    {"role": "system", "content": "system message goes here"},
    {"role": "user", "content": "user message goes here"}
]
```

In [5]:
# Create the message structure
def messages_for(verbatim):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(verbatim)}
    ]

## Bring it together

In [6]:
# Create an Ollama client instance
ollama_client = Client(host=OLLAMA_HOST)

# Fixed return_topics function
def return_topics(verbatim_obj):
    verbatim = verbatim_obj['verbatim_text']
    custom_id = verbatim_obj['custom_id']
    
    try:        
        response = ollama_client.chat(
                model=MODEL,
                messages=messages_for(verbatim),
                options={
                    "temperature": 0.0,
                    "top_p": 0.9,
                    "num_predict": 300,
                },
               # format="json"
            )

        # Get Ollama response content
        response_content = response['message']['content']
        
        # Debug: Print the raw response 
        #print(f"Raw response for {custom_id}: {response_content}")
            
        # Clean the string to handle potential model errors
        import re
        match = re.search(r'\{.*\}', response_content, re.DOTALL)
        if match:
            clean_json_string = match.group(0)
        else:
            print(f"Warning: No JSON object found in response for {custom_id}")
            print(f"Raw response was: {response_content}")
            return {
                'custom_id': custom_id,
                'topics_and_sentiments': [{'topic': 'No Match', 'sentiment': 'N/A'}],
                'verbatim_text': verbatim
            }

        # Parse the JSON string from the model's response
        classification_data = json.loads(clean_json_string)
        
        return {
            'custom_id': custom_id,
            'topics_and_sentiments': classification_data.get('topics', []),
            'verbatim_text': classification_data.get('verbatim_text', verbatim)
        }

    except json.JSONDecodeError as e:
        print(f"JSON decoding error for ID {custom_id}: {e}")
        print(f"Raw response: {response_content}")
        return {
            'custom_id': custom_id,
            'topics_and_sentiments': [{'topic': 'Error: JSON Decode Error', 'sentiment': 'N/A'}],
            'verbatim_text': verbatim  # Fixed: was 'verbatim_text'
        }
    except Exception as e:
        print(f"Error processing verbatim ID {custom_id}: {e}")
        return {
            'custom_id': custom_id,
            'topics_and_sentiments': [{'topic': 'Error: API Call Failed', 'sentiment': 'N/A'}],
            'verbatim_text': verbatim  # Fixed: was 'verbatim_text'
        }

## Create Batch 

In [7]:
# Sample list of verbatims
verbatims_to_classify = [
    "My classes are all over the place. I have to come to campus three times a week for just one or two hours each time.",
    "The course material is outdated; we're still learning about software from five years ago.",
    "I tried to get help from the student welfare office, but they were closed.",
    "My final assignment feedback was very vague, and I don't know what to improve on.",
    "I'm not sure if this is the right career path for me after finishing this course.",
    "The computer labs have really old computers, and some don't even work properly.",
    "The process to apply for this course was so confusing, but the website is quite easy to navigate.",
    "My work placement was very unorganized and I felt like I didn't learn anything.",
    "The person who was meant to help me with my enrolment never got back to me.",
    "The cost of the textbooks is way too high, and I'm not sure if I can afford them.",
    "This feedback is not about any of the topics.",
    "I need help with my resume and job applications after I graduate.",
    "The fees for next semester seem to have increased without much warning. The student support staff is trying to be helpful but they cannot do much either",
    "The campus security could be better, I don't feel entirely safe at night.",
    "I really enjoy the practical exercises in this course; they are very relevant to industry.",
]

In [8]:
# Prepare data for concurrent processing
data_for_processing = []
for i, verbatim in enumerate(verbatims_to_classify):
    data_for_processing.append({"custom_id": f"verbatim_{i+1}", "verbatim_text": verbatim})


In [9]:
# --- Concurrent Processing with ThreadPoolExecutor ---

# max_workers should be adjusted based on your system's resources (CPU cores, VRAM)
# and OLLAMA_NUM_PARALLEL setting. Start with a small number like 2-4.
MAX_CONCURRENT_REQUESTS = 4 

In [10]:
print(f"\nStarting batch processing with {MAX_CONCURRENT_REQUESTS} concurrent requests...")

parsed_results = []
with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
    # Submit all verbatims to the executor
    job_queue = {executor.submit(return_topics, item): item for item in data_for_processing}
    
    # As results complete, retrieve them
    for job in as_completed(job_queue):
        result = job.result()
        if result: # Only add if result is not None (in case of total failure)
            parsed_results.append(result)
        # Optional: Add a progress indicator
        print(f"Processed {len(parsed_results)}/{len(data_for_processing)} verbatims...", end='\r')

print("\nBatch processing complete.")


Starting batch processing with 4 concurrent requests...
Raw response was: 
JSON decoding error for ID verbatim_14: Expecting ',' delimiter: line 6 column 6 (char 109)
Raw response: {
  "topics": [
    {
      "topic": "Facilities and Campus Environment",
      "sentiment": "Negative"
    }
  ],
  "verbatim_text": "The campus security could be better, I don't feel entirely safe at night
Processed 15/15 verbatims...
Batch processing complete.


In [11]:
for i, result in enumerate(parsed_results):
    print(f"Verbatim {i+1} : {result['verbatim_text']}")
    
    for row in result['topics_and_sentiments']:
        print(f"\tTopic : {row['topic']}")
        print(f"\tSentiment : {row['sentiment']}\n")
    
    print("")    

Verbatim 1 : My final assignment feedback was very vague, and I don't know what to improve on.
	Topic : Assessment and Feedback
	Sentiment : Negative


Verbatim 2 : I tried to get help from the student welfare office, but they were closed.
	Topic : Student Welfare and Wellbeing
	Sentiment : Negative


Verbatim 3 : The course material is outdated; we're still learning about software from five years ago.
	Topic : Course Content and Relevance
	Sentiment : Negative


Verbatim 4 : My classes are all over the place. I have to come to campus three times a week for just one or two hours each time.
	Topic : Timetable and Scheduling
	Sentiment : Negative


Verbatim 5 : I'm not sure if this is the right career path for me after finishing this course.
	Topic : Career and Employment Services
	Sentiment : Negative


Verbatim 6 : The computer labs have really old computers, and some don't even work properly.
	Topic : No Match
	Sentiment : N/A


Verbatim 7 : The process to apply for this course was so

In [12]:
def format_topics_and_sentiments(topics_list):
    if len(topics_list)==0:
        return ""
    
    # Create formatted string
    formatted_pairs = [f"{item['topic']} ({item['sentiment']})" for item in topics_list]
    
    # Join values with comma and space
    return ", ".join(formatted_pairs)


In [13]:
# Convert result to dataframe
df = pd.DataFrame(parsed_results)

# Format topics list. If blank, then show No Match
df['topics_and_sentiments'] = df['topics_and_sentiments'].apply(format_topics_and_sentiments)
df['topics_and_sentiments'] = df['topics_and_sentiments'].apply(lambda x: 'No Match' if len(x)==0 else x)

print("\n--- Final Results DataFrame ---")
display(df[['verbatim_text','topics_and_sentiments']]) # Using display to render the DataFrame


--- Final Results DataFrame ---


Unnamed: 0,verbatim_text,topics_and_sentiments
0,"My final assignment feedback was very vague, a...",Assessment and Feedback (Negative)
1,I tried to get help from the student welfare o...,Student Welfare and Wellbeing (Negative)
2,The course material is outdated; we're still l...,Course Content and Relevance (Negative)
3,My classes are all over the place. I have to c...,Timetable and Scheduling (Negative)
4,I'm not sure if this is the right career path ...,Career and Employment Services (Negative)
5,"The computer labs have really old computers, a...",No Match (N/A)
6,The process to apply for this course was so co...,"Enrolment Process (Negative), Online Learning ..."
7,My work placement was very unorganized and I f...,Work Placement (Negative)
8,The person who was meant to help me with my en...,"Enrolment Process (Negative), Communication an..."
9,"The cost of the textbooks is way too high, and...",Course Fees and Payments (Negative)


In [14]:
# Test
test_verbatim = {"custom_id": "test_1", "verbatim_text": "My trainer is great, but the course content is outdated."}
result = return_topics(test_verbatim)
print("Test result:", result)

Test result: {'custom_id': 'test_1', 'topics_and_sentiments': [{'topic': 'Trainer Quality and Engagement', 'sentiment': 'Positive'}, {'topic': 'Course Content and Relevance', 'sentiment': 'Negative'}], 'verbatim_text': 'My trainer is great, but the course content is outdated.'}
