# Task2:  LLM Model

In [7]:
import os
import pandas as pd
# from google import genai
import time
import ollama
from ollama import chat
from ollama import ChatResponse

import pandas as pd


# Feedback data

In [8]:
# Load Dataset
df = pd.read_csv("Datasets/dummyFeedbackdataWithclasses.csv")  # Ensure the dataset has 'text' and 'label' columns
print(len(df))
# Preprocessing
def preprocess_data(text):
    return text.lower().strip()

# Check for null values in the DataFrame
null_values = df.isnull().sum()

# Print the columns with null values
print(null_values[null_values > 0])
#remove null values    
df = df.dropna(subset=['Comments'])
print(len(df))

2920
Comments                        23
llm_response                  2910
Teaching Pedagogy_terms       2914
Teaching Pedagogy_polarity    2914
Knowledge_terms               2916
Knowledge_polarity            2916
Experience_terms              2919
Experience_polarity           2919
dtype: int64
2897


In [9]:
# Filter rows where the 'Comments' column contains only a single word
single_word_data = df[df['Comments'].str.split().str.len() == 1]
print(len(single_word_data))
# Display the filtered data
single_word_data.head(20)

847


Unnamed: 0,FacultyName,Course,Comments,Target,Class,llm_response,Teaching Pedagogy_terms,Teaching Pedagogy_polarity,Knowledge_terms,Knowledge_polarity,Experience_terms,Experience_polarity
0,NIMRA MUGHAL,Programming Fundamentals(Pr),.,Teacher,Class 1,,,,,,,
2,NIMRA MUGHAL,Programming Fundamentals(Pr),bestttt,Course,Class 1,,,,,,,
3,NIMRA MUGHAL,Programming Fundamentals(Pr),best,Course,Class 1,,,,,,,
5,NIMRA MUGHAL,Programming Fundamentals(Pr),Good,Course,Class 1,,,,,,,
6,NIMRA MUGHAL,Programming Fundamentals(Pr),hard,Course,Class 1,,,,,,,
19,NIMRA MUGHAL,Programming Fundamentals(Pr),Informative,Course,Class 1,,,,,,,
24,NIMRA MUGHAL,Programming Fundamentals(Pr),Knowledgeable,Course,Class 1,,,,,,,
26,NIMRA MUGHAL,Programming Fundamentals(Pr),knowledgable,Course,Class 1,,,,,,,
27,NIMRA MUGHAL,Programming Fundamentals(Pr),good,Course,Class 1,,,,,,,
30,NIMRA MUGHAL,Programming Fundamentals(Pr),good,Course,Class 2,,,,,,,


In [10]:
#Remove the single_word_data from the original DataFrame
df = df[~df['Comments'].isin(single_word_data['Comments'])]
print(len(df))


2050


## LLM Prompt

In [11]:
system_prompt = """
You are an expert aspect based sentiment analysis model. Given a review about a teacher, extract relevant information based on the following aspect categories:

-Teaching Pedagogy

-Knowledge

-Fair in Assessment

-Experience

-Behavior

If the aspect has been mentioned in the review, provide:

Extracted Phrase or Evidence from Review

Polarity: One of {Positive, Negative, Neutral}

If an aspect is not mentioned, do not include it in the output.
The output should be in JSON format with the following structure:
```json
{
  "Teaching Pedagogy": {
    "Extracted Phrase": "...",
    "Polarity": "..."
  },
  "Knowledge": {
    "Extracted Phrase": "...",
    "Polarity": "..."
  },  
  "Fair in Assessment
": {
    "Extracted Phrase": "...",
    "Polarity": "..."
  },
...
}

If any of the aspects are not mentioned in the review, do not include them in the output.
"""

In [12]:
system_prompt = """
    You are an expert in Aspect-Based Sentiment Analysis (ABSA). Your task is to analyze teacher reviews and extract aspect-specific information based on the following predefined aspect categories:

    - Teaching Pedagogy  
    - Knowledge  
    - Fair in Assessment  
    - Experience  
    - Behavior  

    For each aspect that is **explicitly or implicitly mentioned** in the review:

    1. Identify and extract the **aspect term(s) or phrase(s)** used in the review that are related to the aspect category.
    2. Determine the **sentiment polarity** expressed toward that aspect. Choose one of: {Positive, Negative, Neutral}.

    If an aspect is not mentioned in the review, **do not include it in the output**.

    Return the output in a structured JSON format as follows:
    ```json
    {
    "Aspect Category": {
        "Aspect Terms": ["..."],
        "Extracted Phrase": "...",
        "Polarity": "..."
    },
    ...
    }
"""

## Select Data sample

## OLamma other models

In [13]:
import pandas as pd
import json

# Your existing Ollama call function
def ask_ollama(input_content, system_prompt, model_name="mistral"):
    response = ollama.chat(model=model_name, messages=[
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': input_content}
    ])
    response_text = response['message']['content'].strip()
    return response_text



### Mistral

In [14]:
# Example usage with the ask_deepseek function
input_content = "The teacher is very good in terms of knowledge and wisdom. She always prepared for teaching. She doesn't do favouritism."
response = ask_ollama(input_content, system_prompt, 'mistral')
print(type(response))
print(response)

<class 'str'>
```json
{
    "Knowledge": {
        "Aspect Terms": ["knowledge", "wisdom"],
        "Extracted Phrase": "The teacher is very good in terms of knowledge and wisdom.",
        "Polarity": "Positive"
    },
    "Teaching Pedagogy": {
        "Aspect Terms": ["prepared for teaching"],
        "Extracted Phrase": "She always prepared for teaching.",
        "Polarity": "Positive"
    },
    "Fair in Assessment": {
        "Aspect Terms": ["does not do favouritism"],
        "Extracted Phrase": "She doesn't do favouritism.",
        "Polarity": "Positive"
    }
}
```


In [15]:
import json

def parse_json_safe(response_text):
    try:
        # Try to extract just the JSON part if it's embedded in extra text
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        json_substring = response_text[json_start:json_end]
        return json.loads(json_substring)
    except Exception as e:
        print(f"JSON parsing failed: {e}\nRaw response: {response_text}")
        return None


In [16]:
# Define aspects and related columns
aspects = ["Teaching Pedagogy", "Knowledge", "Fair in Assessment", "Experience", "Behavior"]
term_columns = [f"{aspect}_terms" for aspect in aspects]
polarity_columns = [f"{aspect}_polarity" for aspect in aspects]

# Add empty columns
for col in term_columns + polarity_columns:
    if col not in df.columns:
        df[col] = ""

# Get indices to update
indices_to_process = df[df['Target'] == 'Teacher'].index

# Initialize batch buffer
batch_records = []
batch_size = 10


In [17]:
model_name = 'mistral'  # Replace with your model name
for count, idx in enumerate(indices_to_process, start=1):
    feedback = df.at[idx, 'Comments']
    try:
        result_json = ask_ollama(feedback, system_prompt)
        df.at[idx, "llm_response"] = result_json  # Save raw LLM response
        
        result_dict = parse_json_safe(result_json)

        if result_dict:
            for aspect in aspects:
                if aspect in result_dict:
                    df.at[idx, f"{aspect}_terms"] = result_dict[aspect].get("Extracted Phrase", "")
                    df.at[idx, f"{aspect}_polarity"] = result_dict[aspect].get("Polarity", "")
    except Exception as e:
        print(f"Error at index {idx}: {e}")
        continue

    if count % batch_size == 0 or count == len(indices_to_process):
        df.loc[indices_to_process].to_csv("Datasets/checkpoint_feedback_" + model_name +"2.csv", index=False)
        print(f"Checkpoint saved after {count} rows.")


Checkpoint saved after 10 rows.
Checkpoint saved after 20 rows.
Error at index 62: 'NoneType' object has no attribute 'get'
JSON parsing failed: Expecting property name enclosed in double quotes: line 7 column 1 (char 143)
Raw response: ```json
{
  "Teaching Pedagogy": {
      "Aspect Terms": ["teacher"],
      "Extracted Phrase": "very good teacher",
      "Polarity": "Positive"
  },
}
```

In this example, since the review explicitly mentions the teacher and expresses a positive sentiment, only the Teaching Pedagogy aspect is included in the output. The other aspects (Knowledge, Fair in Assessment, Experience, Behavior) are not mentioned or implied in the given review, so they are not returned.
Checkpoint saved after 30 rows.
JSON parsing failed: Expecting ',' delimiter: line 25 column 29 (char 1013)
Raw response: ```json
{
  "Teaching Pedagogy": {
      "Aspect Terms": ["effective teaching methods", "innovative lesson plans"],
      "Extracted Phrase": "The teacher used effective te

In [18]:
# # Add a 'Class' column
# df = pd.read_csv("Datasets/dummyFeedbackdata_nimra.csv")
# df['Class'] = None

# # Group by teacher and subject
# grouped = df.groupby(['FacultyName', 'Course'])

# # Assign class names
# for (teacher, subject), group in grouped:
#     num_records = len(group)
#     num_classes = (num_records + 29) // 30  # Calculate the number of classes (30 students per class)
#     class_names = [f"Class {i+1}" for i in range(num_classes)]  # Generate class names

#     # Assign class names to each record
#     class_assignments = []
#     for i, record in enumerate(group.index):
#         class_assignments.append(class_names[i // 30])  # Assign class based on record index

#     df.loc[group.index, 'Class'] = class_assignments

# # Save the updated DataFrame
# df.to_csv("Datasets/dummyFeedbackdataWithclasses.csv", index=False)

### Qwen

In [None]:
df = pd.read_csv("Datasets/dummyFeedbackdataWithclasses.csv")
model_name = 'qwen2.5-coder:3b'  # Replace with your model name
for count, idx in enumerate(indices_to_process, start=1):
    feedback = df.at[idx, 'Comments']
    try:
        result_json = ask_ollama(feedback, system_prompt, 'qwen2.5-coder:3b')
        df.at[idx, "llm_response"] = result_json  # Save raw LLM response
        
        result_dict = parse_json_safe(result_json)

        if result_dict:
            for aspect in aspects:
                if aspect in result_dict:
                    df.at[idx, f"{aspect}_terms"] = result_dict[aspect].get("Extracted Phrase", "")
                    df.at[idx, f"{aspect}_polarity"] = result_dict[aspect].get("Polarity", "")
    except Exception as e:
        print(f"Error at index {idx}: {e}")
        continue

    if count % batch_size == 0 or count == len(indices_to_process):
        df.loc[indices_to_process].to_csv("Datasets/checkpoint_feedback_Qwen2.csv", index=False)
        print(f"Checkpoint saved after {count} rows.")


Error at index 48: 'NoneType' object has no attribute 'get'
JSON parsing failed: Invalid control character at: line 10 column 22 (char 202)
Raw response: {
  "Teaching Pedagogy": {
    "Extracted Phrase": "Very good teacher",
    "Polarity": "Positive"
  },
  "Knowledge": {
    "Extracted Phrase": "...",
    "Polarity": "..."
  },  
  "Fair in Assessment
": {
    "Extracted Phrase": "...",
    "Polarity": "..."
  },
  "Experience": {
    "Extracted Phrase": "...",
    "Polarity": "..."
  },
  "Behavior": {
    "Extracted Phrase": "...",
    "Polarity": "..."
  }
}
Checkpoint saved after 10 rows.
Checkpoint saved after 20 rows.
JSON parsing failed: Invalid control character at: line 10 column 22 (char 215)
Raw response: ```json
{
  "Teaching Pedagogy": {
    "Extracted Phrase": "being there in your consulting hours",
    "Polarity": "Positive"
  },
  "Knowledge": {
    "Extracted Phrase": "",
    "Polarity": ""
  },  
  "Fair in Assessment
": {
    "Extracted Phrase": "",
    "Polarity"