In [2]:
# Install Dependencies
%pip install ollama
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Import
from collections import Counter
from ollama import AsyncClient
import json
import asyncio
import base64
import os
import re
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [4]:
# Regex for extracting labeled folder containing sentiment and model output
def extract_sentiment(target):
  pattern = r'(Positive|Negative|Neutral)'
  match = re.search(pattern, target, re.IGNORECASE)
  return match.group(1) if match else None

In [5]:
# Function to encode the image
def encode_b64image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode("utf-8")

In [6]:
async def image_input(img_path, model, temperature, top_p, prompt, examples):
    base64_image = encode_b64image(img_path) 
    system_prompt = {
        'role': 'system',
        'content': prompt
    }
    user_prompt = {
        'role': 'user',
        'content': '<task>Analyze the primary sentiment of the person in the provided image.</task>',
        'images': [base64_image]
    }
    
    # Await the single, complete response from the API
    response = await AsyncClient().chat(
        model=model,
        options={
            'temperature': temperature,
            'top_p': top_p,
        },
        messages = [system_prompt] + examples + [user_prompt],
    )
    
    content = extract_sentiment(response['message']['content'])
    
    return content

In [7]:
# Iterating images in the dataset
def image_iterator(dataset_path, label):
  files_labelled = []

  for root, dir, files in os.walk(dataset_path):

    # Process image files
    sentiment = extract_sentiment(root)

    if sentiment == label:
      dir.sort()
      files.sort()

      for file in files:
        if file.endswith('.jpg'):
          file_path = os.path.join(root, file)
          files_labelled.append({
              'path': file_path,
              'label': sentiment
          })

  return files_labelled

In [8]:
def get_majority_vote(predictions, fallback='Neutral'):
    """
    Determines the majority vote from a list of predictions.
    If there is a tie, it returns the fallback value.
    """
    valid_predictions = [p for p in predictions if p]
    if not valid_predictions:
        return fallback

    votes = Counter(valid_predictions)
    most_common_votes = votes.most_common(2)

    if len(most_common_votes) == 1 or most_common_votes[0][1] > most_common_votes[1][1]:
        return most_common_votes[0][0]
    else:
        print('Warning: Fallback Occurred.')
        return fallback

In [9]:
async def image_prediction(image_path, model, temperature, top_p, prompt, examples, num_runs=3):
    # Create and run 'num_runs' tasks for the same image concurrently
    tasks = [image_input(image_path, model, temperature, top_p, prompt, examples) for _ in range(num_runs)]
    raw_predictions = await asyncio.gather(*tasks)
    return raw_predictions

async def api_request(dataset, model, temperature, top_p, prompt, examples=None, num_runs=3):
    processed_results = []
    if examples is None:
        examples = []

    # Iterate through the dataset with a standard for loop
    for i, result in enumerate(dataset):
        path = '/'.join(result['path'].split('/')[-4:])
        print(f'It{i+1}: Processing {path} for {num_runs} runs...')

        try:
            raw_predictions = await image_prediction(result['path'], model, temperature, top_p, prompt, examples, num_runs)
            
            # Determine the final prediction using the majority vote logic
            final_prediction = get_majority_vote(raw_predictions, fallback='Neutral')
            
            result['predictions_raw'] = raw_predictions
            result['prediction'] = final_prediction
            
            print(f"It{i+1}: Raw: {raw_predictions} -> Final: {final_prediction} for {path}")

        except Exception as e:
            print(f'Error processing {path}: {e}')
            result['predictions_raw'] = []
            result['prediction'] = 'Error'
        
        processed_results.append(result)

    return processed_results

In [10]:
dataset_path = './Dataset/Face'

print(os.listdir(dataset_path))

dataset_positive = image_iterator(dataset_path, 'Positive')
dataset_negative = image_iterator(dataset_path, 'Negative')
dataset_neutral = image_iterator(dataset_path, 'Neutral')

print(len(dataset_positive))
print(len(dataset_negative))
print(len(dataset_neutral))

FileNotFoundError: [Errno 2] No such file or directory: './Dataset/Face'

In [10]:
# Split positive
dataset_positive_train, dataset_positive_test = train_test_split(
    dataset_positive, test_size=0.20, shuffle=True, random_state=0)

# Split negative
dataset_negative_train, dataset_negative_test = train_test_split(
    dataset_negative, test_size=0.20, shuffle=True, random_state=0)

# Split neutral
dataset_neutral_train, dataset_neutral_test = train_test_split(
    dataset_neutral, test_size=0.20, shuffle=True, random_state=0)

print(len(dataset_positive_train))
print(len(dataset_positive_test))
print(len(dataset_negative_train))
print(len(dataset_negative_test))
print(len(dataset_neutral_train))
print(len(dataset_neutral_test))

2159
540
4364
1092
3222
806


In [11]:
dataset_test = dataset_positive_test + dataset_negative_test + dataset_neutral_test

print(len(dataset_test))

2438


In [12]:
simple_prompt = '<output_format>Respond with exactly one word from these options: Positive, Negative, or Neutral</output_format>'
system_prompt = """
You are an expert at analyzing facial expressions and body language to determine emotional sentiment.

<instructions>
Focus on these visual cues:
- Facial expressions (smile, frown, eyebrow position, eye expression)
- Body posture and positioning  
- Overall emotional cues visible in the image
</instructions>

<output_format>
Respond with exactly one word from these options: Positive, Negative, or Neutral
</output_format>
"""

facs_prompt = """
You are an expert at analyzing facial expressions using the Facial Action Coding System (F.A.C.S.) to determine emotional sentiment.

<facs_guidelines>
Analyze these specific Facial Action Units (AUs) to determine sentiment:

**POSITIVE SENTIMENT INDICATORS:**
- AU 6 (Cheek Raiser): Raised cheeks, crow's feet around eyes
- AU 12 (Lip Corner Puller): Corners of mouth pulled upward
- AU 25 (Lips Part): Relaxed, slightly parted lips
- AU 26 (Jaw Drop): Slight jaw opening in genuine smile
- Combination: AU 6 + AU 12 = Genuine happiness (Duchenne smile)

**NEGATIVE SENTIMENT INDICATORS:**
- AU 1 (Inner Brow Raiser): Inner portions of eyebrows raised
- AU 4 (Brow Lowerer): Entire brow pulled down and together
- AU 15 (Lip Corner Depressor): Corners of mouth pulled downward
- AU 17 (Chin Raiser): Lower lip pushed up, chin wrinkled
- AU 20 (Lip Stretcher): Lips stretched horizontally
- Combination: AU 1 + AU 4 + AU 15 = Clear negative sentiment

**NEUTRAL SENTIMENT INDICATORS:**
- AU 0: Absence of significant facial muscle activation
- Relaxed facial state with minimal AU activation
- Slight AU activation that doesn't clearly indicate positive or negative

**MIXED/AMBIGUOUS PATTERNS:**
- Conflicting AUs (e.g., AU 12 + AU 4): Analyze dominant pattern
- Micro-expressions: Brief, subtle AU activations
- Masked emotions: Deliberate suppression of natural expression
</facs_guidelines>

<analysis_process>
1. Systematically examine each facial region (brows, eyes, cheeks, mouth, chin)
2. Identify active Action Units based on visible muscle contractions
3. Apply F.A.C.S. combination rules to determine sentiment
4. Consider intensity and genuineness of expressions
5. Account for cultural and individual variations in expression
</analysis_process>

<output_format>
Respond with exactly one word from these options: Positive, Negative, or Neutral
</output_format>
"""

In [13]:
examples_3 = [
    {
        'role': 'user',
        'content': '''<task>Analyze the primary sentiment of the person in this image.</task>
        <image_description>Person with clear positive facial expression</image_description>''',
        'images': [encode_b64image('./Dataset/Face/Positive/01-01-03-02-02-02-11/20.jpg')]
    },
    {
        'role': 'assistant', 
        'content': '<sentiment>Positive</sentiment>', 
    },
    {
        'role': 'user', 
        'content': '''<task>Analyze the primary sentiment of the person in this image.</task>
        <image_description>Person with clear negative facial expression</image_description>''', 
        'images': [encode_b64image('./Dataset/Face/Negative/01-01-05-02-02-02-07/12.jpg')]
    },
    {
        'role': 'assistant', 
        'content': '<sentiment>Negative</sentiment>',
    },
    {
        'role': 'user', 
        'content': '''<task>Analyze the primary sentiment of the person in this image.</task>
        <image_description>Person with neutral facial expression</image_description>''', 
        'images': [encode_b64image('./Dataset/Face/Neutral/01-01-02-01-02-01-11/1.jpg')]
    },
    {
        'role': 'assistant', 
        'content': '<sentiment>Neutral</sentiment>', 
    }
]

In [26]:
# Vision Model List
#
# llama4:scout
# qwen2.5vl:32b-q8_0
# mistral-small3.2:24b-instruct-2506-q8_0
# gemma3:27b-it-q8_0
# llama3.2-vision:11b-instruct-q8_0

model = 'llama4:scout'
temperature = 0 # 0, 0.8
top_p = 0.07 # 0.07, 1

results = await api_request(\
    dataset=dataset_test, model=model, temperature=temperature, top_p=top_p, prompt=facs_prompt, num_runs=3)

# results = await api_request(\
#     dataset=dataset_test, model=model, temperature=temperature, top_p=top_p, prompt=system_prompt, examples=examples_3, num_runs=3)


It1: Processing Face/Positive/01-01-03-02-02-02-04/3.jpg for 3 runs...
It1: Raw: ['positive', 'positive', 'positive'] -> Final: positive for Face/Positive/01-01-03-02-02-02-04/3.jpg
It2: Processing Face/Positive/01-01-03-02-02-02-07/17.jpg for 3 runs...
It2: Raw: ['positive', 'positive', 'positive'] -> Final: positive for Face/Positive/01-01-03-02-02-02-07/17.jpg
It3: Processing Face/Positive/01-01-03-01-02-01-13/7.jpg for 3 runs...
It3: Raw: ['neutral', 'neutral', 'neutral'] -> Final: neutral for Face/Positive/01-01-03-01-02-01-13/7.jpg
It4: Processing Face/Positive/01-01-03-02-02-02-07/11.jpg for 3 runs...
It4: Raw: ['positive', 'positive', 'positive'] -> Final: positive for Face/Positive/01-01-03-02-02-02-07/11.jpg
It5: Processing Face/Positive/01-01-03-02-01-02-06/4.jpg for 3 runs...
It5: Raw: ['positive', 'positive', 'positive'] -> Final: positive for Face/Positive/01-01-03-02-01-02-06/4.jpg
It6: Processing Face/Positive/01-01-03-02-02-02-19/10.jpg for 3 runs...
It6: Raw: ['negati

In [1]:
results

NameError: name 'results' is not defined

In [22]:
# Export results to .csv
def export_to_csv(results, examples=None, few_shot=False, prefix='face_results'):
    if few_shot is True:
        examples_count = int(len(examples) / 2)
        filename = f'{prefix}_{examples_count}_shot_{model}_{temperature}_{top_p}.csv'
    else:
        filename = f'{prefix}_{model}_{temperature}_{top_p}.csv'
    df = pd.DataFrame(results)
    df.to_csv(filename, index=False)
    print(f"Results exported to {filename}")

In [23]:
# export_to_csv(results, examples_3, few_shot=True)
# export_to_csv(results)
export_to_csv(results, prefix='face_results_facs')

Results exported to face_results_facs_qwen2.5vl:32b-q8_0_0.8_1.csv


In [24]:
def results_report_and_export_json(results, examples=None, few_shot=False, prefix='face_metrics'): 

    # Extract ground truth labels and predictions
    y_true = [item['label'] for item in results]
    y_pred = [item['prediction'] for item in results]

    # Calculate basic metrics
    accuracy = accuracy_score(y_true, y_pred)

    # Get unique labels for multi-class metrics
    labels = list(set(y_true + y_pred))

    # Calculate precision, recall, F1 with different averaging methods (macro and micro)
    precision_macro = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall_macro = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)

    precision_micro = precision_score(y_true, y_pred, average='micro', zero_division=0)
    recall_micro = recall_score(y_true, y_pred, average='micro', zero_division=0)
    f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=0)

    # Calculate per-class metrics
    precision_per_class = precision_score(y_true, y_pred, average=None, zero_division=0, labels=labels)
    recall_per_class = recall_score(y_true, y_pred, average=None, zero_division=0, labels=labels)
    f1_per_class = f1_score(y_true, y_pred, average=None, zero_division=0, labels=labels)

    # Create metrics dictionary
    metrics = {
        'accuracy': accuracy,
        'precision_macro': precision_macro,
        'recall_macro': recall_macro,
        'f1_macro': f1_macro,
        'precision_micro': precision_micro,
        'recall_micro': recall_micro,
        'f1_micro': f1_micro,
        'per_class_metrics': {
            label: {
                'precision': precision_per_class[i],
                'recall': recall_per_class[i],
                'f1': f1_per_class[i]
            } for i, label in enumerate(labels)
        }
    }

    print("="*50)
    print("ACCURACY METRICS REPORT")
    print("="*50)

    print(f"Total samples: {len(results)}")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print()

    print("Macro-averaged metrics:")
    print(f"  Precision: {metrics['precision_macro']:.4f}")
    print(f"  Recall: {metrics['recall_macro']:.4f}")
    print(f"  F1-score: {metrics['f1_macro']:.4f}")
    print()

    print("Micro-averaged metrics:")
    print(f"  Precision: {metrics['precision_micro']:.4f}")
    print(f"  Recall: {metrics['recall_micro']:.4f}")
    print(f"  F1-score: {metrics['f1_micro']:.4f}")
    print()

    print("Per-class metrics:")
    for label, class_metrics in metrics['per_class_metrics'].items():
        print(f"  {label}:")
        print(f"    Precision: {class_metrics['precision']:.4f}")
        print(f"    Recall: {class_metrics['recall']:.4f}")
        print(f"    F1-score: {class_metrics['f1']:.4f}")
    print()

    print("Detailed Classification Report:")
    print(classification_report(y_true, y_pred))

    print("Confusion Matrix:")
    cm = confusion_matrix(y_true, y_pred)
    labels = sorted(list(set(y_true + y_pred)))
    print(f"Labels: {labels}")
    print(cm)

    print("="*50)

    if few_shot is True:
        examples_count = int(len(examples) / 2)
        filename = f'{prefix}_{examples_count}_shot_{model}_{temperature}_{top_p}.json'
    else:
        filename = f'{prefix}_{model}_{temperature}_{top_p}.json'
    
    with open(filename, 'w') as f:
        json.dump(metrics, f, indent=2)
    
    print(f"Exported to {filename}")

In [25]:
# results_report_and_export_json(results, examples_3, few_shot=True)
# results_report_and_export_json(results)
results_report_and_export_json(results, prefix='face_metrics_facs')

ACCURACY METRICS REPORT
Total samples: 2438
Accuracy: 0.5373

Macro-averaged metrics:
  Precision: 0.6684
  Recall: 0.6067
  F1-score: 0.5464

Micro-averaged metrics:
  Precision: 0.5373
  Recall: 0.5373
  F1-score: 0.5373

Per-class metrics:
  Neutral:
    Precision: 0.4055
    Recall: 0.7320
    F1-score: 0.5219
  Negative:
    Precision: 0.9527
    Recall: 0.2399
    F1-score: 0.3833
  Positive:
    Precision: 0.6469
    Recall: 0.8481
    F1-score: 0.7340

Detailed Classification Report:
              precision    recall  f1-score   support

    Negative       0.95      0.24      0.38      1092
     Neutral       0.41      0.73      0.52       806
    Positive       0.65      0.85      0.73       540

    accuracy                           0.54      2438
   macro avg       0.67      0.61      0.55      2438
weighted avg       0.70      0.54      0.51      2438

Confusion Matrix:
Labels: ['Negative', 'Neutral', 'Positive']
[[262 784  46]
 [ 12 590 204]
 [  1  81 458]]
Exported to fa