In [None]:
# 07_Baseline_Gemma3_Testing.ipynb
import os
import sys
import json
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm.notebook import tqdm
import time
import subprocess
from dotenv import load_dotenv
import google.generativeai as genai
from IPython.display import display, Markdown

# Make the parent (project root) directory importable from this notebook
sys.path.append(os.pardir)

# Pull settings from a local .env file into the process environment
load_dotenv()

True

In [2]:
# Hand the API key from the environment (None if unset) to the Google Generative AI client
genai.configure(api_key=os.environ.get('GOOGLE_API_KEY'))

## Load Test Dataset

In [3]:
# Load test dataset.
# Resolution order:
#   1. ../Data/GeneratedCases/test_cases.parquet
#   2. any other "*test*.parquet" file in that directory (first in sorted order)
#   3. a fresh 20% split of evaluated_cases.parquet
# Raises FileNotFoundError when none of these exist.
print("Loading test dataset...")
data_dir = Path("../Data/GeneratedCases")
test_file = data_dir / "test_cases.parquet"

if test_file.exists():
    test_df = pd.read_parquet(test_file)
else:
    print(f"⚠️ Could not find {test_file}")
    # Sort so the chosen fallback does not depend on filesystem listing order
    parquet_files = sorted(data_dir.glob("*test*.parquet"))
    if parquet_files:
        print(f"Found these test parquet files: {[f.name for f in parquet_files]}")
        test_file = parquet_files[0]
        print(f"Using: {test_file}")
        # Actually read the fallback file so test_df is defined on this path too
        test_df = pd.read_parquet(test_file)
    else:
        # Last resort: derive a test split from the full evaluated dataset
        eval_file = data_dir / "evaluated_cases.parquet"
        if not eval_file.exists():
            raise FileNotFoundError(f"No test dataset files found in {data_dir}")
        print(f"Using {eval_file} and splitting it into train/test...")
        df = pd.read_parquet(eval_file)
        # Imported here because scikit-learn is only needed on this fallback path
        from sklearn.model_selection import train_test_split
        # Fixed random_state keeps the derived test split reproducible
        _, test_df = train_test_split(df, test_size=0.2, random_state=42)
        print(f"Created test set with {len(test_df)} cases")

print(f"Loaded {len(test_df)} test cases")

# Display a sample of the test dataset
print("\nSample test case:")
sample = test_df.iloc[0]
print(f"Case ID: {sample['case_id']}")
print(f"Title: {sample['title']}")
print(f"Enhanced case (first 200 chars): {sample['enhanced_case'][:200]}...")

Loading test dataset...
Loaded 62 test cases

Sample test case:
Case ID: case-20250329-001124-k553a3
Title: Baltic Bottleneck: Navigating Congestion and Compliance in Alpha Shippings Asia-Europe Route
Enhanced case (first 200 chars): **Case Title: Baltic Bottleneck: Navigating Congestion and Compliance in Alpha Shipping's Asia-Europe Route** Alpha Shipping, a medium-sized container shipping company based in Hong Kong, specializes ...


In [4]:
# Preview the first five test cases
test_df.head(5)

Unnamed: 0,case_id,title,enhanced_case,solution,file_path,enhanced_case_length,solution_length,realism_score,complexity_score,educational_value,solution_quality,overall_qualification,evaluation_summary,improvement_suggestions,case_for_embedding
183,case-20250329-001124-k553a3,Baltic Bottleneck: Navigating Congestion and C...,**Case Title: Baltic Bottleneck: Navigating Co...,## Executive Summary Alpha Shipping faces chal...,/Users/max/Documents/Code/magdeburg25/Output/G...,5821,85737,8.0,7.0,8.0,7.0,QUALIFIED,The case study presents a realistic scenario o...,[The solution could benefit from a more detail...,Baltic Bottleneck: Navigating Congestion and C...
60,case-20250329-082454-w9yo73,Gdansk Gridlock: Nordic SeaLines Struggle for ...,**Case Title: Gdansk Gridlock: Nordic SeaLines...,## Executive Summary Nordic SeaLines faces a s...,/Users/max/Documents/Code/magdeburg25/Output/G...,6872,82054,8.0,7.0,8.0,7.0,QUALIFIED,The case study presents a realistic scenario o...,[The solution could benefit from a more detail...,Gdansk Gridlock: Nordic SeaLines Struggle for ...
124,case-20250330-081631-9ahbtr,**Baltic Squeeze: Navigating Congestion and Su...,**Case Study: Baltic Squeeze: Navigating Conge...,## Executive Summary Baltic Breeze Seafoods (B...,/Users/max/Documents/Code/magdeburg25/Output/G...,7054,8970,8.0,7.0,9.0,8.0,QUALIFIED,The case study presents a realistic scenario o...,[Expand on the specific KPIs (Key Performance ...,**Baltic Squeeze: Navigating Congestion and Su...
93,case-20250329-210212-zq31iw,**OceanExs Baltic Gamble: Navigating Supply Ch...,"**Scenario:** OceanEx Logistics, a rapidly gro...",## Executive Summary OceanEx Logistics is faci...,/Users/max/Documents/Code/magdeburg25/Output/G...,7074,83931,9.0,8.0,9.0,8.0,QUALIFIED,The case study presents a highly realistic sce...,[Expand on the risk assessment for using Klaip...,**OceanExs Baltic Gamble: Navigating Supply Ch...
63,case-20250330-073349-b2bxaz,Navigating Arctic Ambitions: A Polar Silk Rout...,"**Scenario:** Klaus Weber, Head of Route Optim...",## Executive Summary Nordic Star Logistics fac...,/Users/max/Documents/Code/magdeburg25/Output/G...,8644,113848,8.0,9.0,9.0,8.0,QUALIFIED,The case study presents a realistic and comple...,[The cost-benefit analysis could be enhanced b...,Navigating Arctic Ambitions: A Polar Silk Rout...


## Prepare Cases for Testing

In [5]:
# Reduce each test row to the three fields the model prompt needs
test_cases = [
    {"case_id": case_id, "title": title, "case_text": case_text}
    for case_id, title, case_text in zip(
        test_df["case_id"], test_df["title"], test_df["enhanced_case"]
    )
]

print(f"\nPrepared {len(test_cases)} cases for testing")


Prepared 62 cases for testing


## Set Up Gemma 3 Model

In [6]:
# Set up Gemma 3 model
print("\nSetting up Gemma 3 model...")

# List available models to confirm Gemma 3 access; keep only those that
# advertise the generateContent method.
models = [m.name for m in genai.list_models()
          if 'generateContent' in m.supported_generation_methods]

print("Available models:")
for model in models:
    print(f" - {model}")


def _is_gemma3_27b(name):
    """Return True when a model name identifies a Gemma 3 27B variant.

    Matches on the explicit "gemma-3"/"gemma3" family marker rather than any
    "3" in the name, so digits in version or date suffixes of other Gemma
    generations cannot trigger a false match. Comparison is case-insensitive.
    """
    lowered = name.lower()
    is_gemma3 = 'gemma-3' in lowered or 'gemma3' in lowered
    is_27b = '27b' in lowered or '27-b' in lowered
    return is_gemma3 and is_27b


# Select the appropriate Gemma 3 model: prefer Gemma 3 27B, otherwise the
# first Gemma model listed, otherwise fall back to a Gemini model.
gemma_models = [m for m in models if 'gemma' in m.lower()]
if gemma_models:
    model_name = next((m for m in gemma_models if _is_gemma3_27b(m)), gemma_models[0])
    print(f"\nUsing model: {model_name}")
else:
    print("⚠️ No Gemma models found. Using default model.")
    model_name = "gemini-1.5-pro"  # Fallback option


Setting up Gemma 3 model...
Available models:
 - models/gemini-1.0-pro-vision-latest
 - models/gemini-pro-vision
 - models/gemini-1.5-pro-latest
 - models/gemini-1.5-pro-001
 - models/gemini-1.5-pro-002
 - models/gemini-1.5-pro
 - models/gemini-1.5-flash-latest
 - models/gemini-1.5-flash-001
 - models/gemini-1.5-flash-001-tuning
 - models/gemini-1.5-flash
 - models/gemini-1.5-flash-002
 - models/gemini-1.5-flash-8b
 - models/gemini-1.5-flash-8b-001
 - models/gemini-1.5-flash-8b-latest
 - models/gemini-1.5-flash-8b-exp-0827
 - models/gemini-1.5-flash-8b-exp-0924
 - models/gemini-2.5-pro-exp-03-25
 - models/gemini-2.0-flash-exp
 - models/gemini-2.0-flash
 - models/gemini-2.0-flash-001
 - models/gemini-2.0-flash-exp-image-generation
 - models/gemini-2.0-flash-lite-001
 - models/gemini-2.0-flash-lite
 - models/gemini-2.0-flash-lite-preview-02-05
 - models/gemini-2.0-flash-lite-preview
 - models/gemini-2.0-pro-exp
 - models/gemini-2.0-pro-exp-02-05
 - models/gemini-exp-1206
 - models/gemin

## Create Results Directory and Solution Generator Function

In [7]:
# Ensure the output folder for baseline results exists (no error if already there)
results_dir = Path("..") / "Results" / "Baseline_Gemma3"
results_dir.mkdir(exist_ok=True, parents=True)

# Function to generate solution using Gemma 3
def generate_solution(case_data, model_name=model_name):
    """Ask the generative model for a solution to a single business case.

    Args:
        case_data: Mapping providing the "case_id", "title" and "case_text" keys.
        model_name: Model identifier handed to genai.GenerativeModel. The default
            is the value of the notebook-level ``model_name`` at the time this
            function is defined.

    Returns:
        dict with the keys "case_id", "title", "solution", "prompt" and "model".
        When generation raises, the error is printed and "solution" contains
        "ERROR: <message>" instead of model output.
    """
    prompt = f"""Please provide a solution for the following business case:

Title: {case_data['title']}

CASE Description:
{case_data['case_text']}
"""

    try:
        llm = genai.GenerativeModel(model_name=model_name)
        sampling = genai.types.GenerationConfig(
            temperature=0.7,
            max_output_tokens=8192,
        )
        reply = llm.generate_content(prompt, generation_config=sampling)
        solution_text = reply.text
    except Exception as exc:
        # Best-effort: record the failure in the result instead of aborting the run
        print(f"Error generating solution for case {case_data['case_id']}: {exc}")
        solution_text = f"ERROR: {str(exc)}"

    return {
        "case_id": case_data["case_id"],
        "title": case_data["title"],
        "solution": solution_text,
        "prompt": prompt,
        "model": model_name
    }

## Test with a Single Case

In [8]:
# Smoke-test the generator on the first case before running the full batch
print("\nTesting with a single case...")
test_case = test_cases[0]
test_result = generate_solution(test_case)

# Show the title and a short preview of the returned solution
print(
    f"\nTest result for case: {test_result['title']}",
    f"Solution (first 200 chars): {test_result['solution'][:200]}...",
    sep="\n",
)


Testing with a single case...

Test result for case: Baltic Bottleneck: Navigating Congestion and Compliance in Alpha Shippings Asia-Europe Route
Solution (first 200 chars): ## Alpha Shipping: Baltic Bottleneck - Solution Proposal

Here's a comprehensive solution for Alpha Shipping, addressing the challenges outlined in the case study. This solution employs a multi-pronge...


## Process All Cases

In [9]:
# Keep the machine awake while the batch runs. `caffeinate` is a macOS tool,
# so starting it is best-effort: if it cannot be launched the run continues
# without it instead of failing before any case is processed.
existing_caffeinate = globals().get('caffeinate_process')
if existing_caffeinate is None or existing_caffeinate.poll() is not None:
    print("Starting caffeinate to prevent Mac from sleeping...")
    try:
        caffeinate_process = subprocess.Popen(["caffeinate", "-i", "-d", "-m"],
                                              stdout=subprocess.DEVNULL,
                                              stderr=subprocess.DEVNULL)
    except OSError as e:
        # Raised e.g. when the executable does not exist on this platform
        print(f"⚠️ Could not start caffeinate ({e}); continuing without it.")
        caffeinate_process = None
else:
    print("Caffeinate already running...")

print(f"\nRunning baseline test on all {len(test_cases)} cases...")
results = []

try:
    for i, case in enumerate(tqdm(test_cases)):
        # Generate solution
        result = generate_solution(case)
        results.append(result)

        # Save the individual result immediately so partial progress survives
        # an interruption of the loop
        case_file = results_dir / f"{case['case_id']}_baseline.json"
        with open(case_file, 'w') as f:
            json.dump(result, f, indent=2)

        # Pause between requests to avoid rate limits
        if i < len(test_cases) - 1:  # Don't sleep after last case
            time.sleep(5)

    # Save all results to a single file, once, after every case has been processed
    all_results_file = results_dir / "all_baseline_results.json"
    with open(all_results_file, 'w') as f:
        json.dump(results, f, indent=2)

    # Also save as a parquet file for easier analysis
    results_df = pd.DataFrame(results)
    results_df.to_parquet(results_dir / "baseline_results.parquet")

    print(f"\n✅ Completed baseline testing for {len(results)} cases")
    print(f"Results saved to {results_dir}")

    # generate_solution stores failures as "ERROR: ..." text; surface them so
    # they are not mistaken for real solutions
    failed_ids = [r["case_id"] for r in results
                  if str(r["solution"]).startswith("ERROR:")]
    if failed_ids:
        print(f"⚠️ {len(failed_ids)} case(s) contain an error instead of a solution: {failed_ids}")

finally:
    # Make sure to terminate caffeinate (if it was started) and reap the child
    if caffeinate_process is not None:
        print("Terminating caffeinate - your Mac can sleep again...")
        caffeinate_process.terminate()
        caffeinate_process.wait()

Starting caffeinate to prevent Mac from sleeping...

Running baseline test on all 62 cases...


  0%|          | 0/62 [00:00<?, ?it/s]


✅ Completed baseline testing for 62 cases
Results saved to ../Results/Baseline_Gemma3
Terminating caffeinate - your Mac can sleep again...


In [10]:
# Preview the first five generated baseline results
results_df.head(5)

Unnamed: 0,case_id,title,solution,prompt,model
0,case-20250329-001124-k553a3,Baltic Bottleneck: Navigating Congestion and C...,## Baltic Bottleneck: Alpha Shipping - Solutio...,Please provide a solution for the following bu...,models/gemma-3-27b-it
1,case-20250329-082454-w9yo73,Gdansk Gridlock: Nordic SeaLines Struggle for ...,## Nordic SeaLines: Gdansk Gridlock - Strategi...,Please provide a solution for the following bu...,models/gemma-3-27b-it
2,case-20250330-081631-9ahbtr,**Baltic Squeeze: Navigating Congestion and Su...,## Baltic Breeze Seafoods: Solution - Navigati...,Please provide a solution for the following bu...,models/gemma-3-27b-it
3,case-20250329-210212-zq31iw,**OceanExs Baltic Gamble: Navigating Supply Ch...,## OceanExs Baltic Gamble: Solution Proposal f...,Please provide a solution for the following bu...,models/gemma-3-27b-it
4,case-20250330-073349-b2bxaz,Navigating Arctic Ambitions: A Polar Silk Rout...,## Navigating Arctic Ambitions: A Polar Silk R...,Please provide a solution for the following bu...,models/gemma-3-27b-it
