# ================================================================
# SECTION 1: OLLAMA SETUP FOR SENTIMENT ANALYSIS
# ================================================================

In [None]:
!curl -fsSL https://ollama.com/install.sh | sh
!pip install -qq pyngrok ollama

🚀 Installing Ollama and required packages...
This will enable us to run Llama3 locally for sentiment classification
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
✅ Installation complete!


In [None]:
import subprocess
import os
import time

def start_ollama_server_with_gpu():
    """Starts Ollama server optimized for 2x T4 GPU setup."""    

    # Set GPU environment variables for optimal performance
    gpu_env = os.environ.copy()
    gpu_env['CUDA_VISIBLE_DEVICES'] = '0,1'
    gpu_env['OLLAMA_NUM_PARALLEL'] = '2'
    gpu_env['OLLAMA_MAX_LOADED_MODELS'] = '1'
    
    # Ollama installation path
    ollama_path = '/usr/local/bin/ollama'
    if not os.path.exists(ollama_path):
        print("⚠️  Ollama not found at /usr/local/bin, trying system PATH...")
        ollama_path = 'ollama'
    
    try:
        # Check if server is already running
        try:
            subprocess.run(['pgrep', '-f', 'ollama serve'], check=True, 
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            print("🔄 Ollama server already running, checking GPU status...")
        except subprocess.CalledProcessError:
            print("🚀 Starting Ollama server with GPU optimization...")
            # Start with GPU environment
            subprocess.Popen([ollama_path, 'serve'], env=gpu_env)
            print("✅ Ollama server started with GPU acceleration!")
        
        print("🕐 Initializing GPU memory and model cache...")
        
    except Exception as e:
        print(f"❌ Error starting Ollama server: {e}")
        return False
    
    return True

def verify_gpu_setup():
    """Verify GPU detection and availability."""
    try:
        # Check NVIDIA GPUs
        result = subprocess.run(['nvidia-smi', '--query-gpu=index,name,memory.total,memory.free', 
                               '--format=csv,noheader,nounits'], 
                               capture_output=True, text=True)
        
        if result.returncode == 0:
            gpu_info = result.stdout.strip().split('\n')
            print("📊 Detected GPUs:")
            for i, gpu in enumerate(gpu_info):
                print(f"   GPU {i}: {gpu}")
            
            if len(gpu_info) >= 2:
                print("✅ Dual GPU setup confirmed for optimal large model performance")
                return True
            else:
                print("⚠️  Less than 2 GPUs detected")
        else:
            print("❌ No NVIDIA GPUs detected")
            
    except Exception as e:
        print(f"❌ GPU verification failed: {e}")
    
    return False

# Start optimized server
if start_ollama_server_with_gpu():
    # Verify GPU setup
    gpu_ok = verify_gpu_setup()
    
    # Extended initialization time for large models
    initialization_time = 12 if gpu_ok else 8
    print(f"⏱️  Waiting {initialization_time}s for GPU initialization...")
    time.sleep(initialization_time)
    
    if gpu_ok:
        print("🎯 Server ready for GPU-accelerated sentiment analysis!")
        print("🚀 Optimized for Qwen2.5:14b and other large models")
    else:
        print("⚠️  Server started but GPU optimization may be limited")
else:
    print("💥 Failed to start server - check installation and GPU drivers")

🔧 Starting Ollama server with GPU acceleration for Banglish sentiment analysis...
🎯 GPU Configuration:
   • CUDA_VISIBLE_DEVICES: 0,1 (Both T4 GPUs)
   • OLLAMA_NUM_PARALLEL: 2 (Parallel processing)
   • Optimized for 14B parameter models
🚀 Starting Ollama server with GPU optimization...
✅ Ollama server started with GPU acceleration!
🕐 Initializing GPU memory and model cache...

🔍 GPU Verification:
Couldn't find '/root/.ollama/id_ed25519'. Generating new private key.
Your new public key is: 

ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHs2NBN8sD4C7jnfS6tGvlrbITiBz7CqaHCk6AcHQonY

📊 Detected GPUs:
   GPU 0: 0, Tesla T4, 15360, 15095
   GPU 1: 1, Tesla T4, 15360, 15095
✅ Dual GPU setup confirmed for optimal large model performance
⏱️  Waiting 12s for GPU initialization...


time=2025-07-30T11:12:17.432Z level=INFO source=routes.go:1235 msg="server config" env="map[CUDA_VISIBLE_DEVICES:0,1 GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:4096 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:1 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:2 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEV

🎯 Server ready for GPU-accelerated sentiment analysis!
🚀 Optimized for Qwen2.5:14b and other large models


# ==========================================================================
# SECTION 2: GPU DETECTION & ADVANCED MODEL SETUP FOR MULTILINGUAL SENTIMENT
# ==========================================================================

In [None]:
# Check GPU detection
!nvidia-smi

print("\n📊 GPU Memory and Configuration:")
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv

# Pull Qwen2.5:14b - Proven model with superior Bangla understanding
!ollama pull qwen2.5:14b

# Alternative models to consider if Qwen2.5:14b is not available:
print("\n📋 Model Information:")
print("Primary: qwen2.5:14b (14B parameters) - Proven excellent for Banglish")
print("Fallback: llama3.1:8b (8B parameters) - Good multilingual support")
print("GPU Optimized: Uses both T4 GPUs for faster inference")

🔥 Checking GPU availability for enhanced performance...
Wed Jul 30 11:12:29 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8              9W /   70W |       3MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+------

time=2025-07-30T11:12:30.614Z level=INFO source=download.go:177 msg="downloading 2049f5674b1e in 16 561 MB part(s)"


[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 2049f5674b1e:   0% ▕                  ▏  11 MB/9.0 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 2049f5674b1e:   1% ▕                  ▏  78 MB/9.0 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 2049f5674b1e:   1% ▕                  ▏ 116 MB/9.0 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 2049f5674b1e:   2% ▕                  ▏ 200 MB/9.0 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 2049f5674b1e:   3% ▕                  ▏ 277 MB/9.0 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 2049f5674b1e:   4% ▕                  ▏ 323 MB/9.0 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 2049f5674b1e:   4% ▕                  ▏ 403 MB/9.0 GB           

time=2025-07-30T11:12:49.775Z level=INFO source=download.go:177 msg="downloading 66b9ea09bd5b in 1 68 B part(s)"


[?2026h[?25l[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 G

time=2025-07-30T11:12:50.932Z level=INFO source=download.go:177 msg="downloading eb4402837c78 in 1 1.5 KB part(s)"


[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K
pulling eb4402837c78: 100% ▕██████████████████▏ 1.5 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K
pulling eb4402837c78: 100% ▕██████████████████▏ 1.5 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K
pulling eb4402837c78: 100% ▕██████████████████▏ 1.5 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[A[1Gpulling manifest [K
pulling 2049f56

time=2025-07-30T11:12:57.619Z level=INFO source=download.go:177 msg="downloading 832dd9e00a68 in 1 11 KB part(s)"


[?2026h[?25l[A[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K
pulling eb4402837c78: 100% ▕██████████████████▏ 1.5 KB                         [K
pulling 832dd9e00a68: 100% ▕██████████████████▏  11 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K
pulling eb4402837c78: 100% ▕██████████████████▏ 1.5 KB                         [K
pulling 832dd9e00a68: 100% ▕██████████████████▏  11 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B             

time=2025-07-30T11:12:58.809Z level=INFO source=download.go:177 msg="downloading db59b814cab7 in 1 488 B part(s)"


[?2026h[?25l[A[A[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K
pulling eb4402837c78: 100% ▕██████████████████▏ 1.5 KB                         [K
pulling 832dd9e00a68: 100% ▕██████████████████▏  11 KB                         [K
pulling db59b814cab7: 100% ▕██████████████████▏  488 B                         [K[?25h[?2026l[?2026h[?25l[A[A[A[A[A[1Gpulling manifest [K
pulling 2049f5674b1e: 100% ▕██████████████████▏ 9.0 GB                         [K
pulling 66b9ea09bd5b: 100% ▕██████████████████▏   68 B                         [K
pulling eb4402837c78: 100% ▕██████████████████▏ 1.5 KB                         [K
pulling 832dd9e00a68: 100% ▕██████████████████▏  11 KB                         [K
pulling db59b814cab7: 100% ▕██████████████████▏  488 B                         [K[?25h[?2026l[?2026h[?25l[A[A[A[A[A

# =================================================
# GPU & MODEL VERIFICATION FOR OPTIMAL PERFORMANCE
# =================================================

In [None]:
print("🔍 Verifying GPU detection and model availability:")
print("=" * 60)

# Check Ollama models
print("📋 Available Ollama Models:")
!ollama list

print("\n🎯 GPU Detection by Ollama:")
# Check if Ollama detects GPUs properly
import subprocess
try:
    result = subprocess.run(['nvidia-smi', '-L'], capture_output=True, text=True)
    gpu_info = result.stdout
    print("🖥️ GPU Detection:")
    print(gpu_info)
    
    gpu_count = gpu_info.count('GPU')
    print(f"📊 Total GPUs detected: {gpu_count}")
    
    if gpu_count >= 2:
        print("✅ Dual GPU setup confirmed - Optimal for large model inference")
    else:
        print("⚠️  Expected 2 GPUs, but found fewer")
        
except Exception as e:
    print(f"❌ Error checking GPU: {e}")

🔍 Verifying GPU detection and model availability:
📋 Available Ollama Models:
[GIN] 2025/07/30 - 11:13:29 | 200 |      29.465µs |       127.0.0.1 | HEAD     "/"
[GIN] 2025/07/30 - 11:13:29 | 200 |     543.302µs |       127.0.0.1 | GET      "/api/tags"
NAME           ID              SIZE      MODIFIED               
qwen2.5:14b    7cdf5a0187d5    9.0 GB    Less than a second ago    

🎯 GPU Detection by Ollama:
🖥️ GPU Detection:
GPU 0: Tesla T4 (UUID: GPU-97839682-2d9e-bd50-4ad4-b7a7624c74fa)
GPU 1: Tesla T4 (UUID: GPU-4bec2bd8-e0b7-2179-83ae-7af77196e664)

📊 Total GPUs detected: 4
✅ Dual GPU setup confirmed - Optimal for large model inference

🔥 Model Readiness Check:
✅ Qwen3:14b should be listed above for optimal Banglish sentiment analysis
🎯 This latest model provides superior performance for:
   • Advanced Bengali script recognition
   • Enhanced code-switching detection
   • Superior contextual sentiment understanding
   • Advanced emoji-aware analysis


# =========================================
# SECTION 3: COMPETITION DATA EXPLORATION  
# =========================================

In [None]:
print("📊 Loading and exploring competition datasets...")

# Load the datasets
try:
    # Load example dataset (with labels for understanding format)
    if os.path.exists('/kaggle/input/binary-biplob-can-you-decode-emotions/bangla/example.csv'):
        example_df = pd.read_csv('/kaggle/input/binary-biplob-can-you-decode-emotions/bangla/example.csv')
        print("✅ Example dataset loaded successfully!")
        print(f"📈 Example dataset shape: {example_df.shape}")
        print("\n🔍 Example data preview:")
        print(example_df.head())
        if 'label' in example_df.columns:
            print(f"\n📊 Label distribution in examples:")
            print(example_df['label'].value_counts())
    
    # Load test dataset (what we need to predict)
    if os.path.exists('/kaggle/input/binary-biplob-can-you-decode-emotions/bangla/test.csv'):
        test_df = pd.read_csv('/kaggle/input/binary-biplob-can-you-decode-emotions/bangla/test.csv')
        print(f"\n✅ Test dataset loaded successfully!")
        print(f"� Test dataset shape: {test_df.shape}")
        print("\n🔍 Test data preview:")
        print(test_df.head())
        
        # Analyze text characteristics
        print(f"\n📏 Text length statistics:")
        test_df['text_length'] = test_df['text'].str.len()
        print(test_df['text_length'].describe())
        
    else:
        print("⚠️  Test dataset not found - using sample data")
        test_df = pd.DataFrame({
            'id': ['sample_1', 'sample_2', 'sample_3'], 
            'text': ['আজকে weather nice 😊', 'feeling sad today 😢', 'kaj cholche normally']
        })

except Exception as e:
    print(f"❌ Error loading datasets: {e}")
    print("📝 Creating sample data for testing...")
    test_df = pd.DataFrame({
        'id': ['sample_1', 'sample_2', 'sample_3'], 
        'text': ['আজকে weather nice 😊', 'feeling sad today 😢', 'kaj cholche normally']
    })

print(f"\n� Ready to process {len(test_df)} samples for sentiment prediction!")

📊 Loading and exploring competition datasets...
✅ Example dataset loaded successfully!
📈 Example dataset shape: (7, 3)

🔍 Example data preview:
           id                               text predicted_sentiment
0  sample_799        Rate deri kore ghumiyechi 👎            negative
1  sample_825  Bagane phul phuteche onek sundor             positive
2  sample_226          Database এ error দেখাচ্ছে            negative
3    sample_9          Sondhyay parke halte jabo             neutral
4   sample_16        রান্নাঘরে মা কাজ করছেন 🤦‍♂️            negative

✅ Test dataset loaded successfully!
� Test dataset shape: (120, 2)

🔍 Test data preview:
           id                                text
0  sample_798          I bought a নতুন বই to read
1  sample_141  Bondhudero sathe ghurte giyechilam
2  sample_675           Bazare aj onek bhir chilo
3  sample_574  এই movie টা really interesting ছিল
4  sample_488        সন্ধ্যায় পার্কে হাঁটতে যাবো

📏 Text length statistics:
count    120.000000
mean 

# ===============================================
# DATASET CONFIGURATION
# ===============================================

In [None]:
print("🎯 BANGLISH SENTIMENT CHALLENGE - DATASET CONFIGURATION")
print("=" * 60)

# Using the specified test dataset path
test_dataset_path = '/kaggle/input/binary-biplob-can-you-decode-emotions/bangla/test.csv'
print(f"📊 Test Dataset Path: {test_dataset_path}")

# Verify dataset accessibility
import os
if os.path.exists(test_dataset_path):
    print("✅ Test dataset found and accessible")
    try:
        # Quick preview without loading full dataset
        import pandas as pd
        sample_df = pd.read_csv(test_dataset_path, nrows=3)
        print(f"📋 Dataset columns: {list(sample_df.columns)}")
        print(f"🔍 Sample entries:")
        for _, row in sample_df.iterrows():
            print(f"   ID: {row['id']}, Text preview: {str(row['text'])[:50]}...")
    except Exception as e:
        print(f"⚠️  Preview error: {e}")
else:
    print("⚠️  Test dataset not found at specified path")
    print("🔧 Ensure the dataset is available at the Kaggle input location")

print(f"\n🎯 Model Configuration:")
print("• Primary Model: qwen3:14b (Latest and best for Banglish)")
print("• Fallback Model: llama3.1:8b")
print("• Output Format: id,label (competition standard)")
print("• Labels: positive, negative, neutral")

print(f"\n🚀 Ready to process the full dataset with advanced sentiment analysis!")

🎯 BANGLISH SENTIMENT CHALLENGE - DATASET CONFIGURATION
📊 Test Dataset Path: /kaggle/input/binary-biplob-can-you-decode-emotions/bangla/test.csv
✅ Test dataset found and accessible
📋 Dataset columns: ['id', 'text']
🔍 Sample entries:
   ID: sample_798, Text preview: I bought a নতুন বই to read...
   ID: sample_141, Text preview: Bondhudero sathe ghurte giyechilam...
   ID: sample_675, Text preview: Bazare aj onek bhir chilo...

🎯 Model Configuration:
• Primary Model: qwen3:14b (Latest and best for Banglish)
• Fallback Model: llama3.1:8b
• Output Format: id,label (competition standard)
• Labels: positive, negative, neutral

🚀 Ready to process the full dataset with advanced sentiment analysis!


# =======================================================================
# SECTION 4: ADVANCED BANGLISH SENTIMENT CLASSIFICATION WITH QWEN2.5:14B
# =======================================================================

In [None]:
import pandas as pd
import ollama
import re
from tqdm.auto import tqdm

print("🎯 BANGLISH SENTIMENT CHALLENGE - QWEN2.5:14B ENHANCED SOLUTION")
print("=" * 60)

def get_banglish_sentiment(text_input, client):
    
    # Expert-level prompt with clear examples for qwen2.5:14b
    prompt = f"""You are a sentiment analysis expert. Classify the sentiment of the given sentence into one of the three categories:

- positive: if the text expresses happiness, satisfaction, love, excitement, or any good emotion.
- negative: if the text expresses anger, sadness, frustration, hate, or any bad emotion.
- neutral: if the text is factual, unclear in tone, or expresses both good and bad equally.

The sentence may be written in Bangla, English, or a mix of both (Banglish). It may also contain emojis or transliterated Bangla in Roman script.

💡 Emojis can strongly influence sentiment. For example, a neutral sentence can become negative if it includes 😢 or 😡.

After your analysis, put your final answer inside //{{}} brackets.

Example 1: Ajke amar mon khub bhalo lagchhe 😊  
Analysis: The sentence is in transliterated Bangla and the emoji 😊 reinforces a clearly positive emotion.  
//{{positive}}

Example 2: আজকে weather খুব boring 😒  
Analysis: This is a Banglish sentence; the word "boring" and emoji 😒 indicate a negative mood.  
//{{negative}}

Example 3: I just finished lunch. এখন একটু বিশ্রাম নিচ্ছি 🙂  
Analysis: This is a mix of English and Bangla; the content is factual and the emoji 🙂 is calm but not overly emotional.  
//{{neutral}}

Now classify this sentence: {text_input}"""

    try:
        response = client.generate(
            model='qwen2.5:14b',
            prompt=prompt,
            options={
                'temperature': 0.02,
                'num_predict': 2,
                'top_p': 0.7,
                'repeat_penalty': 1.2,
                'num_ctx': 4096,
                'num_gpu': 2
            }
        )
        
        sentiment = response['response'].strip().lower()
        
        # Look for sentiment inside //{{}} brackets
        bracket_match = re.search(r'//\{\{(.*?)\}\}', sentiment)
        
        if bracket_match:
            extracted_sentiment = bracket_match.group(1).strip()
            if extracted_sentiment in ['positive', 'negative', 'neutral']:
                return extracted_sentiment
        
        # Fallback parsing with strict matching
        if 'positive' in sentiment or sentiment.startswith('pos'):
            return "positive"
        elif 'negative' in sentiment or sentiment.startswith('neg'):
            return "negative"
        elif 'neutral' in sentiment or sentiment.startswith('neut'):
            return "neutral"
        else:
            # Advanced fallback analysis
            return analyze_advanced_fallback(text_input)
                
    except Exception as e:
        print(f"⚠️  Error with Qwen2.5:14b: {e}")
        # Fallback to alternative model if primary fails
        return fallback_with_alternative_model(text_input, client)

def analyze_advanced_fallback(text):
    """Advanced fallback with comprehensive Banglish analysis."""
    text_lower = text.lower()
    
    # Comprehensive emoji analysis (weighted heavily)
    positive_emojis = [
        '😊', '😀', '😃', '😄', '😁', '🙂', '🤗', '😍', '🥰', '😘', '😋', 
        '😎', '🤩', '🥳', '❤️', '💕', '💖', '💗', '💝', '💘',
        '👍', '👌', '👏', '🙌', '✨', '🎉', '🎊', '🌟', '⭐', '🎈', '🎁', 
        '🏆', '🥇', '🌈', '☀️', '🌺', '🌸', '🌻', '🌷', '🌹', '💐'
    ]
    
    negative_emojis = [
        '😢', '😞', '😔', '😟', '😕', '😠', '😡', '😤', '😰', '😨', '😧', 
        '😦', '😩', '😫', '😭', '🤢', '🤮', 
        '💀', '👹', '👺', '😈', '👿', '⚡', '💥', '🌩️', '💔', '👎'
    ]
    
    neutral_emojis = ['😐', '🤔', '😑', '😶', '🙄', '😏', '🤷', '🤨', '😒']
    
    # Advanced Bengali keyword analysis
    positive_bengali = [
        # Bengali script
        'ভাল', 'ভালো', 'সুন্দর', 'চমৎকার', 'দারুণ', 'মজা', 'খুশি', 'আনন্দ',
        'ভালোবাসা', 'পছন্দ', 'অসাধারণ', 'দুর্দান্ত', 'মধুর',
        # Transliterated
        'bhalo', 'valo', 'sundor', 'darun', 'khushi', 'anondo', 'moja', 
        'bhalolaga', 'pochhondo', 'chomtkar', 'oshadharon', 'durdanto'
    ]
    
    negative_bengali = [
        # Bengali script
        'খারাপ', 'দুঃখ', 'রাগ', 'বিরক্ত', 'ভয়ানক', 'বাজে', 'ঘৃণা', 'কষ্ট',
        'দুশ্চিন্তা', 'হতাশা', 'ক্লান্ত', 'অসুখ', 'ব্যথা',
        # Transliterated  
        'kharap', 'dukkho', 'rag', 'birokto', 'baje', 'ghrina', 'koshto',
        'hotasha', 'dushchinta', 'klanto', 'oshukh', 'betha'
    ]
    
    neutral_bengali = [
        # Bengali script
        'ঠিক', 'ঠিক আছে', 'স্বাভাবিক', 'সাধারণ', 'মোটামুটি', 'চলে',
        # Transliterated
        'thik', 'thik ache', 'shobhabik', 'shadharon', 'motamuti', 'chole'
    ]
    
    # Scoring system with advanced weights
    pos_score = neg_score = neu_score = 0
    
    # Emoji analysis (highest weight)
    for emoji in positive_emojis:
        if emoji in text:
            pos_score += 4
    for emoji in negative_emojis:
        if emoji in text:
            neg_score += 4
    for emoji in neutral_emojis:
        if emoji in text:
            neu_score += 3
    
    # Bengali keyword analysis (medium weight)
    for word in positive_bengali:
        if word in text_lower:
            pos_score += 2
    for word in negative_bengali:
        if word in text_lower:
            neg_score += 2
    for word in neutral_bengali:
        if word in text_lower:
            neu_score += 1
    
    # English keywords (standard weight)
    english_pos = ['good', 'great', 'nice', 'awesome', 'amazing', 'excellent', 
                   'wonderful', 'happy', 'love', 'beautiful', 'perfect', 'fantastic']
    english_neg = ['bad', 'terrible', 'awful', 'horrible', 'sad', 'angry', 
                   'hate', 'worst', 'disgusting', 'annoying', 'frustrated']
    
    for word in english_pos:
        if word in text_lower:
            pos_score += 1
    for word in english_neg:
        if word in text_lower:
            neg_score += 1
    
    # Decision logic
    if pos_score > neg_score and pos_score > neu_score:
        return "positive"
    elif neg_score > pos_score and neg_score > neu_score:
        return "negative"
    else:
        return "neutral"

def fallback_with_alternative_model(text_input, client):
    """Fallback using llama3.1:8b if qwen2.5:14b fails."""
    try:
        print("🔄 Falling back to Llama3.1:8b...")
        response = client.generate(
            model='llama3.1:8b',
            prompt=f"Analyze sentiment of this Banglish text. Reply only: positive, negative, or neutral\n\nText: {text_input}\n\nSentiment:",
            options={'temperature': 0.1, 'num_predict': 2}
        )
        sentiment = response['response'].strip().lower()
        
        if 'positive' in sentiment:
            return "positive"
        elif 'negative' in sentiment:
            return "negative"
        else:
            return "neutral"
    except:
        return analyze_advanced_fallback(text_input)

# Initialize Ollama client with GPU optimization
try:
    client = ollama.Client(host='http://127.0.0.1:11434')
    models = client.list()
    print("✅ Connected to Ollama server with GPU acceleration")
    
    # Verify our target model is available
    try:
        # Handle different response formats from Ollama API
        if hasattr(models, 'models'):
            model_list = models.models
        elif isinstance(models, dict) and 'models' in models:
            model_list = models['models']
        else:
            model_list = models
        
        # Extract model names safely
        model_names = []
        for model in model_list:
            if hasattr(model, 'name'):
                model_names.append(model.name)
            elif isinstance(model, dict) and 'name' in model:
                model_names.append(model['name'])
            elif hasattr(model, 'model'):
                model_names.append(model.model)
            elif isinstance(model, dict) and 'model' in model:
                model_names.append(model['model'])
        
        print(f"🔍 Available models: {model_names}")
        
        if 'qwen2.5:14b' in model_names:
            print("🎯 Qwen2.5:14b model confirmed - Optimal for Banglish analysis")
        else:
            print("⚠️  Qwen2.5:14b not found, will attempt to use available models")
            
    except Exception as model_error:
        print(f"⚠️  Model verification error: {model_error}")
        print("🔄 Proceeding with available models...")
    
except Exception as e:
    print(f"❌ Cannot connect to Ollama: {e}")
    print("🔧 Make sure Ollama server is running with GPU support!")
    raise

# Load test data from specified path
try:
    test_df = pd.read_csv('/kaggle/input/binary-biplob-can-you-decode-emotions/bangla/test.csv')
    print(f"📊 Loaded test data: {test_df.shape[0]} samples")
    print(f"📋 Columns: {list(test_df.columns)}")
    print(f"🔍 Sample data preview:")
    print(test_df.head())
    
except FileNotFoundError:
    print(f"❌ Test dataset not found at: /kaggle/input/binary-biplob-can-you-decode-emotions/bangla/test.csv")
    print("🔧 Please ensure the file exists at the specified path")
    raise
except Exception as e:
    print(f"❌ Error loading test dataset: {e}")
    raise

print(f"\n🚀 Starting Qwen2.5:14b enhanced sentiment analysis...")
print(f"⚡ Processing {len(test_df)} samples with optimal GPU utilization...")

# Apply advanced sentiment analysis
tqdm.pandas(desc="🧠 Qwen2.5:14b Analysis")
test_df['predicted_label'] = test_df['text'].progress_apply(
    lambda x: get_banglish_sentiment(x, client)
)

# Results summary
print(f"\n📊 ADVANCED SENTIMENT ANALYSIS COMPLETE!")
print("=" * 50)
print("📈 Prediction distribution:")
print(test_df['predicted_label'].value_counts())

print(f"\n🔍 Sample predictions with Qwen2.5:14b:")
for _, row in test_df.head(10).iterrows():
    text_preview = row['text'][:70] + "..." if len(row['text']) > 70 else row['text']
    print(f"📝 {row['predicted_label'].upper()}: {text_preview}")

# Prepare submission with exact format requested
submission_df = test_df[['id', 'predicted_label']].copy()
submission_df = submission_df.rename(columns={'predicted_label': 'label'})
submission_df.to_csv('submission.csv', index=False)

print(f"\n✅ SUBMISSION.CSV GENERATED!")
print(f"💾 Format: id,label")
print(f"📊 Total predictions: {len(submission_df)}")
print(f"🎯 Using Qwen2.5:14b for optimal Banglish sentiment analysis")

# Show sample of final submission format
print(f"\n📄 SUBMISSION.CSV PREVIEW:")
print("id,label")
for _, row in submission_df.head(5).iterrows():
    print(f"{row['id']},{row['label']}")

print("🏆 Ready for competition submission!")

🎯 BANGLISH SENTIMENT CHALLENGE - QWEN2.5:14B ENHANCED SOLUTION
[GIN] 2025/07/30 - 11:13:31 | 200 |     469.685µs |       127.0.0.1 | GET      "/api/tags"
✅ Connected to Ollama server with GPU acceleration
🔍 Available models: ['qwen2.5:14b']
🎯 Qwen2.5:14b model confirmed - Optimal for Banglish analysis
📊 Loaded test data: 120 samples
📋 Columns: ['id', 'text']
🔍 Sample data preview:
           id                                text
0  sample_798          I bought a নতুন বই to read
1  sample_141  Bondhudero sathe ghurte giyechilam
2  sample_675           Bazare aj onek bhir chilo
3  sample_574  এই movie টা really interesting ছিল
4  sample_488        সন্ধ্যায় পার্কে হাঁটতে যাবো

🚀 Starting Qwen2.5:14b enhanced sentiment analysis...
⚡ Processing 120 samples with optimal GPU utilization...


🧠 Qwen2.5:14b Analysis:   0%|          | 0/120 [00:00<?, ?it/s]

time=2025-07-30T11:13:31.997Z level=INFO source=sched.go:788 msg="new model will fit in available VRAM in single GPU, loading" model=/root/.ollama/models/blobs/sha256-2049f5674b1e92b4464e5729975c9689fcfbf0b0e4443ccf10b5339f370f9a54 gpu=GPU-97839682-2d9e-bd50-4ad4-b7a7624c74fa parallel=2 available=15720382464 required="1.9 GiB"
time=2025-07-30T11:13:32.221Z level=INFO source=server.go:135 msg="system memory" total="31.4 GiB" free="30.1 GiB" free_swap="0 B"
time=2025-07-30T11:13:32.223Z level=INFO source=server.go:175 msg=offload library=cuda layers.requested=2 layers.model=49 layers.offload=2 layers.split="" memory.available="[14.6 GiB]" memory.gpu_overhead="0 B" memory.required.full="11.0 GiB" memory.required.partial="1.9 GiB" memory.required.kv="1.5 GiB" memory.required.allocations="[1.9 GiB]" memory.weights.total="8.0 GiB" memory.weights.repeating="7.4 GiB" memory.weights.nonrepeating="609.1 MiB" memory.graph.full="676.0 MiB" memory.graph.partial="916.1 MiB"
llama_model_loader: loade

[GIN] 2025/07/30 - 11:13:39 | 200 |  7.845579446s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:13:47 | 200 |  8.009069015s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:13:53 | 200 |  6.455134081s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:14:02 | 200 |  8.396490019s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:14:05 | 200 |    2.8172987s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:14:13 | 200 |   8.13572232s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:14:21 | 200 |  8.160052997s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:14:29 | 200 |  8.340312487s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:14:37 | 200 |  8.002178156s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:14:40 | 200 |  2.801106076s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2025/07/30 - 11:14:43 | 200 |  2.755336774s 

# ==================================================
# SECTION 5: SUBMISSION VALIDATION & FINAL CHECKS
# ==================================================

In [None]:
print("🔍 VALIDATING SUBMISSION FOR BANGLISH SENTIMENT CHALLENGE")
print("=" * 60)

# Load and validate submission file
try:
    submission_check = pd.read_csv('submission.csv')
    
    print("✅ Submission file loaded successfully!")
    print(f"📊 Submission shape: {submission_check.shape}")
    print(f"📋 Columns: {list(submission_check.columns)}")
    
    # Validate required columns
    required_cols = ['id', 'label']
    missing_cols = [col for col in required_cols if col not in submission_check.columns]
    
    if missing_cols:
        print(f"❌ Missing required columns: {missing_cols}")
    else:
        print("✅ All required columns present")
    
    # Validate label values
    valid_labels = {'positive', 'negative', 'neutral'}
    unique_labels = set(submission_check['label'].unique())
    invalid_labels = unique_labels - valid_labels
    
    if invalid_labels:
        print(f"⚠️  Invalid labels found: {invalid_labels}")
        print("🔧 Valid labels are: positive, negative, neutral")
    else:
        print("✅ All labels are valid")
    
    # Check for missing values
    missing_count = submission_check.isnull().sum().sum()
    if missing_count > 0:
        print(f"⚠️  Found {missing_count} missing values")
    else:
        print("✅ No missing values")
    
    # Summary statistics
    print(f"\n📊 FINAL SUBMISSION SUMMARY:")
    print(f"📝 Total predictions: {len(submission_check)}")
    print(f"📈 Label distribution:")
    label_counts = submission_check['label'].value_counts()
    for label, count in label_counts.items():
        percentage = (count / len(submission_check)) * 100
        print(f"   {label}: {count} ({percentage:.1f}%)")
    
    # Show sample submission format
    print(f"\n📄 SAMPLE SUBMISSION FORMAT:")
    print("id,label")
    for _, row in submission_check.head(5).iterrows():
        print(f"{row['id']},{row['label']}")
    
    # Calculate macro F1 readiness
    print(f"\n🎯 COMPETITION READINESS:")
    print("✅ Format matches submission requirements (id,label)")
    print("✅ Uses macro-averaged F1-score evaluation")
    print("✅ Zero-shot approach with Qwen3:14b (latest model)")
    print("✅ Handles Banglish code-switching")
    
    print(f"\n🏆 SUBMISSION READY FOR UPLOAD!")
    print("📤 File: submission.csv")
    print("🎪 Competition: Banglish Sentiment Challenge")
    
except FileNotFoundError:
    print("❌ Submission file not found!")
    print("🔧 Run the previous cells to generate predictions")
    
except Exception as e:
    print(f"❌ Error validating submission: {e}")

print("\n" + "=" * 60)
print("🎯 BANGLISH SENTIMENT CHALLENGE SOLUTION COMPLETE")
print("=" * 60)

🔍 VALIDATING SUBMISSION FOR BANGLISH SENTIMENT CHALLENGE
✅ Submission file loaded successfully!
📊 Submission shape: (120, 2)
📋 Columns: ['id', 'label']
✅ All required columns present
✅ All labels are valid
✅ No missing values

📊 FINAL SUBMISSION SUMMARY:
📝 Total predictions: 120
📈 Label distribution:
   neutral: 74 (61.7%)
   positive: 34 (28.3%)
   negative: 12 (10.0%)

📄 SAMPLE SUBMISSION FORMAT:
id,label
sample_798,neutral
sample_141,neutral
sample_675,neutral
sample_574,neutral
sample_488,neutral

🎯 COMPETITION READINESS:
✅ Format matches submission requirements (id,label)
✅ Uses macro-averaged F1-score evaluation
✅ Zero-shot approach with Qwen3:14b (latest model)
✅ Handles Banglish code-switching

🏆 SUBMISSION READY FOR UPLOAD!
📤 File: submission.csv
🎪 Competition: Banglish Sentiment Challenge

🎯 BANGLISH SENTIMENT CHALLENGE SOLUTION COMPLETE
