In [2]:
# Cell 1: Setup OpenAI for Scam Detection
import openai
import json
import re
from datetime import datetime

# Set your OpenAI API key
import openai

class ScamDetector:
    def __init__(self, api_key: str):
        self.client = openai.Client(api_key=api_key)
        # … rest of init …

# Then when creating:
detector = ScamDetector(api_key="sk-…")
openai.api_key = "your-openai-api-key-here"

class ScamDetector:
    def __init__(self):
        self.client = openai.Client()
        
    def get_scam_prompt(self, language="english"):
        """Get language-specific scam detection prompt"""
        prompts = {
            "english": """
            You are an expert scam call detection system. Analyze this phone conversation transcript for scam indicators.
            
            COMMON SCAM PATTERNS TO DETECT:
            1. URGENCY: "act now", "limited time", "expires today"
            2. FEAR TACTICS: threats, consequences, "you're in trouble"  
            3. PERSONAL INFO REQUESTS: SSN, passwords, banking details, codes
            4. PAYMENT DEMANDS: wire transfers, gift cards, immediate payment
            5. IMPERSONATION: claiming to be bank, IRS, tech support, family
            6. TOO GOOD TO BE TRUE: lottery wins, free money, guaranteed returns
            7. SECRECY: "don't tell anyone", "keep this between us"
            8. PRESSURE: not allowing time to think or verify
            
            Respond ONLY with valid JSON:
            {
                "risk_level": "SAFE|SUSPICIOUS|SCAM",
                "confidence": 0.85,
                "detected_patterns": ["list of specific patterns found"],
                "scam_type": "tech_support|impersonation|financial|romance|phishing|unknown",
                "reasoning": "brief explanation"
            }
            """,
            
            "spanish": """
            Eres un sistema experto de detección de estafas telefónicas. Analiza esta transcripción...
            
            PATRONES COMUNES DE ESTAFA:
            1. URGENCIA: "actúa ahora", "tiempo limitado"
            2. TÁCTICAS DE MIEDO: amenazas, "estás en problemas"
            3. SOLICITUD DE INFO PERSONAL: códigos, contraseñas, datos bancarios
            ...
            """,
            
            "french": """
            Vous êtes un système expert de détection d'arnaques téléphoniques...
            
            MODÈLES D'ARNAQUE COURANTS:
            1. URGENCE: "agissez maintenant", "temps limité"
            2. TACTIQUES DE PEUR: menaces, "vous avez des ennuis"
            ...
            """
        }
        return prompts.get(language, prompts["english"])
    
    def analyze_transcript(self, transcript, language="english"):
        """Analyze transcript for scam patterns"""
        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": self.get_scam_prompt(language)},
                    {"role": "user", "content": f"TRANSCRIPT: {transcript}"}
                ],
                temperature=0.1,
                max_tokens=400
            )
            
            # Parse JSON response
            content = response.choices[0].message.content
            result = json.loads(content)
            
            return result
            
        except Exception as e:
            print(f"❌ Scam detection error: {e}")
            return {
                "risk_level": "UNKNOWN",
                "confidence": 0.0,
                "detected_patterns": [],
                "scam_type": "unknown",
                "reasoning": f"Analysis failed: {str(e)}"
            }

# Test the scam detector
detector = ScamDetector()

# Test with sample transcripts
test_transcripts = [
    {
        "text": "Hello, this is John from Microsoft technical support. We've detected a virus on your computer. I need you to give me remote access immediately to fix this urgent problem.",
        "language": "english"
    },
    {
        "text": "Hi mom, it's me your grandson. I'm in jail and need you to wire $2000 immediately. Don't tell my parents, this is embarrassing.",
        "language": "english"  
    },
    {
        "text": "Good morning, how are you today? I'm calling to remind you about your appointment next Tuesday.",
        "language": "english"
    }
]

print("🔍 Testing Scam Detection:")
print("="*60)

for i, test in enumerate(test_transcripts, 1):
    print(f"\nTest {i}:")
    print(f"Transcript: {test['text']}")
    
    result = detector.analyze_transcript(test['text'], test['language'])
    
    print(f"🚨 Risk Level: {result['risk_level']}")
    print(f"📊 Confidence: {result['confidence']:.2f}")
    print(f"🎯 Scam Type: {result['scam_type']}")
    print(f"⚠️ Patterns: {', '.join(result['detected_patterns'])}")
    print(f"💭 Reasoning: {result['reasoning']}")
    print("-" * 40)


OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [4]:
# Cell 1: Setup and Imports
import os
import openai
import json
import re
from datetime import datetime

# Set your OpenAI API key here or ensure OPENAI_API_KEY is set in your environment
OPENAI_API_KEY = "sk-…"  
openai.api_key = OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

class ScamDetector:
    def __init__(self):
        # Pass the key explicitly to ensure the client picks it up
        self.client = openai.Client(api_key=openai.api_key)

    def get_scam_prompt(self, language="english"):
        """Return the system prompt for the specified language."""
        prompts = {
            "english": """
You are an expert scam call detection system. Analyze this phone conversation transcript for scam indicators.

COMMON SCAM PATTERNS TO DETECT:
1. URGENCY: "act now", "limited time", "expires today"
2. FEAR TACTICS: threats, consequences, "you're in trouble"
3. PERSONAL INFO REQUESTS: SSN, passwords, banking details, codes
4. PAYMENT DEMANDS: wire transfers, gift cards, immediate payment
5. IMPERSONATION: claiming to be bank, IRS, tech support, family
6. TOO GOOD TO BE TRUE: lottery wins, free money, guaranteed returns
7. SECRECY: "don't tell anyone", "keep this between us"
8. PRESSURE: not allowing time to think or verify

Respond ONLY with valid JSON:
{
  "risk_level": "SAFE|SUSPICIOUS|SCAM",
  "confidence": 0.85,
  "detected_patterns": ["list of specific patterns found"],
  "scam_type": "tech_support|impersonation|financial|romance|phishing|unknown",
  "reasoning": "brief explanation"
}
""",
        }
        return prompts.get(language, prompts["english"])

    def analyze_transcript(self, transcript: str, language="english") -> dict:
        """Analyze a transcript and return a dict with scam analysis."""
        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": self.get_scam_prompt(language)},
                    {"role": "user",   "content": f"TRANSCRIPT: {transcript}"}
                ],
                temperature=0.1,
                max_tokens=400
            )
            content = response.choices[0].message.content
            return json.loads(content)

        except Exception as e:
            # On error, return a safe default structure
            return {
                "risk_level": "UNKNOWN",
                "confidence": 0.0,
                "detected_patterns": [],
                "scam_type": "unknown",
                "reasoning": f"Analysis failed: {e}"
            }


# Cell 2: Testing the ScamDetector
detector = ScamDetector()

test_transcripts = [
    {
        "text": "Hello, this is John from Microsoft technical support. We've detected a virus on your computer. I need you to give me remote access immediately to fix this urgent problem.",
        "language": "english"
    },
    {
        "text": "Hi mom, it's me your grandson. I'm in jail and need you to wire $2000 immediately. Don't tell my parents, this is embarrassing.",
        "language": "english"
    },
    {
        "text": "Good morning, how are you today? I'm calling to remind you about your appointment next Tuesday.",
        "language": "english"
    }
]

print("🔍 Testing Scam Detection:")
print("=" * 60)
for i, test in enumerate(test_transcripts, start=1):
    print(f"\nTest {i}:")
    print(f"Transcript: {test['text']}")
    result = detector.analyze_transcript(test['text'], test['language'])
    print(f"🚨 Risk Level: {result['risk_level']}")
    print(f"📊 Confidence: {result['confidence']:.2f}")
    print(f"🎯 Scam Type: {result['scam_type']}")
    print(f"⚠️ Patterns: {', '.join(result['detected_patterns'])}")
    print(f"💭 Reasoning: {result['reasoning']}")
    print("-" * 40)


🔍 Testing Scam Detection:

Test 1:
Transcript: Hello, this is John from Microsoft technical support. We've detected a virus on your computer. I need you to give me remote access immediately to fix this urgent problem.
🚨 Risk Level: UNKNOWN
📊 Confidence: 0.00
🎯 Scam Type: unknown
⚠️ Patterns: 
💭 Reasoning: Analysis failed: 'ascii' codec can't encode character '\u2026' in position 10: ordinal not in range(128)
----------------------------------------

Test 2:
Transcript: Hi mom, it's me your grandson. I'm in jail and need you to wire $2000 immediately. Don't tell my parents, this is embarrassing.
🚨 Risk Level: UNKNOWN
📊 Confidence: 0.00
🎯 Scam Type: unknown
⚠️ Patterns: 
💭 Reasoning: Analysis failed: 'ascii' codec can't encode character '\u2026' in position 10: ordinal not in range(128)
----------------------------------------

Test 3:
Transcript: Good morning, how are you today? I'm calling to remind you about your appointment next Tuesday.
🚨 Risk Level: UNKNOWN
📊 Confidence: 0.00
🎯 Sca

In [5]:
test_scenarios = [
    {
        "name": "Tech Support Scam",
        "script": "Hello, this is Microsoft technical support. Your computer has a virus. Please give me remote access immediately.",
        "expected_risk": "SCAM"
    },
    {
        "name": "Family Emergency Scam",
        "script": "Grandma, it's me, your grandson. I'm in jail and need you to send $2000 right away. Don't tell my parents.",
        "expected_risk": "SCAM"
    },
    {
        "name": "Legitimate Business Call",
        "script": "Hi, this is Sarah from ABC Insurance. I'm calling to follow up on your inquiry about auto insurance quotes.",
        "expected_risk": "SAFE"
    }
]


In [6]:
def run_tests(detector, test_cases):
    print("🔍 Running Scam Detection Tests:")
    print("="*50)
    for case in test_cases:
        print(f"\nTest Case: {case['name']}")
        result = detector.analyze_transcript(case['script'])
        predicted_risk = result.get('risk_level', 'UNKNOWN')
        expected_risk = case['expected_risk']
        
        # Display results
        print(f"Transcript: {case['script']}")
        print(f"Predicted Risk: {predicted_risk}")
        print(f"Expected Risk: {expected_risk}")
        print(f"Confidence: {result.get('confidence', 0.0):.2f}")
        print(f"Detected Patterns: {', '.join(result.get('detected_patterns', []))}")
        print(f"Reasoning: {result.get('reasoning', '')}")
        
        # Check if test passed
        passed = (predicted_risk == expected_risk)
        print(f"Test Result: {'PASS' if passed else 'FAIL'}")
        print("-" * 60)

# Example usage:
detector = ScamDetector()  # Make sure this is instantiated from your detection notebook
run_tests(detector, test_scenarios)


🔍 Running Scam Detection Tests:

Test Case: Tech Support Scam
Transcript: Hello, this is Microsoft technical support. Your computer has a virus. Please give me remote access immediately.
Predicted Risk: UNKNOWN
Expected Risk: SCAM
Confidence: 0.00
Detected Patterns: 
Reasoning: Analysis failed: 'ascii' codec can't encode character '\u2026' in position 10: ordinal not in range(128)
Test Result: FAIL
------------------------------------------------------------

Test Case: Family Emergency Scam
Transcript: Grandma, it's me, your grandson. I'm in jail and need you to send $2000 right away. Don't tell my parents.
Predicted Risk: UNKNOWN
Expected Risk: SCAM
Confidence: 0.00
Detected Patterns: 
Reasoning: Analysis failed: 'ascii' codec can't encode character '\u2026' in position 10: ordinal not in range(128)
Test Result: FAIL
------------------------------------------------------------

Test Case: Legitimate Business Call
Transcript: Hi, this is Sarah from ABC Insurance. I'm calling to follow