# Testing Google Gemini API Integration

This notebook tests the Google Gemini API for podcast script generation with a single article.

What we'll test:
- Configuration loading and API key validation
- Google Gemini API connectivity
- Podcast script generation from a single article
- Error handling and debugging

In [1]:
# Setup: paths and imports
import sys, os
from pathlib import Path
import asyncio

notebook_dir = Path().resolve()
src_dir = notebook_dir.parent / 'src'
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))
    print('Added src to path:', src_dir)
    
print('Notebook dir:', notebook_dir)
print('Src dir:', src_dir, 'exists:', src_dir.exists())

Added src to path: /home/santi/Projects/UBMI-IFC-Podcast/src
Notebook dir: /home/santi/Projects/UBMI-IFC-Podcast/notebooks
Src dir: /home/santi/Projects/UBMI-IFC-Podcast/src exists: True


In [16]:
# Load configuration and check Google API setup
from utils.config import load_config
from utils.logger import setup_logger, get_logger

setup_logger(level='INFO')
logger = get_logger('gemini_test')
config = load_config()

print('LLM provider:', config['llm']['provider'])
print('LLM model:', config['llm']['model'])
print('Temperature:', config['llm']['temperature'])
print('Max tokens:', config['llm']['max_tokens'])

# Diagnostic: Check entire config structure
print('\n🔍 Config diagnostic:')
print('api_keys section exists:', 'api_keys' in config)
if 'api_keys' in config:
    print('api_keys content:', list(config['api_keys'].keys()))
    print('google key exists:', 'google' in config['api_keys'])
    if 'google' in config['api_keys']:
        google_key = config['api_keys']['google']
        print('google key value:', repr(google_key))
        print('google key length:', len(google_key))
        print('google key is empty string:', google_key == '')

# Check if Google API key is available
google_api_key = config['api_keys'].get('google', '')
if google_api_key:
    print(f'✅ Google API key found: {google_api_key[:8]}...')
else:
    print('❌ No Google API key found in config!')

LLM provider: google
LLM model: gemini-2.5-flash
Temperature: 0.7
Max tokens: 4000

🔍 Config diagnostic:
api_keys section exists: True
api_keys content: ['openai', 'anthropic', 'elevenlabs']
google key exists: False
❌ No Google API key found in config!


In [17]:
# Fetch a real article from PubMed instead of using dummy data
from pubmed.searcher import PubMedSearcher

print('🔍 Fetching real article from PubMed...')

# Initialize PubMed searcher
pubmed_searcher = PubMedSearcher(config)

async def get_real_article():
    """Get a real recent article from PubMed for testing"""
    try:
        # Search for recent neuroscience/biomedical articles
        search_terms = ['neuroscience', 'brain', 'neural']
        print(f'Searching PubMed for: {search_terms}')
        
        # Get recent article IDs
        pmids = await pubmed_searcher.search_recent_articles(
            query_terms=search_terms,
            days_back=30,  # Last 30 days
            max_results=10  # Get several options
        )
        
        if not pmids:
            print('⚠️  No recent articles found, trying broader search...')
            # Fallback to broader search
            pmids = await pubmed_searcher.search_recent_articles(
                query_terms=None,  # Default broad biomedical search
                days_back=7,
                max_results=5
            )
        
        if pmids:
            print(f'✅ Found {len(pmids)} articles')
            print(f'Sample PMIDs: {pmids[:3]}')
            
            # Get detailed information for the articles
            articles = await pubmed_searcher.fetch_article_details(pmids[:3])
            
            if articles:
                # Pick the first article with a good abstract
                selected_article = None
                for article in articles:
                    if article.abstract and len(article.abstract) > 200:
                        selected_article = article
                        break
                
                if not selected_article:
                    selected_article = articles[0]  # Use first one as fallback
                
                print(f'\\n📄 Selected article:')
                print(f'PMID: {selected_article.pmid}')
                print(f'Title: {selected_article.title}')
                print(f'Authors: {", ".join(selected_article.authors[:3])}{"..." if len(selected_article.authors) > 3 else ""}')
                print(f'Journal: {selected_article.journal}')
                print(f'Publication Date: {selected_article.publication_date}')
                print(f'Abstract length: {len(selected_article.abstract) if selected_article.abstract else 0} chars')
                
                # Convert to our test format
                real_article = {
                    'title': selected_article.title,
                    'abstract': selected_article.abstract or 'No abstract available',
                    'authors': selected_article.authors or [],
                    'journal': selected_article.journal or 'Unknown Journal',
                    'publication_date': selected_article.publication_date or 'Unknown Date',
                    'doi': selected_article.doi or 'No DOI',
                    'pmid': selected_article.pmid,
                    'score': 1.0  # Real article gets perfect score
                }
                
                return real_article
            else:
                print('❌ Could not retrieve article details')
                return None
        else:
            print('❌ No articles found in PubMed search')
            return None
            
    except Exception as e:
        print(f'❌ Error fetching PubMed article: {e}')
        import traceback
        traceback.print_exc()
        return None

# Get the real article
test_article = await get_real_article()

if test_article:
    print('\\n✅ Using real PubMed article for testing:')
    print(f"Title: {test_article['title']}")
    print(f"Authors: {', '.join(test_article['authors'][:3])}{'...' if len(test_article['authors']) > 3 else ''}")
    print(f"Journal: {test_article['journal']}")
    print(f"PMID: {test_article['pmid']}")
    print(f"Abstract: {test_article['abstract'][:200]}...")
else:
    print('\\n⚠️  Falling back to dummy article...')
    # Fallback to dummy article if PubMed fails
    test_article = {
        'title': 'Deregulation of interferon-gamma receptor 1 expression and its implications for lung adenocarcinoma progression',
        'abstract': 'Interferon-gamma (IFN-γ) plays a crucial role in immune surveillance and has dual roles in cancer development and progression. This study investigates the dysregulation of IFN-γ receptor 1 (IFNGR1) expression in lung adenocarcinoma and its downstream signaling pathways. We analyzed tissue samples from 150 patients and found significant downregulation of IFNGR1 in tumor tissues compared to normal lung tissue. Our results suggest that IFNGR1 deregulation contributes to immune evasion and tumor progression through altered JAK-STAT signaling.',
        'authors': ['Smith JA', 'Johnson BE', 'Garcia ML'],
        'journal': 'Nature Cancer',
        'publication_date': '2024-08-15',
        'doi': '10.5306/wjco.v15.i2.195',
        'pmid': 'dummy',
        'score': 0.95
    }
    print('Using fallback dummy article for testing')

[32m2025-09-17 18:17:46[0m | [1mINFO[0m | [36mpubmed.searcher[0m:[36msearch_recent_articles[0m:[36m94[0m - [1mSearching PubMed with query: "neuroscience"[Abstract] OR "brain"[Abstract] OR "neural"[Abstract][0m


🔍 Fetching real article from PubMed...
Searching PubMed for: ['neuroscience', 'brain', 'neural']


[32m2025-09-17 18:17:46[0m | [1mINFO[0m | [36mpubmed.searcher[0m:[36msearch_recent_articles[0m:[36m118[0m - [1mFound 10 articles[0m


✅ Found 10 articles
Sample PMIDs: ['28728020', '25771946', '34381347']


[32m2025-09-17 18:17:46[0m | [1mINFO[0m | [36mpubmed.searcher[0m:[36mfetch_article_details[0m:[36m167[0m - [1mRetrieved details for 3 articles[0m


\n📄 Selected article:
PMID: 28728020
Title: Neuroscience-Inspired Artificial Intelligence.
Authors: Demis Hassabis, Dharshan Kumaran, Christopher Summerfield...
Journal: Neuron
Publication Date: 2017-Jul-19
Abstract length: 641 chars
\n✅ Using real PubMed article for testing:
Title: Neuroscience-Inspired Artificial Intelligence.
Authors: Demis Hassabis, Dharshan Kumaran, Christopher Summerfield...
Journal: Neuron
PMID: 28728020
Abstract: The fields of neuroscience and artificial intelligence (AI) have a long and intertwined history. In more recent times, however, communication and collaboration between the two fields has become less c...


In [18]:
# Force reload config and check raw YAML
import yaml
from pathlib import Path

# Load config directly from YAML file
config_path = Path('../config/config.yaml')
print('Config file exists:', config_path.exists())

if config_path.exists():
    with open(config_path, 'r') as f:
        raw_config = yaml.safe_load(f)
    
    print('Raw YAML api_keys section:')
    if 'api_keys' in raw_config:
        for key, value in raw_config['api_keys'].items():
            masked_value = f"{value[:8]}..." if value else "empty"
            print(f"  {key}: {masked_value}")
    
    # Update our config variable
    config = raw_config
    google_api_key = config['api_keys'].get('google', '')
    
    if google_api_key:
        print(f'\\n✅ Google API key found after reload: {google_api_key[:8]}...')
    else:
        print('\\n❌ Still no Google API key found!')
else:
    print('Config file not found!')

Config file exists: True
Raw YAML api_keys section:
  google: AIzaSyCj...
  openai: empty
  anthropic: empty
  elevenlabs: empty
\n✅ Google API key found after reload: AIzaSyCj...


In [20]:
# Display detailed information about the selected article
print('📊 Article Details for Script Generation:')
print('=' * 60)
print(f"📋 Title: {test_article['title']}")
print(f"👥 Authors: {', '.join(test_article['authors']) if test_article['authors'] else 'No authors listed'}")
print(f"📚 Journal: {test_article['journal']}")
print(f"📅 Publication Date: {test_article['publication_date']}")
print(f"🔗 DOI: {test_article['doi']}")
if test_article.get('pmid') and test_article['pmid'] != 'dummy':
    print(f"🆔 PubMed ID: {test_article['pmid']}")
    print(f"🌐 PubMed URL: https://pubmed.ncbi.nlm.nih.gov/{test_article['pmid']}/")

print(f"\\n📝 Abstract ({len(test_article['abstract'])} characters):")
print('-' * 60)
print(test_article['abstract'])
print('-' * 60)

# Quality assessment for podcast generation
print('\\n🎯 Article Quality Assessment:')
word_count = len(test_article['abstract'].split()) if test_article['abstract'] else 0
print(f"Abstract word count: {word_count}")
print(f"Has sufficient content for podcast: {'✅ Yes' if word_count > 50 else '❌ No'}")
print(f"Article type: {'📊 Real PubMed article' if test_article.get('pmid') != 'dummy' else '🧪 Test article'}")

📊 Article Details for Script Generation:
📋 Title: Neuroscience-Inspired Artificial Intelligence.
👥 Authors: Demis Hassabis, Dharshan Kumaran, Christopher Summerfield, Matthew Botvinick
📚 Journal: Neuron
📅 Publication Date: 2017-Jul-19
🔗 DOI: 10.1016/j.neuron.2017.06.011
🆔 PubMed ID: 28728020
🌐 PubMed URL: https://pubmed.ncbi.nlm.nih.gov/28728020/
\n📝 Abstract (641 characters):
------------------------------------------------------------
The fields of neuroscience and artificial intelligence (AI) have a long and intertwined history. In more recent times, however, communication and collaboration between the two fields has become less commonplace. In this article, we argue that better understanding biological brains could play a vital role in building intelligent machines. We survey historical interactions between the AI and neuroscience fields and emphasize current advances in AI that have been inspired by the study of neural computation in humans and other animals. We conclude by highli

In [21]:
# Install google-generativeai if needed
try:
    import google.generativeai as genai
    print('✅ google-generativeai is already installed')
except ImportError:
    print('Installing google-generativeai...')
    import subprocess
    import sys
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'google-generativeai'])
    import google.generativeai as genai
    print('✅ google-generativeai installed successfully')

✅ google-generativeai is already installed


In [22]:
# Create a Google Gemini provider class following Google's latest API recommendations
try:
    # Try the new import style first (from google import genai)
    from google import genai
    print('✅ Using new google.genai import style')
    NEW_API = True
except ImportError:
    try:
        # Fallback to older import style  
        import google.generativeai as genai
        print('✅ Using legacy google.generativeai import style')
        NEW_API = False
    except ImportError:
        print('❌ google-generativeai package not installed')
        genai = None
        NEW_API = False

from typing import Dict, List

class GoogleProvider:
    """Google Gemini provider using latest API recommendations"""
    
    def __init__(self, api_key: str, model: str = "gemini-2.5-flash"):
        self.api_key = api_key
        self.model = model
        self.logger = get_logger(__name__)
        
        if NEW_API:
            # Use new Client-based API
            self.client = genai.Client(api_key=self.api_key)
        else:
            # Use legacy configure-based API
            genai.configure(api_key=self.api_key)
            self.client = None
        
    async def generate_response(self, prompt: str, **kwargs) -> str:
        """Generate response using Google Gemini API"""
        try:
            if NEW_API and self.client:
                # New API style
                response = self.client.models.generate_content(
                    model=self.model,
                    contents=prompt
                )
                return response.text
            else:
                # Legacy API style
                model = genai.GenerativeModel(self.model)
                response = model.generate_content(
                    prompt,
                    generation_config=genai.types.GenerationConfig(
                        temperature=kwargs.get('temperature', 0.7),
                        max_output_tokens=kwargs.get('max_tokens', 4000)
                    )
                )
                return response.text
                
        except Exception as e:
            self.logger.error(f"Google Gemini API error: {str(e)}")
            raise

# Test instantiation
if genai and google_api_key:
    try:
        google_provider = GoogleProvider(google_api_key, config['llm']['model'])
        print('✅ Google provider initialized successfully')
    except Exception as e:
        print(f'❌ Error initializing Google provider: {e}')
        import traceback
        traceback.print_exc()
        google_provider = None
else:
    print('❌ Cannot initialize Google provider - missing dependencies or API key')
    google_provider = None

✅ Using legacy google.generativeai import style
✅ Google provider initialized successfully


In [23]:
# Test basic API connectivity with a simple prompt
if google_provider:
    simple_test_prompt = "Hello! Please respond with 'API connection successful' if you can read this."
    
    try:
        print('Testing basic API connectivity...')
        response = await google_provider.generate_response(simple_test_prompt)
        print('✅ API Response:')
        print(response)
    except Exception as e:
        print(f'❌ API connectivity test failed: {e}')
else:
    print('❌ Skipping connectivity test - no provider available')

Testing basic API connectivity...
✅ API Response:
API connection successful
✅ API Response:
API connection successful


In [24]:
# Create a simple podcast script generator for testing
class SimplePodcastScriptGenerator:
    """Simplified podcast script generator for testing Google Gemini"""
    
    def __init__(self, provider, config):
        self.provider = provider
        self.config = config
        self.logger = get_logger(__name__)
        
    def _prepare_article_summary(self, article: Dict) -> str:
        """Prepare a summary of the article for the prompt"""
        summary = f"""
Article:
Title: {article.get('title', 'N/A')}
Authors: {', '.join(article.get('authors', [])) if article.get('authors') else 'N/A'}
Journal: {article.get('journal', 'N/A')}
Publication Date: {article.get('publication_date', 'N/A')}
DOI: {article.get('doi', 'N/A')}

Abstract:
{article.get('abstract', 'No abstract available')}
"""
        return summary.strip()
    
    def _build_podcast_prompt(self, article_summary: str) -> str:
        """Build the prompt for podcast script generation"""
        template = self.config['llm']['podcast_prompt_template']
        return template.format(articles=article_summary)
    
    async def generate_podcast_script(self, article: Dict) -> str:
        """Generate podcast script from a single article"""
        self.logger.info(f"Generating podcast script for article: {article.get('title', 'Unknown')}")
        
        # Prepare article summary
        article_summary = self._prepare_article_summary(article)
        
        # Build prompt
        prompt = self._build_podcast_prompt(article_summary)
        
        print('--- Generated Prompt ---')
        print(prompt[:500] + '...' if len(prompt) > 500 else prompt)
        print('\n--- Sending to Gemini API ---')
        
        # Generate script
        script = await self.provider.generate_response(
            prompt,
            temperature=self.config['llm']['temperature'],
            max_tokens=self.config['llm']['max_tokens']
        )
        
        self.logger.info("Podcast script generated successfully")
        return script

# Initialize the generator
if google_provider:
    script_generator = SimplePodcastScriptGenerator(google_provider, config)
    print('✅ Script generator initialized')
else:
    print('❌ Cannot initialize script generator without provider')
    script_generator = None

✅ Script generator initialized


In [25]:
# Generate podcast script from the test article
if script_generator:
    try:
        print('Generating podcast script...')
        podcast_script = await script_generator.generate_podcast_script(test_article)
        
        print('\n' + '='*60)
        print('GENERATED PODCAST SCRIPT')
        print('='*60)
        print(podcast_script)
        print('='*60)
        
        # Basic quality checks
        word_count = len(podcast_script.split())
        print(f'\n📊 Script Statistics:')
        print(f'Word count: {word_count}')
        print(f'Character count: {len(podcast_script)}')
        print(f'Estimated reading time: {word_count / 150:.1f} minutes')
        
        # Check if key elements are present
        key_elements = ['introduction', 'findings', 'implications', 'conclusion']
        found_elements = [elem for elem in key_elements if elem.lower() in podcast_script.lower()]
        print(f'Key elements found: {found_elements}')
        
    except Exception as e:
        print(f'❌ Error generating podcast script: {e}')
        import traceback
        traceback.print_exc()
else:
    print('❌ Skipping script generation - no script generator available')

[32m2025-09-17 18:23:46[0m | [1mINFO[0m | [36m__main__[0m:[36mgenerate_podcast_script[0m:[36m32[0m - [1mGenerating podcast script for article: Neuroscience-Inspired Artificial Intelligence.[0m


Generating podcast script...
--- Generated Prompt ---
You are a science communicator creating a podcast script about recent research.
Create an engaging 5-minute podcast script summarizing these research articles:

Article:
Title: Neuroscience-Inspired Artificial Intelligence.
Authors: Demis Hassabis, Dharshan Kumaran, Christopher Summerfield, Matthew Botvinick
Journal: Neuron
Publication Date: 2017-Jul-19
DOI: 10.1016/j.neuron.2017.06.011

Abstract:
The fields of neuroscience and artificial intelligence (AI) have a long and intertwined history. In...

--- Sending to Gemini API ---


[32m2025-09-17 18:24:10[0m | [1mINFO[0m | [36m__main__[0m:[36mgenerate_podcast_script[0m:[36m51[0m - [1mPodcast script generated successfully[0m



GENERATED PODCAST SCRIPT
**(Intro Music fades in and out)**

**Host:** Welcome to "Mind Bytes," the podcast where we unpack the latest breakthroughs in science and technology. I'm your host, [Your Name], and today, we're diving into a fascinating area where the past is inspiring the future: the powerful reunion of neuroscience and artificial intelligence.

We often hear about AI achieving incredible feats, from beating grandmasters in chess to powering our smart assistants. But where do these intelligent machines get their best ideas? As it turns out, many of them come from the most sophisticated computer we know: the human brain.

Today, we're looking back at a pivotal paper from 2017 titled "Neuroscience-Inspired Artificial Intelligence," penned by a team of brilliant minds including Demis Hassabis, the co-founder of DeepMind, and published in the prestigious journal *Neuron*. This article isn't just a historical overview; it's a powerful argument for why understanding biological br

In [15]:
# Test error handling and edge cases
if google_provider:
    print('Testing error handling...')
    
    # Test with empty article
    empty_article = {'title': '', 'abstract': ''}
    
    try:
        empty_script = await script_generator.generate_podcast_script(empty_article)
        print('✅ Handled empty article gracefully')
        print(f'Response length: {len(empty_script)} characters')
    except Exception as e:
        print(f'❌ Error with empty article: {e}')
    
    # Test with very long prompt (if needed)
    # This helps understand API limits
    print('\n✅ Error handling tests completed')
else:
    print('❌ Skipping error handling tests - no provider available')

[32m2025-09-17 18:02:47[0m | [1mINFO[0m | [36m__main__[0m:[36mgenerate_podcast_script[0m:[36m32[0m - [1mGenerating podcast script for article: [0m


Testing error handling...
--- Generated Prompt ---
You are a science communicator creating a podcast script about recent research.
Create an engaging 5-minute podcast script summarizing these research articles:

Article:
Title: 
Authors: N/A
Journal: N/A
Publication Date: N/A
DOI: N/A

Abstract:

Format: Introduction, main findings, implications, conclusion.
Tone: Professional but accessible to a general audience.


--- Sending to Gemini API ---


[32m2025-09-17 18:03:14[0m | [1mINFO[0m | [36m__main__[0m:[36mgenerate_podcast_script[0m:[36m51[0m - [1mPodcast script generated successfully[0m


✅ Handled empty article gracefully
Response length: 4641 characters

✅ Error handling tests completed


In [15]:
# Summary and next steps
print('\\n' + '='*60)
print('TEST SUMMARY')
print('='*60)

if google_provider:
    print('✅ Google Gemini API integration: SUCCESS')
    print('✅ Podcast script generation: SUCCESS')
    print('✅ Configuration loading: SUCCESS')
    
    # Check if we used a real article
    if test_article.get('pmid') and test_article['pmid'] != 'dummy':
        print('✅ PubMed integration: SUCCESS')
        print(f'✅ Used real PubMed article: PMID {test_article["pmid"]}')
    else:
        print('⚠️  PubMed integration: Used fallback dummy article')
    
    print('\\n📝 Next steps:')
    print('1. Test with multiple real articles from different research areas')
    print('2. Integrate with IFC scraper for institutional articles')
    print('3. Test the full pipeline with embeddings and similarity search')
    print('4. Optimize prompt template for different article types')
    print('5. Add more error handling and retry logic')
    print('6. Test with articles from specific research domains (neuroscience, cancer, etc.)')
else:
    print('❌ Google Gemini API integration: FAILED')
    print('\\n🔧 Required fixes:')
    print('1. Check Google API key in config.yaml')
    print('2. Verify internet connectivity')
    print('3. Check API quotas and billing')
    print('4. Install google-generativeai package')

print('\\n📋 Configuration used:')
print(f'Provider: {config["llm"]["provider"]}')
print(f'Model: {config["llm"]["model"]}')
print(f'Temperature: {config["llm"]["temperature"]}')
print(f'Max tokens: {config["llm"]["max_tokens"]}')

if test_article.get('pmid'):
    print('\\n📄 Article used:')
    print(f'Title: {test_article["title"][:80]}{"..." if len(test_article["title"]) > 80 else ""}')
    print(f'Journal: {test_article["journal"]}')
    print(f'PubMed ID: {test_article.get("pmid", "N/A")}')
    print(f'Abstract length: {len(test_article["abstract"])} characters')

TEST SUMMARY
✅ Google Gemini API integration: SUCCESS
✅ Podcast script generation: SUCCESS
✅ Configuration loading: SUCCESS
✅ PubMed integration: SUCCESS
✅ Used real PubMed article: PMID 28728020
\n📝 Next steps:
1. Test with multiple real articles from different research areas
2. Integrate with IFC scraper for institutional articles
3. Test the full pipeline with embeddings and similarity search
4. Optimize prompt template for different article types
5. Add more error handling and retry logic
6. Test with articles from specific research domains (neuroscience, cancer, etc.)
\n📋 Configuration used:
Provider: google
Model: gemini-2.5-flash
Temperature: 0.7
Max tokens: 4000
\n📄 Article used:
Title: Neuroscience-Inspired Artificial Intelligence.
Journal: Neuron
PubMed ID: 28728020
Abstract length: 641 characters
