In [1]:
!pip install e2b_code_interpreter google-generativeai requests beautifulsoup4 pandas matplotlib seaborn

Collecting e2b_code_interpreter
  Downloading e2b_code_interpreter-1.1.1-py3-none-any.whl.metadata (2.5 kB)
Collecting e2b<2.0.0,>=1.0.4 (from e2b_code_interpreter)
  Downloading e2b-1.2.1-py3-none-any.whl.metadata (2.5 kB)
Downloading e2b_code_interpreter-1.1.1-py3-none-any.whl (12 kB)
Downloading e2b-1.2.1-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.4/95.4 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: e2b, e2b_code_interpreter
Successfully installed e2b-1.2.1 e2b_code_interpreter-1.1.1


In [2]:
import os
import json
import requests
import logging
import re
import urllib.parse
from bs4 import BeautifulSoup
from e2b_code_interpreter import Sandbox
import google.generativeai as genai
import base64
import matplotlib.pyplot as plt
from io import BytesIO
import datetime
import seaborn as sns
import numpy as np

In [5]:
# Root logger setup: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

class ResearchAgent:
    def __init__(self, gemini_api_key, e2b_api_key=None):
        """Create the result store, configure Gemini and open an E2B sandbox.

        Args:
            gemini_api_key: Key handed to ``genai.configure``.
            e2b_api_key: Optional E2B key. When truthy it is exported as the
                ``E2B_API_KEY`` environment variable before the sandbox is
                created.
        """
        # Everything a research run produces is collected in this dict.
        self.research_results = dict(
            topic="",
            summary="",
            findings=[],
            data_analysis={},
            visualizations=[],
            data_sources=[],
            analysis={},
        )

        if e2b_api_key:
            os.environ["E2B_API_KEY"] = e2b_api_key

        genai.configure(api_key=gemini_api_key)
        self.gemini_model = genai.GenerativeModel("gemini-1.5-flash")
        self.sandbox = Sandbox()

    def enhanced_web_search(self, query):
        """Vyhledávání s podporou E2B Sandbox"""
        search_results = self.direct_web_search(query)
        academic_results = self.search_academic_sources(query)
        return search_results + academic_results

    def direct_web_search(self, query):
        """Webový vyhledávač využívající E2B sandbox"""
        search_code = f"""
    import requests
    import urllib.parse
    from bs4 import BeautifulSoup
    import json

    query = "{query}"
    headers = {{
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
    }}

    engines = [
        ("google", f"https://www.google.com/search?q={{urllib.parse.quote_plus(query)}}"),
        ("bing", f"https://www.bing.com/search?q={{urllib.parse.quote_plus(query)}}")
    ]

    selectors = {{
        "google": {{
            "result": "div.g",
            "title": "h3",
            "url": "a",
            "snippet": "div.VwiC3b"
        }},
        "bing": {{
            "result": "li.b_algo",
            "title": "h2",
            "url": "h2 a",
            "snippet": "p"
        }}
    }}

    results = []
    for engine, url in engines:
        try:
            response = requests.get(url, headers=headers, timeout=15)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')

                # Parse search results based on engine
                for result in soup.select(selectors[engine]["result"]):
                    title_elem = result.select_one(selectors[engine]["title"])
                    link_elem = result.select_one(selectors[engine]["url"])
                    snippet_elem = result.select_one(selectors[engine]["snippet"])

                    if title_elem and link_elem:
                        title = title_elem.text.strip()
                        link = link_elem.get('href', '')
                        snippet = snippet_elem.text.strip() if snippet_elem else ""

                        if engine == "google" and link.startswith('/url?q='):
                            link = link.split('/url?q=')[1].split('&')[0]

                        results.append({{
                            "title": title,
                            "url": link,
                            "snippet": snippet,
                            "engine": engine
                        }})
        except Exception as e:
            print(f"{{engine.capitalize()}} search failed: {{e}}")

    print(json.dumps(results[:10]))
    """
        result = self.execute_in_sandbox(search_code)
        try:
            return json.loads(result) if result else []
        except:
            return []


    def parse_search_results(self, soup, engine):
        """Unifikovaný parser výsledků pro různé vyhledávače"""
        selectors = {
            "google": {
                "result": "div.g",
                "title": "h3",
                "url": "a",
                "snippet": "div.VwiC3b"
            },
            "bing": {
                "result": "li.b_algo",
                "title": "h2",
                "url": "h2 a",
                "snippet": "p"
            }
        }

        results = []
        for result in soup.select(selectors[engine]["result"]):
            title_elem = result.select_one(selectors[engine]["title"])
            link_elem = result.select_one(selectors[engine]["url"])
            snippet_elem = result.select_one(selectors[engine]["snippet"])

            if title_elem and link_elem:
                title = title_elem.text.strip()
                link = link_elem.get('href', '')
                snippet = snippet_elem.text.strip() if snippet_elem else ""

                if engine == "google" and link.startswith('/url?q='):
                    link = link.split('/url?q=')[1].split('&')[0]

                results.append({
                    "title": title,
                    "url": link,
                    "snippet": snippet,
                    "engine": engine
                })

        return results

    def search_academic_sources(self, query):
        """Akademické vyhledávání využívající E2B sandbox"""
        search_code = f"""
    import requests
    import urllib.parse
    from bs4 import BeautifulSoup
    import json

    query = "{query}"
    results = []

    try:
        arxiv_url = f"http://export.arxiv.org/api/query?search_query=all:{{urllib.parse.quote_plus(query)}}&start=0&max_results=10"
        response = requests.get(arxiv_url, timeout=10)

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'xml')
            for entry in soup.find_all('entry'):
                results.append({{
                    "title": entry.find('title').text.strip(),
                    "url": entry.find('id').text.strip(),
                    "snippet": entry.find('summary').text.strip() if entry.find('summary') else "",
                    "source": "arXiv"
                }})
    except Exception as e:
        print(f"ArXiv search error: {{e}}")

    print(json.dumps(results))
    """
        result = self.execute_in_sandbox(search_code)
        try:
            return json.loads(result) if result else []
        except:
            return []


    def execute_in_sandbox(self, code):
        try:
            execution = self.sandbox.run_code(code)
            return execution.text if execution else None
        except Exception as sandbox_error:
            logging.error(f"Sandbox failure: {sandbox_error}")
            return json.dumps({"error": str(sandbox_error)})

    def advanced_analysis(self, data):
        """Rozšířená analýza s využitím E2B Sandbox"""
        analysis_code = f"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

text = '''{' '.join([d.get('title', '') + ' ' + d.get('snippet', '') for d in data])}'''

# Základní frekvenční analýza
words = [word.lower() for word in text.split() if len(word) > 3]
word_counts = Counter(words).most_common(20)

# Vytvoření vizualizace
plt.figure(figsize=(12, 8))
pd.DataFrame(word_counts, columns=['Word', 'Count']).plot.barh(x='Word', y='Count')
plt.title('Word Frequency Analysis')
plt.tight_layout()
plt.savefig('/home/user/analysis.png')

print(json.dumps({{
    "word_counts": word_counts,
    "unique_words": len(set(words)),
    "total_words": len(words)
}}))
"""
        return self.execute_in_sandbox(analysis_code)

    def generate_report(self, topic, data):
        """Generování zprávy s využitím Gemini API"""
        prompt = f"""
Generate comprehensive technical report on: {topic}
Include sections:
1. Executive Summary
2. Methodology
3. Key Findings
4. Technical Analysis
5. Recommendations

Base your analysis on following data:
{json.dumps(data[:5], indent=2)}
"""
        try:
            response = self.gemini_model.generate_content(prompt)
            return response.text
        except Exception as e:
            logging.error(f"Report generation failed: {e}")
            return "Error generating report"

    def full_research_flow(self, topic):
        """Kompletní výzkumný workflow"""
        try:
            # Fáze 1: Sběr dat
            search_results = self.enhanced_web_search(topic)

            # Fáze 2: Analýza
            analysis_results = self.advanced_analysis(search_results)

            # Fáze 3: Generování reportu
            return self.generate_report(topic, search_results)

            vis_data = agent.generate_visualizations(search_results)
            display_visualizations(vis_data)

            question_input.disabled = False
            ask_button.disabled = False

        except Exception as e:
            logging.error(f"Research failed: {e}")
            return f"Research error: {str(e)}"

class EnhancedResearchAgent(ResearchAgent):
    def __init__(self, *args, **kwargs):
        """Initialise the base agent, then reset the visualisation list and add a Q&A log.

        All arguments are forwarded unchanged to the parent constructor.
        """
        super().__init__(*args, **kwargs)
        self.research_results.update(visualizations=[], qa_history=[])

    def generate_concept_network(self, data):
        """Creates a network visualization of related concepts"""
        network_code = """
        import networkx as nx
        import matplotlib.pyplot as plt
        import pandas as pd
        from collections import Counter
        import numpy as np
        from sklearn.feature_extraction.text import CountVectorizer
        import re

        # Extract text from data
        text = ' '.join([d.get('title', '') + ' ' + d.get('snippet', '') for d in data])

        # Extract key terms (entities)
        def extract_entities(text):
            # Basic entity extraction using regex patterns
            # Could be enhanced with NER models
            patterns = [
                r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)', # Proper nouns
                r'([a-z]+(?:[ -][a-z]+)*)', # Common terms
            ]
            entities = []
            for pattern in patterns:
                matches = re.finditer(pattern, text)
                for match in matches:
                    entity = match.group(0)
                    if len(entity) > 4:
                        entities.append(entity.lower())
            return [e for e in entities if e not in ['the', 'and', 'for', 'with', 'this', 'that']]

        entities = extract_entities(text)
        entity_counts = Counter(entities)
        top_entities = [e for e, c in entity_counts.most_common(15)]

        # Create co-occurrence matrix
        sentences = re.split(r'[.!?]', text)
        entity_matrix = np.zeros((len(top_entities), len(top_entities)))

        for sentence in sentences:
            present_entities = [e for e in top_entities if e in sentence.lower()]
            for i, e1 in enumerate(present_entities):
                for j, e2 in enumerate(present_entities):
                    if i != j:
                        entity_matrix[top_entities.index(e1)][top_entities.index(e2)] += 1

        # Create and visualize network
        G = nx.Graph()

        # Add nodes
        for entity in top_entities:
            G.add_node(entity, weight=entity_counts[entity])

        # Add edges
        for i, e1 in enumerate(top_entities):
            for j, e2 in enumerate(top_entities):
                if i < j and entity_matrix[i][j] > 0:
                    G.add_edge(e1, e2, weight=entity_matrix[i][j])

        # Visualization
        plt.figure(figsize=(12, 10))
        pos = nx.spring_layout(G, seed=42)

        # Node sizes based on frequency
        node_sizes = [G.nodes[node]['weight'] * 100 for node in G.nodes()]

        # Edge widths based on co-occurrence
        edge_widths = [G.edges[edge]['weight'] * 0.5 for edge in G.edges()]

        # Draw the network
        nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color='skyblue', alpha=0.8)
        nx.draw_networkx_edges(G, pos, width=edge_widths, alpha=0.5, edge_color='gray')
        nx.draw_networkx_labels(G, pos, font_size=10, font_family='sans-serif')

        plt.title('Concept Relationship Network', fontsize=16)
        plt.axis('off')
        plt.tight_layout()
        plt.savefig('/home/user/concept_network.png', dpi=300, bbox_inches='tight')

        # Return as encoded image
        import base64
        from io import BytesIO

        buffer = BytesIO()
        plt.savefig(buffer, format='png', dpi=300, bbox_inches='tight')
        buffer.seek(0)
        image_png = buffer.getvalue()

        encoded = base64.b64encode(image_png).decode('utf-8')
        return encoded
        """
        return self.execute_in_sandbox(network_code)

    def generate_topic_clusters(self, data):
        """Creates topic cluster visualization using dimensionality reduction"""
        cluster_code = """
        import pandas as pd
        import numpy as np
        import matplotlib.pyplot as plt
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.decomposition import PCA
        from sklearn.cluster import KMeans
        import base64
        from io import BytesIO

        # Extract text data
        documents = [d.get('title', '') + ' ' + d.get('snippet', '') for d in data if 'title' in d and 'snippet' in d]

        if len(documents) < 5:
            # Not enough data for meaningful clustering
            plt.figure(figsize=(8, 6))
            plt.text(0.5, 0.5, 'Insufficient data for topic clustering',
                     horizontalalignment='center', fontsize=14)
            plt.axis('off')

            buffer = BytesIO()
            plt.savefig(buffer, format='png')
            buffer.seek(0)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        # Create TF-IDF features
        vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
        X = vectorizer.fit_transform(documents)

        # Determine optimal number of clusters (simple method)
        max_clusters = min(8, len(documents) - 1)
        inertias = []
        for k in range(2, max_clusters + 1):
            kmeans = KMeans(n_clusters=k, random_state=42)
            kmeans.fit(X)
            inertias.append(kmeans.inertia_)

        # Find elbow point or use default of 4
        optimal_clusters = 4
        if len(inertias) > 2:
            diffs = np.diff(inertias)
            diffs_of_diffs = np.diff(diffs)
            if len(diffs_of_diffs) > 0:
                elbow_idx = np.argmax(diffs_of_diffs) + 2
                optimal_clusters = elbow_idx + 2  # +2 because we started from 2

        optimal_clusters = min(optimal_clusters, max_clusters)

        # Perform k-means clustering
        kmeans = KMeans(n_clusters=optimal_clusters, random_state=42)
        clusters = kmeans.fit_predict(X)

        # Dimensionality reduction for visualization
        pca = PCA(n_components=2)
        coords = pca.fit_transform(X.toarray())

        # Create a DataFrame for plotting
        df = pd.DataFrame({
            'x': coords[:, 0],
            'y': coords[:, 1],
            'cluster': clusters,
            'document': [d[:50] + '...' if len(d) > 50 else d for d in documents]
        })

        # Determine cluster centers
        centers = []
        for i in range(optimal_clusters):
            subset = df[df['cluster'] == i]
            centers.append((subset['x'].mean(), subset['y'].mean()))

        # Extract top terms for each cluster
        def get_top_terms(kmeans, vectorizer, n_terms=5):
            order_centroids = kmeans.cluster_centers_.argsort()[:, ::-1]
            terms = vectorizer.get_feature_names_out()
            cluster_terms = []
            for i in range(kmeans.n_clusters):
                top_terms = [terms[ind] for ind in order_centroids[i, :n_terms]]
                cluster_terms.append(', '.join(top_terms))
            return cluster_terms

        top_terms = get_top_terms(kmeans, vectorizer)

        # Visualization
        plt.figure(figsize=(12, 10))

        # Plot points
        colors = plt.cm.tab10(np.linspace(0, 1, optimal_clusters))
        for i in range(optimal_clusters):
            subset = df[df['cluster'] == i]
            plt.scatter(subset['x'], subset['y'], c=[colors[i]], label=f'Cluster {i+1}', s=80, alpha=0.7)

        # Add cluster labels with top terms
        for i, (x, y) in enumerate(centers):
            plt.annotate(f'Cluster {i+1}:\n{top_terms[i]}',
                        (x, y),
                        fontsize=9,
                        bbox=dict(boxstyle="round,pad=0.5", fc='white', alpha=0.7),
                        ha='center')

        plt.title('Topic Clusters in Research Results', fontsize=16)
        plt.legend(loc='best')
        plt.grid(True, linestyle='--', alpha=0.5)

        # Remove axes ticks as they don't have interpretable meaning after PCA
        plt.xticks([])
        plt.yticks([])

        plt.tight_layout()

        # Convert to base64
        buffer = BytesIO()
        plt.savefig(buffer, format='png', dpi=300)
        buffer.seek(0)

        return base64.b64encode(buffer.getvalue()).decode('utf-8')
        """
        return self.execute_in_sandbox(cluster_code)

    def generate_sentiment_visualization(self, data):
        """Creates sentiment analysis visualization of research results"""
        sentiment_code = """
        import pandas as pd
        import matplotlib.pyplot as plt
        import numpy as np
        from textblob import TextBlob
        import base64
        from io import BytesIO

        # Extract text
        texts = [d.get('title', '') + '. ' + d.get('snippet', '') for d in data]
        sources = [d.get('engine', 'unknown') if 'engine' in d else d.get('source', 'unknown') for d in data]

        # Analyze sentiment
        sentiments = []
        for text in texts:
            if text:
                analysis = TextBlob(text)
                sentiments.append({
                    'polarity': analysis.sentiment.polarity,
                    'subjectivity': analysis.sentiment.subjectivity
                })
            else:
                sentiments.append({
                    'polarity': 0,
                    'subjectivity': 0
                })

        # Create DataFrame
        df = pd.DataFrame({
            'text': [t[:50] + '...' if len(t) > 50 else t for t in texts],
            'source': sources,
            'polarity': [s['polarity'] for s in sentiments],
            'subjectivity': [s['subjectivity'] for s in sentiments]
        })

        # Create visualization
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Scatter plot: polarity vs. subjectivity
        sources_unique = df['source'].unique()
        colors = plt.cm.tab10(np.linspace(0, 1, len(sources_unique)))
        source_color_map = {source: color for source, color in zip(sources_unique, colors)}

        for source in sources_unique:
            subset = df[df['source'] == source]
            ax1.scatter(subset['polarity'], subset['subjectivity'],
                       label=source, alpha=0.7,
                       c=[source_color_map[source]] * len(subset))

        ax1.set_xlim([-1.1, 1.1])
        ax1.set_ylim([-0.1, 1.1])
        ax1.set_xlabel('Polarity (Negative ← → Positive)', fontsize=12)
        ax1.set_ylabel('Subjectivity (Factual ← → Opinion)', fontsize=12)
        ax1.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
        ax1.axvline(x=0, color='gray', linestyle='--', alpha=0.5)
        ax1.grid(True, linestyle='--', alpha=0.3)
        ax1.set_title('Sentiment Analysis of Research Results', fontsize=14)

        # Add quadrant labels
        ax1.text(0.8, 0.9, 'Positive & Subjective', fontsize=9, ha='center')
        ax1.text(-0.8, 0.9, 'Negative & Subjective', fontsize=9, ha='center')
        ax1.text(0.8, 0.1, 'Positive & Factual', fontsize=9, ha='center')
        ax1.text(-0.8, 0.1, 'Negative & Factual', fontsize=9, ha='center')

        # Source comparison
        source_stats = df.groupby('source')[['polarity', 'subjectivity']].mean()
        source_counts = df['source'].value_counts()
        source_stats = source_stats.join(source_counts.rename('count'))

        # Sort for better visualization
        source_stats = source_stats.sort_values('polarity')

        # Bar chart for average sentiment by source
        bars = ax2.bar(source_stats.index, source_stats['polarity'],
                      alpha=0.7,
                      color=[source_color_map[s] for s in source_stats.index])

        # Add subjectivity as line
        ax2_twin = ax2.twinx()
        ax2_twin.plot(source_stats.index, source_stats['subjectivity'], 'ro-', alpha=0.6)
        ax2_twin.set_ylabel('Avg. Subjectivity', color='r', fontsize=12)
        ax2_twin.tick_params(axis='y', colors='r')

        # Customize bar chart
        ax2.set_xlabel('Source', fontsize=12)
        ax2.set_ylabel('Avg. Polarity', fontsize=12)
        ax2.set_title('Sentiment by Source', fontsize=14)
        ax2.grid(True, axis='y', linestyle='--', alpha=0.3)

        # Add count annotations
        for i, (idx, row) in enumerate(source_stats.iterrows()):
            ax2.text(i, row['polarity'] + (0.1 if row['polarity'] >= 0 else -0.1),
                    f'n={row["count"]}', ha='center', va='center', fontsize=9)

        plt.tight_layout()

        # Convert to base64
        buffer = BytesIO()
        plt.savefig(buffer, format='png', dpi=300)
        buffer.seek(0)

        return base64.b64encode(buffer.getvalue()).decode('utf-8')
        """
        return self.execute_in_sandbox(sentiment_code)

    def generate_results_dashboard(self, data):
        """Creates an integrated dashboard of research metrics"""
        dashboard_code = """
        try:
         import pandas as pd
         import matplotlib.pyplot as plt
         import matplotlib.gridspec as gridspec
         import numpy as np
         from collections import Counter
         import base64
         from io import BytesIO
         import re
         from datetime import datetime

         # Extract data
         all_text = ' '.join([d.get('title', '') + ' ' + d.get('snippet', '') for d in data])

         # Setup the dashboard
         fig = plt.figure(figsize=(15, 12))
         gs = gridspec.GridSpec(3, 2, height_ratios=[1, 1, 1.2])

         # 1. Content type distribution
         ax1 = plt.subplot(gs[0, 0])

         # Categorize content
         def categorize_content(item):
             text = (item.get('title', '') + ' ' + item.get('snippet', '')).lower()
             if any(term in text for term in ['study', 'research', 'paper', 'journal']):
                 return 'Academic'
             elif any(term in text for term in ['news', 'article', 'report', 'today', 'latest']):
                 return 'News'
             elif any(term in text for term in ['blog', 'opinion', 'view', 'perspective']):
                 return 'Blog/Opinion'
             elif any(term in text for term in ['wiki', 'encyclopedia']):
                 return 'Wiki/Reference'
             else:
                 return 'Other'

         content_types = [categorize_content(d) for d in data]
         type_counts = Counter(content_types)

         # Plot pie chart
         type_labels = list(type_counts.keys())
         type_values = list(type_counts.values())
         type_colors = plt.cm.Pastel1(range(len(type_counts)))

         wedges, texts, autotexts = ax1.pie(
             type_values,
             labels=type_labels,
             colors=type_colors,
             autopct='%1.1f%%',
             startangle=90,
             wedgeprops={'edgecolor': 'w', 'linewidth': 1}
         )
         for text in texts:
             text.set_fontsize(9)
         for autotext in autotexts:
             autotext.set_fontsize(9)
             autotext.set_color('black')

         ax1.set_title('Content Type Distribution', fontsize=12)

         # 2. Source distribution
         ax2 = plt.subplot(gs[0, 1])

         # Get sources
         sources = [d.get('engine', 'unknown') if 'engine' in d else d.get('source', 'unknown') for d in data]
         source_counts = Counter(sources)

         # Sort for better visualization
         source_df = pd.DataFrame({
             'source': list(source_counts.keys()),
             'count': list(source_counts.values())
         }).sort_values('count', ascending=True)

         bars = ax2.barh(source_df['source'], source_df['count'],
                        color=plt.cm.Blues(np.linspace(0.4, 0.8, len(source_df))))

         ax2.set_xlabel('Number of Results', fontsize=10)
         ax2.set_title('Results by Source', fontsize=12)
         ax2.grid(axis='x', linestyle='--', alpha=0.7)

         # Add count labels
         for i, v in enumerate(source_df['count']):
             ax2.text(v + 0.1, i, str(v), va='center', fontsize=9)

         # 3. Keyword frequency
         ax3 = plt.subplot(gs[1, :])

         # Process text
         words = re.findall(r'\\b[a-zA-Z]{4,}\\b', all_text.lower())
         word_counts = Counter(words)

         # Remove common stopwords
         stopwords = ['this', 'that', 'with', 'from', 'have', 'they', 'will', 'what',
                     'when', 'make', 'like', 'time', 'there', 'their', 'your']
         for word in stopwords:
             if word in word_counts:
                 del word_counts[word]

         word_df = pd.DataFrame({
             'word': list(word_counts.keys()),
             'count': list(word_counts.values())
         }).sort_values('count', ascending=False).head(15)

         bars = ax3.bar(word_df['word'], word_df['count'],
                      color=plt.cm.viridis(np.linspace(0, 0.8, len(word_df))))

         ax3.set_ylabel('Frequency', fontsize=10)
         ax3.set_title('Top Keywords in Research Results', fontsize=12)
         ax3.grid(axis='y', linestyle='--', alpha=0.7)
         plt.xticks(rotation=45, ha='right', fontsize=9)

         # Add count labels
         for i, v in enumerate(word_df['count']):
             ax3.text(i, v + 0.5, str(v), ha='center', fontsize=8)

         # 4. Research metrics dashboard
         ax4 = plt.subplot(gs[2, :])

         # Calculate metrics
         metrics = {
             'Total Results': len(data),
             'Avg. Title Length': np.mean([len(d.get('title', '')) for d in data if 'title' in d]),
             'Avg. Snippet Length': np.mean([len(d.get('snippet', '')) for d in data if 'snippet' in d]),
             'Unique Keywords': len(set(words)),
             'Total Words': len(words),
             'Diversity Score': len(set(words)) / len(words) if words else 0,
         }

         # Create a table
         ax4.axis('tight')
         ax4.axis('off')

         metric_names = list(metrics.keys())
         metric_values = [metrics[m] if not isinstance(metrics[m], float) else f"{metrics[m]:.2f}"
                         for m in metric_names]

         table = ax4.table(
             cellText=[[m, v] for m, v in zip(metric_names, metric_values)],
             colLabels=['Metric', 'Value'],
             loc='center',
             cellLoc='left',
             colWidths=[0.5, 0.3]
         )

         table.auto_set_font_size(False)
         table.set_fontsize(10)
         table.scale(1, 1.5)

         # Style the table
         for key, cell in table.get_celld().items():
             if key[0] == 0:  # Header row
                 cell.set_text_props(fontweight='bold', color='white')
                 cell.set_facecolor('#4472C4')
             else:  # Data rows
                 cell.set_facecolor('#E6F0FF' if key[0] % 2 == 0 else '#D4E4FC')

         # Dashboard title
         plt.suptitle('Research Results Dashboard', fontsize=16, y=0.98)
         current_time = datetime.now().strftime('%Y-%m-%d %H:%M')
         plt.figtext(0.5, 0.01, f'Generated on {current_time}', ha='center', fontsize=8)

         plt.tight_layout(rect=[0, 0.03, 1, 0.95])

         # Convert to base64
         buffer = BytesIO()
         plt.savefig(buffer, format='png', dpi=300, bbox_inches='tight')
         buffer.seek(0)
         return base64.b64encode(buffer.getvalue()).decode('utf-8')
        except Exception as e:
         logging.error(f"Error generating dashboard: {e}")
         return None
        """
        return self.execute_in_sandbox(dashboard_code)

    def generate_visualizations(self, data):
        visualizations = []

        try:
            dashboard_img = self.generate_results_dashboard(data)
            if dashboard_img:
                visualizations.append({
                    'type': 'research_dashboard',
                    'title': 'Research Results Dashboard',
                    'image': dashboard_img
                })

            network_img = self.generate_concept_network(data)
            if network_img:
                visualizations.append({
                    'type': 'concept_network',
                    'title': 'Concept Relationship Network',
                    'image': network_img
                })

            clusters_img = self.generate_topic_clusters(data)
            if clusters_img:
                visualizations.append({
                    'type': 'topic_clusters',
                    'title': 'Topic Clusters',
                    'image': clusters_img
                })

            sentiment_img = self.generate_sentiment_visualization(data)
            if sentiment_img:
                visualizations.append({
                    'type': 'sentiment_analysis',
                    'title': 'Sentiment Analysis',
                    'image': sentiment_img
                })
        except Exception as e:
            logging.error(f"Error generating visualizations: {e}")

        self.research_results["visualizations"] = visualizations
        return visualizations



    def answer_question(self, question):
        """Enhanced Q&A system with context awareness"""
        context = f"""
        Research Topic: {self.research_results['topic']}
        Key Findings: {self.research_results.get('findings', [])[:3]}
        Data Sources: {self.research_results.get('data_sources', [])[:5]}
        """

        prompt = f"""
        You are a research assistant analyzing: {self.research_results['topic']}
        Answer this question based on the research context below:
        Question: {question}

        Context:
        {context}

        Provide a detailed, technical answer with supporting evidence from the research.
        If unsure, state the limitations clearly.
        """

        try:
            response = self.gemini_model.generate_content(prompt)
            answer = response.text

            # Store Q&A history
            self.research_results["qa_history"].append({
                "question": question,
                "answer": answer,
                "timestamp": datetime.datetime.now().isoformat()
            })

            return answer
        except Exception as e:
            logging.error(f"Q&A failed: {e}")
            return f"Error answering question: {str(e)}"

    def full_research_flow(self, topic):
        try:
            # Set the research topic
            self.research_results['topic'] = topic

            # Phase 1: Data collection
            search_results = self.enhanced_web_search(topic)
            self.research_results['data_sources'] = search_results

            # Phase 2: Analysis
            analysis_results = self.advanced_analysis(search_results)

            # Phase 3: Generate visualizations
            visualizations = self.generate_visualizations(search_results)

            # Phase 4: Generate report
            report = self.generate_report(topic, search_results)
            self.research_results['summary'] = report

            return report
        except Exception as e:
            logging.error(f"Research failed: {e}")
            return f"Research error: {str(e)}"



In [7]:
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output, Image

# Agent created by the most recent "Start Research" click. It stays None until
# a research run has been started; the Q&A handler checks for None before
# trying to answer a question.
global_agent = None

def create_research_ui():
    """Build and display the interactive research UI in the notebook.

    The UI consists of a column of controls (API key fields, report type,
    topic field, a "Start Research" button and a question field with an
    "Ask Question" button) next to a three-tab area: "Report",
    "Visualizations" and "Q&A".

    Clicking "Start Research" creates a new ``EnhancedResearchAgent``, stores
    it in the module-level ``global_agent``, runs ``full_research_flow`` for
    the entered topic and renders the report and visualizations. Clicking
    "Ask Question" forwards the question to ``global_agent.answer_question``.

    The function returns nothing; its effect is the displayed widgets and the
    registered click handlers.
    """
    # One Output widget per tab, created up front so the handlers below can
    # capture into them. (Output has no `description` trait, so none is
    # passed; tab titles are set on the Tab container instead.)
    output_area = widgets.Output()
    viz_output = widgets.Output()
    qa_output = widgets.Output()

    # Research topic input
    topic_input = widgets.Text(
        value='',
        placeholder='Enter research topic (e.g., "Quantum computing applications")',
        description='Topic:',
        layout=widgets.Layout(width='80%')
    )

    # API key inputs: masked fields that start empty, so no key is ever
    # stored in the notebook source.
    gemini_key_input = widgets.Password(
        value='',
        placeholder='Enter your Gemini API key',
        description='Gemini API Key:',
        layout=widgets.Layout(width='80%')
    )

    e2b_key_input = widgets.Password(
        value='',
        placeholder='Enter your E2B API key',
        description='E2B API Key:',
        layout=widgets.Layout(width='80%')
    )

    # Report type selection. Only one option exists and the handlers below do
    # not read its value; it is shown for information only.
    report_type = widgets.RadioButtons(
        options=[('Scientific Research', 'scientific')],
        value='scientific',
        description='Report Type:',
        disabled=False
    )

    research_button = widgets.Button(
        description='Start Research',
        button_style='primary',
        tooltip='Click to start deep research',
        icon='search'
    )

    tabs = widgets.Tab()
    tabs.children = [output_area, viz_output, qa_output]
    tabs.set_title(0, 'Report')
    tabs.set_title(1, 'Visualizations')
    tabs.set_title(2, 'Q&A')

    # Q&A components
    question_input = widgets.Text(
        placeholder='Ask about the research...',
        layout=widgets.Layout(width='80%')
    )

    ask_button = widgets.Button(
        description='Ask Question',
        button_style='info',
        icon='comment'
    )

    controls = widgets.VBox([
        gemini_key_input,
        e2b_key_input,
        report_type,
        topic_input,
        research_button,
        widgets.HBox([question_input, ask_button])
    ])

    # Caption shown under a visualization, keyed by its 'type' field.
    viz_captions = {
        'research_dashboard': "*This dashboard provides an overview of all research metrics and distributions.*",
        'concept_network': "*This network graph shows relationships between key concepts. Larger nodes represent more frequent terms, and thicker edges indicate stronger relationships.*",
        'topic_clusters': "*This visualization groups similar content into clusters, with the most relevant terms for each cluster shown.*",
        'sentiment_analysis': "*This chart analyzes the emotional tone of research results, plotting content on scales of positivity (negative to positive) and subjectivity (factual to opinion).*",
    }

    def display_visualizations(visualizations):
        """Render each visualization dict into the "Visualizations" tab.

        Each entry is read for the keys 'title', 'image' (base64-encoded
        image data) and 'type'. Missing keys are tolerated so one incomplete
        entry cannot abort rendering of the others.
        """
        with viz_output:
            clear_output()
            if not visualizations:
                display(Markdown("### No visualizations available"))
                return

            for viz in visualizations:
                display(Markdown(f"### {viz.get('title', 'Untitled visualization')}"))

                image_b64 = viz.get('image')
                if image_b64 is not None:
                    try:
                        display(Image(base64.b64decode(image_b64)))
                    except (TypeError, ValueError):
                        # b64decode raises binascii.Error (a ValueError) on
                        # malformed input and TypeError on a non-bytes-like,
                        # non-str argument; both mean the image is unusable.
                        display(Markdown("*Error: Unable to display this visualization.*"))
                else:
                    display(Markdown("*Visualization data is missing.*"))

                # Add the description below each visualization, if its type is known.
                caption = viz_captions.get(viz.get('type'))
                if caption:
                    display(Markdown(caption))

    def handle_question(b):
        """Answer the typed question using the current agent (Q&A tab)."""
        with qa_output:
            clear_output()
            question = question_input.value.strip()
            if not question:
                print("Please enter a question")
                return

            if global_agent is None:
                print("Please start a research first before asking questions")
                return

            try:
                answer = global_agent.answer_question(question)
                display(Markdown(f"**Q:** {question}"))
                display(Markdown(f"**A:** {answer}"))
            except Exception as e:
                print(f"Error answering question: {str(e)}")

    def on_button_click(b):
        """Validate the inputs, run the research flow and render the results."""
        global global_agent
        with output_area:
            output_area.clear_output()

            # Validation (whitespace-only input counts as empty)
            topic = topic_input.value.strip()
            if not topic:
                print("Please enter a research topic.")
                return
            if not gemini_key_input.value:
                print("Please enter a Gemini API key.")
                return

            try:
                # A fresh agent per run; an empty E2B field is passed as None.
                global_agent = EnhancedResearchAgent(
                    gemini_api_key=gemini_key_input.value,
                    e2b_api_key=e2b_key_input.value or None
                )

                # Execute full research flow
                report = global_agent.full_research_flow(topic)

                # Display report
                display(Markdown("# Research Report"))
                display(Markdown(report))

                # Display visualizations in their tab
                display_visualizations(global_agent.research_results.get("visualizations", []))

            except Exception as e:
                print(f"Research failed: {str(e)}")

    # Connect button click handlers
    research_button.on_click(on_button_click)
    ask_button.on_click(handle_question)

    # Intro text. Built without leading indentation: Markdown treats lines
    # indented by four spaces as a code block, which would stop the heading
    # and bullet list from rendering.
    intro_markdown = (
        "This tool conducts comprehensive research on any topic using AI and autonomous web browsing.\n"
        "\n"
        "### Capabilities:\n"
        "- Browsing the web for relevant information\n"
        "- Executing code to analyze data\n"
        "- Generating in-depth analysis reports with visualizations\n"
        "\n"
        "To get started, enter your API keys and a research topic below.\n"
    )

    # Arrange UI elements
    display(Markdown("# Deep Research Agent"))
    display(Markdown(intro_markdown))
    display(widgets.HBox([controls, tabs]))

# Run the UI: build the widgets, register the click handlers and display
# everything as this cell's output.
create_research_ui()


# Deep Research Agent


    This tool conducts comprehensive research on any topic using AI and autonomous web browsing.

    ### Capabilities:
    - Browsing the web for relevant information
    - Executing code to analyze data
    - Generating in-depth analysis reports with visualizations

    To get started, enter your API keys and a research topic below.
    

HBox(children=(VBox(children=(Password(description='Gemini API Key:', layout=Layout(width='80%'), placeholder=…