# In [None]:
import json
import subprocess
import sys
import os
import time
from typing import List, Dict, Any
from dataclasses import dataclass
from enum import Enum

# Try different import approaches for Google Generative AI.
# The SDK is treated as optional: GENAI_AVAILABLE records whether the import
# succeeded, and GeminiAutoGPT.__init__ checks it before touching `genai`.
try:
    import google.generativeai as genai
    GENAI_AVAILABLE = True
    print("✅ Google Generative AI imported successfully")
except ImportError:
    # Not fatal: the agent falls back to predefined tasks and code templates.
    print("❌ Google Generative AI not available. Install with: pip install google-generativeai")
    GENAI_AVAILABLE = False

class TaskStatus(Enum):
    """Lifecycle states assigned to a Task by the agent."""
    PENDING = "pending"          # Set when the task is created.
    IN_PROGRESS = "in_progress"  # Set when execution of the task starts.
    COMPLETED = "completed"      # Set when execution reported no error.
    FAILED = "failed"            # Set when execution reported an error.

@dataclass
class Task:
    """A single unit of work tracked by the agent, plus its execution outcome."""
    id: int                    # Sequential identifier assigned by create_task.
    description: str           # Natural-language description of the work.
    status: TaskStatus         # Current lifecycle state.
    code_to_execute: str = ""  # Python source generated for this task.
    result: str = ""           # Captured stdout of the executed code.
    error: str = ""            # Captured stderr (or an execution error message).

class GeminiAutoGPT:
    def __init__(self, objective: str, api_key: str = None):
        """Set up agent state and, when possible, the Gemini model.

        Args:
            objective: High-level goal the agent should break into tasks.
            api_key: Gemini API key. When it is missing, equal to the
                placeholder "YOUR_GEMINI_API_KEY_HERE", or the SDK could not
                be imported, the agent runs in template-based mode.
        """
        self.objective = objective
        self.tasks: List[Task] = []
        self.task_counter = 0
        self.max_iterations = 10
        self.current_iteration = 0
        self.use_gemini = False
        # Always define the attribute so template-mode code can safely test
        # `self.model is None` instead of hitting an AttributeError.
        self.model = None

        key_is_usable = bool(api_key) and api_key != "YOUR_GEMINI_API_KEY_HERE"
        if not (GENAI_AVAILABLE and key_is_usable):
            print("⚠️ Gemini API not available. Using predefined tasks...")
            return

        # Configure Gemini; any failure leaves the agent in template mode.
        try:
            genai.configure(api_key=api_key)
            self.model = genai.GenerativeModel('gemini-1.5-flash')
            self.use_gemini = True
            print("✅ Gemini API configured successfully!")
        except Exception as e:
            print(f"⚠️ Warning: Could not configure Gemini API: {e}")
            print("Falling back to predefined tasks...")
            self.model = None
            self.use_gemini = False

    def generate_tasks(self) -> List[str]:
        """Generate initial tasks based on the objective using Gemini or fallback"""
        if self.use_gemini:
            print("🤖 Asking Gemini to break down the objective into tasks...")

            prompt = f"""
            You are an AI assistant that breaks down objectives into specific, actionable tasks for a Python AutoGPT agent.

            Objective: {self.objective}

            Please break this objective down into 4-6 specific, sequential tasks that a Python script can execute.
            Each task should be:
            1. Specific and actionable
            2. Suitable for Python code execution
            3. Building upon previous tasks
            4. Clear and concise (one sentence each)

            Respond with ONLY a JSON array of task descriptions, like this:
            ["Task 1 description", "Task 2 description", "Task 3 description", ...]

            Do not include any other text or formatting.
            """

            try:
                response = self.model.generate_content(prompt)
                task_descriptions = json.loads(response.text.strip())
                print(f"✅ Gemini generated {len(task_descriptions)} tasks")
                return task_descriptions
            except Exception as e:
                print(f"❌ Error generating tasks with Gemini: {e}")
                print("Falling back to default tasks...")

        # Fallback tasks based on objective type.
        if "data analysis" in self.objective.lower() or "dataset" in self.objective.lower():
            return [
                "Import necessary libraries and set up the environment for data analysis",
                "Generate a comprehensive sample dataset with realistic business metrics",
                "Perform exploratory data analysis with descriptive statistics",
                "Create multiple visualizations including distribution plots and correlation heatmaps",
                "Generate advanced statistical insights and trend analysis",
                "Create a summary report with key findings and recommendations"
            ]
        elif "machine learning" in self.objective.lower():
            return [
                "Import ML libraries and prepare the environment",
                "Generate or load a suitable dataset for machine learning",
                "Perform data preprocessing and feature engineering",
                "Train multiple ML models and compare performance",
                "Evaluate models with cross-validation and metrics",
                "Create visualizations of model performance and predictions"
            ]
        else:
            return [
                "Import necessary libraries and set up the environment",
                "Generate sample data relevant to the objective",
                "Perform basic data exploration and analysis",
                "Create informative visualizations and charts",
                "Generate summary statistics and insights",
                "Create a final report with conclusions"
            ]

    def create_task(self, description: str) -> Task:
        """Register a new pending task with the next sequential id and return it."""
        next_id = self.task_counter + 1
        self.task_counter = next_id
        new_task = Task(id=next_id, description=description, status=TaskStatus.PENDING)
        self.tasks.append(new_task)
        return new_task

    def generate_code_for_task(self, task: Task) -> str:
        """Generate Python code for a given task using Gemini or predefined templates"""
        if self.use_gemini:
            print(f"🤖 Asking Gemini to generate code for: {task.description}")

            # Get context from previous successful tasks.
            previous_context = ""
            for prev_task in self.tasks:
                if prev_task.status == TaskStatus.COMPLETED and prev_task.id < task.id:
                    previous_context += f"Task {prev_task.id}: {prev_task.description} - COMPLETED\n"

            prompt = f"""
            You are a Python code generator for an AutoGPT agent. Generate complete, executable Python code for the given task.

            Overall Objective: {self.objective}
            Current Task: {task.description}

            Previous completed tasks:
            {previous_context}

            Requirements:
            1. Generate complete, working Python code that accomplishes the task
            2. Include proper error handling and informative print statements
            3. Use libraries like pandas, numpy, matplotlib, seaborn as needed
            4. For data analysis tasks, create sample data if no data exists
            5. For visualization tasks, save plots to a 'plots' directory
            6. Use matplotlib with 'Agg' backend for non-interactive plotting
            7. Include detailed comments explaining the code
            8. Make the code robust and handle edge cases

            CRITICAL - For data generation and visualization:
            - Create realistic datasets with meaningful column names (not A, B, C)
            - Use domain-specific column names related to the objective
            - Ensure all plots have proper titles, axis labels, and legends
            - Use descriptive titles that explain what the visualization shows
            - Always label axes with the actual column names and units where appropriate
            - Add proper legends when using multiple series or categories
            - Make visualizations publication-ready with good formatting

            Respond with ONLY the Python code, no additional text or formatting.
            """

            try:
                response = self.model.generate_content(prompt)
                code = response.text.strip()

                # Clean up the code if it's wrapped in markdown.
                if code.startswith("```python"):
                    code = code[9:]
                if code.endswith("```"):
                    code = code[:-3]

                print(f"✅ Gemini generated {len(code.split())} lines of code")
                return code.strip()

            except Exception as e:
                print(f"❌ Error generating code with Gemini: {e}")
                print("Falling back to template code...")

        # Fallback to predefined code templates.
        return self.get_template_code(task)

    def get_template_code(self, task: Task) -> str:
        """Generate template code based on task description"""
        task_desc = task.description.lower()

        if "import" in task_desc and "libraries" in task_desc:
            return """
# Task: Import necessary libraries and set up environment
print("Setting up environment and importing libraries...")

import os
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Create directories if they don't exist
os.makedirs('plots', exist_ok=True)
os.makedirs('data', exist_ok=True)

print("✅ Libraries imported successfully!")
print("✅ Directories created!")
"""

        elif "generate" in task_desc and "data" in task_desc:
            return """
# Task: Generate comprehensive sample dataset
print("Generating realistic sample dataset...")

np.random.seed(42)  # For reproducibility

# Generate sample business data
n_samples = 1000
dates = pd.date_range(start='2023-01-01', periods=n_samples, freq='D')

# Create realistic business metrics
df = pd.DataFrame({
    'date': dates,
    'sales_revenue': np.random.normal(50000, 15000, n_samples).clip(min=0),
    'customers': np.random.poisson(200, n_samples),
    'marketing_spend': np.random.normal(8000, 2000, n_samples).clip(min=0),
    'employee_count': np.random.randint(50, 200, n_samples),
    'product_units_sold': np.random.poisson(500, n_samples),
    'customer_satisfaction': np.random.normal(4.2, 0.8, n_samples).clip(1, 5),
    'region': np.random.choice(['North', 'South', 'East', 'West'], n_samples),
    'season': pd.Categorical([
        'Winter' if month in [12, 1, 2] else
        'Spring' if month in [3, 4, 5] else
        'Summer' if month in [6, 7, 8] else 'Fall'
        for month in dates.month
    ])
})

# Add some realistic correlations
df['profit_margin'] = (df['sales_revenue'] - df['marketing_spend']) / df['sales_revenue']
df['revenue_per_customer'] = df['sales_revenue'] / df['customers']

print(f"✅ Generated dataset with {len(df)} rows and {len(df.columns)} columns")
print(f"Columns: {list(df.columns)}")
print(f"Date range: {df['date'].min()} to {df['date'].max()}")

# Save the dataset
df.to_csv('data/business_data.csv', index=False)
print("✅ Dataset saved to data/business_data.csv")
"""

        elif "exploratory" in task_desc or "descriptive" in task_desc:
            return """
# Task: Perform exploratory data analysis
print("Performing exploratory data analysis...")

# Load data if it exists, otherwise use current df
try:
    if os.path.exists('data/business_data.csv'):
        df = pd.read_csv('data/business_data.csv')
        df['date'] = pd.to_datetime(df['date'])
        print("✅ Loaded dataset from file")
    elif 'df' not in globals():
        print("❌ No dataset found, cannot perform analysis")
        raise Exception("No dataset available")
    else:
        print("✅ Using existing dataset")
except:
    print("❌ Error loading dataset")
    raise

# Display basic information about the dataset
print("\\n=== DATASET OVERVIEW ===")
print(f"Dataset shape: {df.shape}")
print(f"\\nColumn types:\\n{df.dtypes}")

print("\\n=== MISSING VALUES ===")
missing_values = df.isnull().sum()
print(missing_values[missing_values > 0] if missing_values.sum() > 0 else "No missing values found")

print("\\n=== DESCRIPTIVE STATISTICS ===")
numeric_columns = df.select_dtypes(include=[np.number]).columns
print(df[numeric_columns].describe())

print("\\n=== CATEGORICAL VARIABLES ===")
categorical_columns = df.select_dtypes(include=['object', 'category']).columns
for col in categorical_columns:
    print(f"\\n{col}:")
    print(df[col].value_counts().head())

# Calculate correlations
print("\\n=== CORRELATIONS ===")
correlation_matrix = df[numeric_columns].corr()
print("Top 5 strongest correlations:")
correlation_pairs = []
for i in range(len(correlation_matrix.columns)):
    for j in range(i+1, len(correlation_matrix.columns)):
        correlation_pairs.append((
            correlation_matrix.columns[i],
            correlation_matrix.columns[j],
            correlation_matrix.iloc[i, j]
        ))

correlation_pairs.sort(key=lambda x: abs(x[2]), reverse=True)
for col1, col2, corr in correlation_pairs[:5]:
    print(f"{col1} vs {col2}: {corr:.3f}")

print("\\n✅ Exploratory data analysis completed!")
"""

        elif "visualization" in task_desc or "chart" in task_desc:
            return """
# Task: Create comprehensive visualizations
print("Creating comprehensive visualizations...")

# Load data if it exists
try:
    if os.path.exists('data/business_data.csv'):
        df = pd.read_csv('data/business_data.csv')
        df['date'] = pd.to_datetime(df['date'])
        print("✅ Loaded dataset from file")
    elif 'df' not in globals():
        print("❌ No dataset found, cannot create visualizations")
        raise Exception("No dataset available")
    else:
        print("✅ Using existing dataset")
except:
    print("❌ Error loading dataset")
    raise

# Set up the plotting style
plt.style.use('default')
sns.set_palette("husl")

# Create multiple visualizations
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Business Data Analysis Dashboard', fontsize=16, fontweight='bold')

# 1. Sales Revenue Over Time
axes[0, 0].plot(df['date'], df['sales_revenue'], alpha=0.7, linewidth=1)
axes[0, 0].set_title('Sales Revenue Over Time', fontweight='bold')
axes[0, 0].set_xlabel('Date')
axes[0, 0].set_ylabel('Sales Revenue ($)')
axes[0, 0].tick_params(axis='x', rotation=45)

# 2. Revenue Distribution by Region
df.boxplot(column='sales_revenue', by='region', ax=axes[0, 1])
axes[0, 1].set_title('Sales Revenue Distribution by Region', fontweight='bold')
axes[0, 1].set_xlabel('Region')
axes[0, 1].set_ylabel('Sales Revenue ($)')

# 3. Customer Satisfaction vs Revenue
scatter = axes[1, 0].scatter(df['customer_satisfaction'], df['sales_revenue'],
                           alpha=0.6, c=df['customers'], cmap='viridis')
axes[1, 0].set_title('Customer Satisfaction vs Revenue', fontweight='bold')
axes[1, 0].set_xlabel('Customer Satisfaction Score')
axes[1, 0].set_ylabel('Sales Revenue ($)')
plt.colorbar(scatter, ax=axes[1, 0], label='Number of Customers')

# 4. Seasonal Revenue Pattern
seasonal_avg = df.groupby('season')['sales_revenue'].mean()
axes[1, 1].bar(seasonal_avg.index, seasonal_avg.values, color=['skyblue', 'lightgreen', 'orange', 'lightcoral'])
axes[1, 1].set_title('Average Revenue by Season', fontweight='bold')
axes[1, 1].set_xlabel('Season')
axes[1, 1].set_ylabel('Average Sales Revenue ($)')

plt.tight_layout()
plt.savefig('plots/business_dashboard.png', dpi=300, bbox_inches='tight')
plt.close()

# Create correlation heatmap
plt.figure(figsize=(12, 8))
numeric_cols = df.select_dtypes(include=[np.number]).columns
correlation_matrix = df[numeric_cols].corr()
mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))
sns.heatmap(correlation_matrix, mask=mask, annot=True, cmap='coolwarm', center=0,
            square=True, fmt='.2f', cbar_kws={"shrink": .8})
plt.title('Correlation Matrix of Business Metrics', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('plots/correlation_heatmap.png', dpi=300, bbox_inches='tight')
plt.close()

print("✅ Visualizations created and saved to plots/ directory!")
print("Created files:")
print("  - plots/business_dashboard.png")
print("  - plots/correlation_heatmap.png")
"""

        elif "statistical" in task_desc and "insights" in task_desc:
            return """
# Task: Generate advanced statistical insights and trend analysis
print("Generating advanced statistical insights and trend analysis...")

# Load data if it exists
try:
    if os.path.exists('data/business_data.csv'):
        df = pd.read_csv('data/business_data.csv')
        df['date'] = pd.to_datetime(df['date'])
        print("✅ Loaded dataset from file")
    else:
        print("❌ No dataset found, cannot perform statistical analysis")
        raise Exception("No dataset available")
except Exception as e:
    print(f"❌ Error loading dataset: {e}")
    raise

from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

print("\\n=== ADVANCED STATISTICAL INSIGHTS ===")

# 1. Time Series Analysis
print("\\n1. TIME SERIES TREND ANALYSIS")
df_sorted = df.sort_values('date')
df_sorted['revenue_ma_7'] = df_sorted['sales_revenue'].rolling(window=7).mean()
df_sorted['revenue_ma_30'] = df_sorted['sales_revenue'].rolling(window=30).mean()

# Calculate growth rates
df_sorted['revenue_growth_rate'] = df_sorted['sales_revenue'].pct_change()
avg_growth_rate = df_sorted['revenue_growth_rate'].mean() * 100
print(f"Average daily revenue growth rate: {avg_growth_rate:.2f}%")

# Seasonal analysis
monthly_revenue = df_sorted.groupby(df_sorted['date'].dt.month)['sales_revenue'].agg(['mean', 'std'])
best_month = monthly_revenue['mean'].idxmax()
worst_month = monthly_revenue['mean'].idxmin()
print(f"Best performing month: {best_month} (avg: ${monthly_revenue.loc[best_month, 'mean']:,.2f})")
print(f"Worst performing month: {worst_month} (avg: ${monthly_revenue.loc[worst_month, 'mean']:,.2f})")

# 2. Statistical Tests
print("\\n2. STATISTICAL HYPOTHESIS TESTS")

# Test for regional differences in revenue
regions = df['region'].unique()
regional_revenues = [df[df['region'] == region]['sales_revenue'] for region in regions]
f_stat, p_value = stats.f_oneway(*regional_revenues)
print(f"ANOVA test for regional revenue differences:")
print(f"F-statistic: {f_stat:.4f}, p-value: {p_value:.6f}")
print(f"Result: {'Significant' if p_value < 0.05 else 'Not significant'} regional differences")

# Correlation significance tests
numeric_cols = df.select_dtypes(include=[np.number]).columns
significant_correlations = []
for i, col1 in enumerate(numeric_cols):
    for col2 in numeric_cols[i+1:]:
        corr, p_val = stats.pearsonr(df[col1].dropna(), df[col2].dropna())
        if p_val < 0.05:
            significant_correlations.append((col1, col2, corr, p_val))

print(f"\\nSignificant correlations (p < 0.05):")
for col1, col2, corr, p_val in sorted(significant_correlations, key=lambda x: abs(x[2]), reverse=True)[:5]:
    print(f"{col1} vs {col2}: r={corr:.3f}, p={p_val:.6f}")

# 3. Principal Component Analysis
print("\\n3. PRINCIPAL COMPONENT ANALYSIS")
numeric_data = df[numeric_cols].dropna()
scaler = StandardScaler()
scaled_data = scaler.fit_transform(numeric_data)

pca = PCA()
pca_result = pca.fit_transform(scaled_data)
explained_variance = pca.explained_variance_ratio_

print(f"First 3 components explain {sum(explained_variance[:3]):.1%} of variance")
print("Component importance:")
for i, var in enumerate(explained_variance[:3]):
    print(f"  PC{i+1}: {var:.1%}")

# Feature importance in first component
feature_importance = abs(pca.components_[0])
important_features = sorted(zip(numeric_cols, feature_importance), key=lambda x: x[1], reverse=True)
print("\\nMost important features in PC1:")
for feature, importance in important_features[:5]:
    print(f"  {feature}: {importance:.3f}")

# 4. Outlier Analysis
print("\\n4. OUTLIER ANALYSIS")
from scipy.stats import zscore

outlier_counts = {}
for col in numeric_cols:
    z_scores = np.abs(zscore(df[col].dropna()))
    outliers = np.sum(z_scores > 3)
    outlier_counts[col] = outliers
    if outliers > 0:
        outlier_percentage = (outliers / len(df[col].dropna())) * 100
        print(f"{col}: {outliers} outliers ({outlier_percentage:.1f}%)")

# 5. Business Insights
print("\\n5. KEY BUSINESS INSIGHTS")

# Revenue efficiency metrics
df['revenue_per_employee'] = df['sales_revenue'] / df['employee_count']
df['marketing_roi'] = (df['sales_revenue'] - df['marketing_spend']) / df['marketing_spend']

avg_revenue_per_employee = df['revenue_per_employee'].mean()
avg_marketing_roi = df['marketing_roi'].mean()

print(f"Average revenue per employee: ${avg_revenue_per_employee:,.2f}")
print(f"Average marketing ROI: {avg_marketing_roi:.2f}x")

# Segment analysis
high_satisfaction = df[df['customer_satisfaction'] > 4.5]['sales_revenue'].mean()
low_satisfaction = df[df['customer_satisfaction'] <= 3.5]['sales_revenue'].mean()
satisfaction_impact = ((high_satisfaction - low_satisfaction) / low_satisfaction) * 100

print(f"High satisfaction customers generate {satisfaction_impact:.1f}% more revenue")

# Seasonal patterns
seasonal_stats = df.groupby('season').agg({
    'sales_revenue': ['mean', 'std'],
    'customer_satisfaction': 'mean',
    'marketing_spend': 'mean'
}).round(2)

print("\\nSeasonal performance summary:")
for season in seasonal_stats.index:
    revenue_mean = seasonal_stats.loc[season, ('sales_revenue', 'mean')]
    satisfaction_mean = seasonal_stats.loc[season, ('customer_satisfaction', 'mean')]
    print(f"{season}: Avg Revenue ${revenue_mean:,.2f}, Satisfaction {satisfaction_mean:.2f}")

print("\\n✅ Advanced statistical analysis completed!")
"""

        elif "summary" in task_desc and "report" in task_desc:
            return """
# Task: Create a summary report with key findings and recommendations
print("Creating comprehensive summary report...")

# Load data if it exists
try:
    if os.path.exists('data/business_data.csv'):
        df = pd.read_csv('data/business_data.csv')
        df['date'] = pd.to_datetime(df['date'])
        print("✅ Loaded dataset from file")
    else:
        print("❌ No dataset found, cannot create report")
        raise Exception("No dataset available")
except Exception as e:
    print(f"❌ Error loading dataset: {e}")
    raise

# Generate summary report
report = []
report.append("=" * 80)
report.append("BUSINESS DATA ANALYSIS - EXECUTIVE SUMMARY REPORT")
report.append("=" * 80)
report.append(f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
report.append(f"Analysis Period: {df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}")
report.append(f"Total Records Analyzed: {len(df):,}")
report.append("")

# Key Performance Metrics
report.append("KEY PERFORMANCE METRICS")
report.append("-" * 40)
report.append(f"Total Revenue: ${df['sales_revenue'].sum():,.2f}")
report.append(f"Average Daily Revenue: ${df['sales_revenue'].mean():,.2f}")
report.append(f"Revenue Standard Deviation: ${df['sales_revenue'].std():,.2f}")
report.append(f"Total Customers Served: {df['customers'].sum():,}")
report.append(f"Average Customer Satisfaction: {df['customer_satisfaction'].mean():.2f}/5.0")
report.append(f"Total Marketing Investment: ${df['marketing_spend'].sum():,.2f}")
report.append("")

# Regional Performance
report.append("REGIONAL PERFORMANCE")
report.append("-" * 40)
regional_summary = df.groupby('region').agg({
    'sales_revenue': ['mean', 'sum'],
    'customers': 'sum',
    'customer_satisfaction': 'mean'
}).round(2)

for region in regional_summary.index:
    avg_revenue = regional_summary.loc[region, ('sales_revenue', 'mean')]
    total_revenue = regional_summary.loc[region, ('sales_revenue', 'sum')]
    total_customers = regional_summary.loc[region, ('customers', 'sum')]
    avg_satisfaction = regional_summary.loc[region, ('customer_satisfaction', 'mean')]

    report.append(f"{region} Region:")
    report.append(f"  • Average Daily Revenue: ${avg_revenue:,.2f}")
    report.append(f"  • Total Revenue: ${total_revenue:,.2f}")
    report.append(f"  • Total Customers: {total_customers:,}")
    report.append(f"  • Avg Satisfaction: {avg_satisfaction:.2f}/5.0")

report.append("")

# Seasonal Analysis
report.append("SEASONAL TRENDS")
report.append("-" * 40)
seasonal_summary = df.groupby('season').agg({
    'sales_revenue': 'mean',
    'customer_satisfaction': 'mean',
    'marketing_spend': 'mean'
}).round(2)

best_season = seasonal_summary['sales_revenue'].idxmax()
worst_season = seasonal_summary['sales_revenue'].idxmin()

report.append(f"Best Performing Season: {best_season}")
report.append(f"  • Average Revenue: ${seasonal_summary.loc[best_season, 'sales_revenue']:,.2f}")
report.append(f"Lowest Performing Season: {worst_season}")
report.append(f"  • Average Revenue: ${seasonal_summary.loc[worst_season, 'sales_revenue']:,.2f}")
report.append("")

# Key Correlations
report.append("KEY BUSINESS CORRELATIONS")
report.append("-" * 40)
correlations = df.corr()['sales_revenue'].abs().sort_values(ascending=False)[1:6]  # Exclude self-correlation
for var, corr in correlations.items():
    strength = "Strong" if corr > 0.7 else "Moderate" if corr > 0.4 else "Weak"
    report.append(f"• {var}: {strength} correlation ({corr:.3f})")

report.append("")

# Business Insights and Recommendations
report.append("KEY INSIGHTS & RECOMMENDATIONS")
report.append("-" * 40)

# Calculate some insights
high_satisfaction_revenue = df[df['customer_satisfaction'] > 4.0]['sales_revenue'].mean()
low_satisfaction_revenue = df[df['customer_satisfaction'] <= 3.0]['sales_revenue'].mean()
satisfaction_premium = ((high_satisfaction_revenue - low_satisfaction_revenue) / low_satisfaction_revenue) * 100

df['marketing_roi'] = (df['sales_revenue'] - df['marketing_spend']) / df['marketing_spend']
avg_roi = df['marketing_roi'].mean()

insights = [
    f"1. Customer satisfaction drives revenue - high satisfaction customers generate {satisfaction_premium:.1f}% more revenue",
    f"2. Current marketing ROI is {avg_roi:.2f}x, indicating {'strong' if avg_roi > 2 else 'moderate' if avg_roi > 1 else 'poor'} marketing efficiency",
    f"3. {best_season} season shows strongest performance - consider increasing marketing during this period",
    f"4. Revenue variability (${df['sales_revenue'].std():,.0f}) suggests opportunity for process optimization",
    f"5. {regional_summary['sales_revenue']['mean'].idxmax()} region outperforms others - analyze best practices for replication"
]

for insight in insights:
    report.append(insight)

report.append("")

# Recommendations
report.append("STRATEGIC RECOMMENDATIONS")
report.append("-" * 40)
recommendations = [
    "• Focus on customer satisfaction initiatives to drive revenue growth",
    "• Optimize marketing spend allocation based on seasonal performance patterns",
    "• Investigate regional success factors for knowledge transfer",
    "• Implement process improvements to reduce revenue variability",
    "• Develop predictive models for better demand forecasting"
]

for rec in recommendations:
    report.append(rec)

# Save and display report
report_content = "\\n".join(report)
with open('business_analysis_report.txt', 'w') as f:
    f.write(report_content)

print("\\n" + report_content)
print(f"\\n✅ Summary report generated and saved to 'business_analysis_report.txt'")
"""

        else:
            # Generic template - Fixed the df reference issue.
            return f"""
# Task: {task.description}
print("Executing task: {task.description}")

try:
    # Basic task implementation
    import os
    import pandas as pd
    import numpy as np
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print("Libraries imported successfully")

    # Check if data file exists and load it
    if os.path.exists('data/business_data.csv'):
        df = pd.read_csv('data/business_data.csv')
        df['date'] = pd.to_datetime(df['date'])
        print(f"Working with dataset: {{df.shape}}")
    else:
        print("No dataset file found")

    print("Task completed successfully")

except Exception as e:
    print(f"Error in task execution: {e}")
"""

    def execute_code(self, code: str) -> tuple[str, str]:
        """Execute Python code and return output and error"""
        try:
            # Build cumulative code by combining all previous successful task codes.
            cumulative_code = ""

            # Add all previously executed successful task codes.
            for task in self.tasks:
                if task.status == TaskStatus.COMPLETED and task.code_to_execute:
                    cumulative_code += task.code_to_execute + "\n\n"

            # Add current code.
            full_code = cumulative_code + code

            # Create a temporary file to execute the code.
            with open('temp_code.py', 'w') as f:
                f.write(full_code)

            # Execute the code and capture output.
            result = subprocess.run(
                [sys.executable, 'temp_code.py'],
                capture_output=True,
                text=True,
                timeout=60
            )

            # Clean up
            if os.path.exists('temp_code.py'):
                os.remove('temp_code.py')

            return result.stdout, result.stderr

        except subprocess.TimeoutExpired:
            return "", "Code execution timed out"
        except Exception as e:
            return "", f"Execution error: {str(e)}"

    def execute_task(self, task: Task) -> bool:
        """Generate code for a task, run it, and record the outcome on the task.

        The task is marked IN_PROGRESS, its code is generated and previewed,
        then executed. stdout and stderr are stored in `task.result` and
        `task.error`, and the status becomes COMPLETED or FAILED.

        Args:
            task: The task to execute; it is mutated in place.

        Returns:
            True if the task completed, False if it failed.
        """
        print(f"\n--- Executing Task {task.id}: {task.description} ---")

        task.status = TaskStatus.IN_PROGRESS

        # Generate code for the task.
        task.code_to_execute = self.generate_code_for_task(task)

        print("Generated code preview (first 10 lines):")
        print("```python")
        code_lines = task.code_to_execute.split('\n')
        for line in code_lines[:10]:
            print(line)
        if len(code_lines) > 10:
            print(f"... ({len(code_lines) - 10} more lines)")
        print("```")

        # Execute the code.
        output, error = self.execute_code(task.code_to_execute)

        task.result = output
        task.error = error

        # stderr that only mentions warnings is tolerated, but a traceback is
        # always a failure even when warning text appears alongside it.
        has_traceback = "Traceback (most recent call last)" in error
        only_warnings = "warning" in error.lower() and not has_traceback

        if error and not only_warnings:
            print(f"❌ Task failed with error: {error}")
            task.status = TaskStatus.FAILED
            return False

        print("✅ Task completed successfully!")
        if output:
            print("Output preview (last 20 lines):")
            output_lines = output.split('\n')
            for line in output_lines[-20:]:
                if line.strip():
                    print(line)
        task.status = TaskStatus.COMPLETED
        return True

    def should_continue(self) -> bool:
        """Determine if the agent should continue with more tasks"""
        if self.current_iteration >= self.max_iterations:
            print(f"\n🛑 Reached maximum iterations ({self.max_iterations})")
            return False

        pending_tasks = [t for t in self.tasks if t.status == TaskStatus.PENDING]
        if not pending_tasks:
            print("\n✅ All tasks completed!")
            return False

        failed_tasks = [t for t in self.tasks if t.status == TaskStatus.FAILED]
        if len(failed_tasks) > len(self.tasks) * 0.5:
            print("\n❌ Too many tasks failed, stopping execution")
            return False

        return True

    def run(self):
        """Plan the tasks, execute them one at a time, then print a summary."""
        mode_label = 'Gemini-Powered' if self.use_gemini else 'Template-Based'
        print(f"🚀 Starting {mode_label} AutoGPT Agent")
        print(f"Objective: {self.objective}")
        print("=" * 60)

        # Turn every generated description into a tracked task.
        for description in self.generate_tasks():
            self.create_task(description)

        print(f"\n📋 Generated {len(self.tasks)} tasks:")
        for planned in self.tasks:
            print(f"  {planned.id}. {planned.description}")

        # Work through pending tasks in creation order.
        while self.should_continue():
            self.current_iteration += 1
            print(f"\n🔄 Iteration {self.current_iteration}")

            next_task = next(
                (t for t in self.tasks if t.status == TaskStatus.PENDING),
                None,
            )
            if next_task is None:
                break

            self.execute_task(next_task)

            # Brief pause between tasks.
            time.sleep(1)

        # Final summary.
        self.print_summary()

    def print_summary(self):
        """Print task counts, per-task outcomes, and the output files found on disk."""
        divider = "=" * 60
        print("\n" + divider)
        print("📊 EXECUTION SUMMARY")
        print(divider)

        def count_with(status):
            # Number of tasks currently in the given status.
            return sum(1 for t in self.tasks if t.status == status)

        completed = count_with(TaskStatus.COMPLETED)
        failed = count_with(TaskStatus.FAILED)
        pending = count_with(TaskStatus.PENDING)

        print(f"Objective: {self.objective}")
        print(f"Total Tasks: {len(self.tasks)}")
        print(f"✅ Completed: {completed}")
        print(f"❌ Failed: {failed}")
        print(f"⏳ Pending: {pending}")
        print(f"🔄 Iterations: {self.current_iteration}")

        status_emoji = {
            TaskStatus.COMPLETED: "✅",
            TaskStatus.FAILED: "❌",
            TaskStatus.PENDING: "⏳",
            TaskStatus.IN_PROGRESS: "🔄"
        }
        print("\nTask Details:")
        for entry in self.tasks:
            print(f"  {status_emoji[entry.status]} {entry.id}. {entry.description}")

        # Check if plots were created.
        if os.path.exists('plots'):
            png_names = [name for name in os.listdir('plots') if name.endswith('.png')]
            if png_names:
                print("\n🎨 Generated Visualizations:")
                for name in png_names:
                    size_kb = os.path.getsize(os.path.join('plots', name)) / 1024
                    print(f"  📊 plots/{name} ({size_kb:.1f} KB)")

        # Check for any other generated files (everything under the current
        # directory with a matching extension, except the temp_ script).
        generated_files = [
            os.path.join(folder, name)
            for folder, _subdirs, names in os.walk('.')
            for name in names
            if name.endswith(('.csv', '.json', '.txt', '.html')) and not name.startswith('temp_')
        ]

        if generated_files:
            print("\n📄 Other Generated Files:")
            for path in generated_files:
                size_kb = os.path.getsize(path) / 1024
                print(f"  📄 {path} ({size_kb:.1f} KB)")


# Example usage.
if __name__ == "__main__":
    # Sample objectives shown to the user; only the first one is run below.
    objectives = [
        "Perform comprehensive data analysis on a sample dataset with multiple visualizations and statistical insights",
        "Create a machine learning pipeline for classification or regression with model evaluation",
        "Build a data processing system that can handle multiple file formats and generate reports",
        "Develop a time series analysis with forecasting and trend visualization",
        "Create an automated data quality assessment tool with detailed reporting"
    ]

    print("🤖 AutoGPT Agent (Gemini-Enhanced)")
    print("Available example objectives:")
    for number, objective_text in enumerate(objectives, start=1):
        print(f"{number}. {objective_text}")

    # Default to the first objective in the list.
    selected_objective = objectives[0]
    print(f"\nRunning with objective: {selected_objective}")
    print("\n" + "=" * 60)

    # Optional: Set your Gemini API key here.
    # GEMINI_API_KEY = "YOUR_KEY_HERE"  # Uncomment and add your key.
    GEMINI_API_KEY = None  # With no key the agent runs in template-based mode.

    # Build the agent and start the run.
    agent = GeminiAutoGPT(selected_objective, GEMINI_API_KEY)
    agent.run()