In [1]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

class RenewableEnergyAnalyzer:
    """Build a comparison dashboard and console insights from renewable-energy data.

    The loaded CSV must provide the columns ``country``, ``year`` and
    ``EG.ELC.RNEW.ZS``; these are the only columns this class reads.
    """

    # Column with the renewable electricity output share (printed as a percentage).
    _INDICATOR = 'EG.ELC.RNEW.ZS'

    # Regular expression (case-sensitive) identifying aggregate rows such as
    # regions, income groups and lending categories, so that rankings contain
    # individual countries only.  The terms are deliberately narrow: plain
    # substrings like 'Arab', 'Africa' or 'America' would also discard
    # 'Saudi Arabia', 'United Arab Emirates', 'South Africa',
    # 'Central African Republic' and 'American Samoa'.
    # NOTE(review): assumes World Bank style aggregate names - confirm against
    # the actual CSV.
    _AGGREGATE_PATTERN = '|'.join([
        'World', 'income', 'region', r'\bIDA\b', r'\bIBRD\b', 'Arab World',
        'Asia', 'Europe', r'(?<!South )\bAfrica\b', r'\bAmerica\b',
        'Caribbean', 'Pacific', '[Ss]mall states',
        'OECD', 'dividend', 'Euro area', 'Fragile', 'indebted',
        'Least developed', 'Not classified',
    ])

    def __init__(self, data_path='data/raw/renewable_energy_data.csv'):
        """Load the dataset from ``data_path`` into ``self.df``."""
        self.df = pd.read_csv(data_path)

    def analyze_by_categories(self):
        """Build the dashboard for the predefined entity groups.

        Returns:
            The figure produced by :meth:`create_comparison_dashboard`.
        """
        # Entity names must match the ``country`` column exactly; names that
        # are absent from the data are silently skipped when plotting.
        categories = {
            'Major Regions': [
                'North America',
                'East Asia & Pacific',
                'Europe & Central Asia',
                'Latin America & Caribbean',
                'South Asia',
                'Sub-Saharan Africa',
                'Middle East & North Africa'
            ],
            'Economic Groups': [
                'High income',
                'Upper middle income',
                'Lower middle income',
                'Low income'
            ],
            'Notable Individual Countries': [
                'Brazil',  # High renewable percentage
                'Norway',
                'Albania',  # 100% renewable
                'China',
                'Germany',
                'France'
            ]
        }

        return self.create_comparison_dashboard(categories)

    def create_comparison_dashboard(self, categories):
        """Create the 2x2 comparison figure.

        Args:
            categories: Mapping with the keys 'Major Regions',
                'Economic Groups' and 'Notable Individual Countries', each
                holding a list of entity names.

        Returns:
            The populated plotly figure (three bar panels and one time series).
        """
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                'Regional Comparison',
                'Economic Groups Comparison',
                'Notable Countries',
                'Trends Over Time'
            )
        )

        # (category key, row, col, trace name) for the three bar panels.
        bar_panels = (
            ('Major Regions', 1, 1, 'Regional Renewable Energy Output'),
            ('Economic Groups', 1, 2, 'Economic Groups Comparison'),
            ('Notable Individual Countries', 2, 1, 'Notable Countries'),
        )
        for key, row, col, title in bar_panels:
            self.add_bar_chart(fig, categories[key], row=row, col=col, title=title)

        # Fourth panel: evolution over time for a few aggregates.
        self.add_time_series(
            fig,
            ['World', 'High income', 'Low income'],
            row=2, col=2,
            title='Global Trends'
        )

        fig.update_layout(
            height=1000,
            showlegend=True,
            title_text="Global Renewable Energy Analysis",
        )

        return fig

    def add_bar_chart(self, fig, entities, row, col, title):
        """Add one bar trace with the latest-year value of each entity.

        Args:
            fig: Figure to add the trace to.
            entities: Names to look up in the ``country`` column, in plot order.
            row, col: Subplot position.
            title: Used as the trace (legend) name.

        Entities without a row in the dataset's latest year are skipped; if
        none is found no trace is added.
        """
        latest_year = self.df['year'].max()
        latest = self.df[self.df['year'] == latest_year]

        names, values = [], []
        for entity in entities:
            match = latest[latest['country'] == entity]
            if not match.empty:
                names.append(entity)
                values.append(match[self._INDICATOR].iloc[0])

        if names:
            fig.add_trace(
                go.Bar(x=names, y=values, name=title),
                row=row, col=col
            )

    def add_time_series(self, fig, entities, row, col, title):
        """Add one line+marker trace per entity showing the indicator over time.

        Args:
            fig: Figure to add the traces to.
            entities: Names to look up in the ``country`` column.
            row, col: Subplot position.
            title: Accepted for symmetry with :meth:`add_bar_chart`; not used.
        """
        for entity in entities:
            # Sort chronologically: a line trace connects points in the order
            # given, so unsorted CSV rows would produce a zig-zag.
            entity_data = self.df[self.df['country'] == entity].sort_values('year')
            if not entity_data.empty:
                fig.add_trace(
                    go.Scatter(
                        x=entity_data['year'],
                        y=entity_data[self._INDICATOR],
                        name=entity,
                        mode='lines+markers'
                    ),
                    row=row, col=col
                )

    def generate_insights(self):
        """Print global status, the top five countries and global progress.

        The global figures use the 'World' rows that actually carry a value,
        so the printed years always match the printed numbers.  When no such
        row exists a notice is printed instead of raising ``IndexError``.
        """
        indicator = self._INDICATOR
        latest_year = self.df['year'].max()

        # 'World' observations with a value, oldest first.
        world = (
            self.df[self.df['country'] == 'World']
            .dropna(subset=[indicator])
            .sort_values('year')
        )

        print("Key Insights:")
        print("=============")

        # Global Status
        if world.empty:
            print("\nNo 'World' data available for global status or progress.")
        else:
            world_year = world['year'].iloc[-1]
            latest_world = world[indicator].iloc[-1]
            print(f"\nGlobal Status ({world_year}):")
            print(f"World renewable electricity output: {latest_world:.1f}%")

        # Top Performers (excluding regions/groups and rows without a name)
        names = self.df['country']
        is_country = names.notna() & ~names.str.contains(
            self._AGGREGATE_PATTERN, na=False
        )
        individual_countries = self.df[is_country]

        top_countries = (
            individual_countries[individual_countries['year'] == latest_year]
            .dropna(subset=[indicator])  # keep "nan%" out of the ranking
            .nlargest(5, indicator)
        )

        print("\nTop 5 Countries (Latest Year):")
        for name, value in zip(top_countries['country'], top_countries[indicator]):
            print(f"{name:<20} {value:.1f}%")

        # Progress Analysis
        if not world.empty:
            first_year = world['year'].iloc[0]
            earliest_value = world[indicator].iloc[0]
            latest_value = world[indicator].iloc[-1]

            print(f"\nGlobal Progress ({first_year} - {world['year'].iloc[-1]}):")
            print(f"Change: {latest_value - earliest_value:.1f} percentage points")
            print(f"Starting: {earliest_value:.1f}%")
            print(f"Current: {latest_value:.1f}%")

# Usage: print the console insights, then build and save the dashboard.
if __name__ == "__main__":
    import os  # local import: only needed to prepare the output directory

    analyzer = RenewableEnergyAnalyzer()

    # Generate insights
    analyzer.generate_insights()

    # Create visualization dashboard
    fig = analyzer.analyze_by_categories()

    # Save the dashboard; create the target directory first because writing
    # into a missing directory raises FileNotFoundError.
    output_path = "outputs/renewable_energy_dashboard.html"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    fig.write_html(output_path)
    print(f"\nDashboard has been saved to '{output_path}'")

Key Insights:

Global Status (2022):
World renewable electricity output: 22.9%

Top 5 Countries (Latest Year):
Albania              100.0%
Bhutan               100.0%
Andorra              86.1%
Afghanistan          86.1%
Austria              76.5%

Global Progress (2010 - 2022):
Change: 3.3 percentage points
Starting: 19.6%
Current: 22.9%

Dashboard has been saved to 'outputs/renewable_energy_dashboard.html'


In [2]:
import pandas as pd
import numpy as np
from typing import Dict, List
import plotly.graph_objects as go

class RenewableInsightsGenerator:
    """Print a multi-section console report on renewable-energy indicators.

    The loaded CSV must provide the columns ``country`` and ``year`` plus the
    three indicator columns named by the class constants below.
    """

    # Indicator columns, labelled as they appear in the printed report.
    ELECTRICITY_COL = 'EG.ELC.RNEW.ZS'    # "Renewable electricity output"
    CONSUMPTION_COL = 'EG.FEC.RNEW.ZS'    # "Renewable energy consumption"
    GDP_GROWTH_COL = 'NY.GDP.MKTP.KD.ZG'  # "GDP Growth"

    # Aggregate names compared across several report sections.
    REGIONS = [
        'East Asia & Pacific',
        'Europe & Central Asia',
        'Latin America & Caribbean',
        'Middle East & North Africa',
        'North America',
        'South Asia',
        'Sub-Saharan Africa'
    ]
    INCOME_GROUPS = [
        'High income',
        'Upper middle income',
        'Lower middle income',
        'Low income'
    ]

    # Regular expression (applied case-insensitively) identifying aggregate
    # rows, so that country rankings contain individual countries only.  The
    # terms are deliberately narrow: plain substrings such as 'IDA', 'Arab',
    # 'Africa' or 'America' would also discard 'Trinidad and Tobago',
    # 'Saudi Arabia', 'South Africa' and 'American Samoa'.
    # NOTE(review): assumes World Bank style aggregate names - confirm against
    # the actual CSV.
    _AGGREGATE_PATTERN = '|'.join([
        'World', 'income', 'region', r'\bIDA\b', r'\bIBRD\b', 'Arab World',
        'Asia', 'Europe', r'(?<!South )\bAfrica\b', r'\bAmerica\b',
        'Caribbean', 'Pacific', 'small states',
        'OECD', 'dividend', 'Euro area', 'Fragile', 'indebted',
        'Least developed', 'Not classified',
    ])

    def __init__(self, data_path='data/raw/renewable_energy_data.csv'):
        """Load the dataset and cache the first and last year it covers."""
        self.df = pd.read_csv(data_path)
        self.latest_year = self.df['year'].max()
        self.earliest_year = self.df['year'].min()

    def generate_comprehensive_insights(self):
        """Print the full report: banner followed by the six sections in order."""
        print("===============================================")
        print("COMPREHENSIVE RENEWABLE ENERGY ANALYSIS INSIGHTS")
        print("===============================================")

        sections = (
            self._analyze_global_status,      # 1. Global Overview
            self._analyze_regions,            # 2. Regional Analysis
            self._analyze_economic_groups,    # 3. Economic Group Analysis
            self._analyze_top_performers,     # 4. Top Performers
            self._analyze_progress,           # 5. Progress Analysis
            self._analyze_development_impact  # 6. Investment and Development
        )
        for section in sections:
            section()

    def _world_series(self):
        """Return 'World' rows that have an electricity value, oldest first."""
        world = self.df[self.df['country'] == 'World']
        return world.dropna(subset=[self.ELECTRICITY_COL]).sort_values('year')

    def _analyze_global_status(self):
        """Print the most recent 'World' figures and the change since its first year."""
        print("\n1. GLOBAL STATUS")
        print("---------------")

        world_data = self._world_series()
        if world_data.empty:
            # Nothing to report; avoids IndexError on the lookups below.
            print("\nNo 'World' data available")
            return

        latest = world_data.iloc[-1]
        print(f"\nCurrent Global Status ({world_data['year'].iloc[-1]}):")
        print(f"• Renewable electricity output: {latest[self.ELECTRICITY_COL]:.1f}%")
        print(f"• Renewable energy consumption: {latest[self.CONSUMPTION_COL]:.1f}%")

        # Growth is measured from the first 'World' observation, so the label
        # uses that observation's year rather than the dataset-wide minimum.
        growth = latest[self.ELECTRICITY_COL] - world_data[self.ELECTRICITY_COL].iloc[0]
        print(f"\nGrowth since {world_data['year'].iloc[0]}: {growth:.1f} percentage points")

    def _analyze_regions(self):
        """Print the latest-year value per region and the highest/lowest region."""
        print("\n2. REGIONAL ANALYSIS")
        print("-------------------")

        col = self.ELECTRICITY_COL
        latest_data = self.df[self.df['year'] == self.latest_year]

        print(f"\nRegional Comparison ({self.latest_year}):")
        for region in self.REGIONS:
            rows = latest_data[latest_data['country'] == region]
            if not rows.empty:
                print(f"• {region:<30} {rows[col].iloc[0]:.1f}%")

        # Find highest and lowest among regions that actually have a value
        regional = latest_data[latest_data['country'].isin(self.REGIONS)]
        regional = regional.dropna(subset=[col])
        if not regional.empty:
            highest = regional.nlargest(1, col).iloc[0]
            lowest = regional.nsmallest(1, col).iloc[0]

            print(f"\nHighest: {highest['country']} ({highest[col]:.1f}%)")
            print(f"Lowest: {lowest['country']} ({lowest[col]:.1f}%)")

    def _analyze_economic_groups(self):
        """Print the latest-year value per income group and the spread between them."""
        print("\n3. ECONOMIC GROUP ANALYSIS")
        print("-------------------------")

        col = self.ELECTRICITY_COL
        latest_data = self.df[self.df['year'] == self.latest_year]

        print(f"\nIncome Group Comparison ({self.latest_year}):")
        for group in self.INCOME_GROUPS:
            rows = latest_data[latest_data['country'] == group]
            if not rows.empty:
                print(f"• {group:<20} {rows[col].iloc[0]:.1f}%")

        # A gap needs at least two groups with a value
        grouped = latest_data[latest_data['country'].isin(self.INCOME_GROUPS)]
        grouped = grouped.dropna(subset=[col])
        if len(grouped) >= 2:
            gap = grouped[col].max() - grouped[col].min()

            print(f"\nIncome Gap Analysis:")
            print(f"Gap between highest and lowest: {gap:.1f} percentage points")

    def _analyze_top_performers(self):
        """Print the top ten countries and the five largest changes over the period."""
        print("\n4. TOP PERFORMERS ANALYSIS")
        print("-------------------------")

        col = self.ELECTRICITY_COL

        # Keep named rows that are not aggregates (regions, income groups, ...)
        names = self.df['country']
        is_country = names.notna() & ~names.str.contains(
            self._AGGREGATE_PATTERN, case=False, na=False
        )
        individual_countries = self.df[is_country]

        latest_data = individual_countries[
            individual_countries['year'] == self.latest_year
        ]

        # Top 10 countries (rows without a value cannot be ranked)
        top_10 = latest_data.dropna(subset=[col]).nlargest(10, col)
        print("\nTop 10 Countries by Renewable Electricity Output:")
        for name, value in zip(top_10['country'], top_10[col]):
            print(f"• {name:<30} {value:.1f}%")

        # Most improved: only meaningful when the data spans more than one year
        if self.earliest_year < self.latest_year:
            early_data = individual_countries[
                individual_countries['year'] == self.earliest_year
            ]
            merged_data = pd.merge(
                early_data[['country', col]],
                latest_data[['country', col]],
                on='country',
                suffixes=('_early', '_late')
            )
            early_col, late_col = f'{col}_early', f'{col}_late'
            merged_data['improvement'] = merged_data[late_col] - merged_data[early_col]

            print(f"\nMost Improved ({self.earliest_year}-{self.latest_year}):")
            top_improved = (
                merged_data.dropna(subset=['improvement'])
                .nlargest(5, 'improvement')
            )
            for name, change, early, late in zip(
                top_improved['country'],
                top_improved['improvement'],
                top_improved[early_col],
                top_improved[late_col],
            ):
                # ':+' prints the real sign, so a decline shows as "-2.0"
                # instead of "+-2.0".
                print(
                    f"• {name:<30} "
                    f"{change:+.1f} percentage points "
                    f"({early:.1f}% → "
                    f"{late:.1f}%)"
                )

    def _analyze_progress(self):
        """Print the 'World' start/end values and the compound annual growth rate."""
        print("\n5. PROGRESS ANALYSIS")
        print("-------------------")

        col = self.ELECTRICITY_COL
        world_data = self._world_series()
        if world_data.empty:
            # Nothing to report; avoids IndexError on the lookups below.
            print("\nNo 'World' data available")
            return

        start_year = world_data['year'].iloc[0]
        end_year = world_data['year'].iloc[-1]
        start_value = world_data[col].iloc[0]
        end_value = world_data[col].iloc[-1]
        years_diff = end_year - start_year

        print(f"\nGlobal Progress {start_year}-{end_year}:")
        print(f"• Starting point: {start_value:.1f}%")
        print(f"• Current level: {end_value:.1f}%")
        print(f"• Total increase: {end_value - start_value:.1f} percentage points")

        # CAGR is undefined for a zero-length span or a non-positive start value.
        if years_diff > 0 and start_value > 0:
            cagr = ((end_value / start_value) ** (1 / years_diff) - 1) * 100
            print(f"• Average annual growth rate: {cagr:.1f}%")
        else:
            print("• Average annual growth rate: n/a")

    def _analyze_development_impact(self):
        """Print renewable output next to GDP growth for each income group."""
        print("\n6. DEVELOPMENT IMPACT")
        print("---------------------")

        latest_data = self.df[self.df['year'] == self.latest_year]

        print(f"\nRenewable Energy by Development Status ({self.latest_year}):")
        for category in self.INCOME_GROUPS:
            cat_data = latest_data[latest_data['country'] == category]
            if cat_data.empty:
                continue
            renewable = cat_data[self.ELECTRICITY_COL].iloc[0]
            gdp_growth = cat_data[self.GDP_GROWTH_COL].iloc[0]
            print(f"\n{category}:")
            print(f"• Renewable Output: {renewable:.1f}%")
            print(f"• GDP Growth: {gdp_growth:.1f}%")

# Run the analysis: load the default CSV and print the full six-section
# report when this cell/script is executed directly.
if __name__ == "__main__":
    insights = RenewableInsightsGenerator()
    insights.generate_comprehensive_insights()

COMPREHENSIVE RENEWABLE ENERGY ANALYSIS INSIGHTS

1. GLOBAL STATUS
---------------

Current Global Status (2022):
• Renewable electricity output: 22.9%
• Renewable energy consumption: 19.8%

Growth since 2010: 3.3 percentage points

2. REGIONAL ANALYSIS
-------------------

Regional Comparison (2022):
• East Asia & Pacific            20.4%
• Europe & Central Asia          28.0%
• Latin America & Caribbean      51.7%
• Middle East & North Africa     2.7%
• North America                  19.9%
• South Asia                     16.9%
• Sub-Saharan Africa             26.6%

Highest: Latin America & Caribbean (51.7%)
Lowest: Middle East & North Africa (2.7%)

3. ECONOMIC GROUP ANALYSIS
-------------------------

Income Group Comparison (2022):
• High income          21.3%
• Upper middle income  25.5%
• Lower middle income  17.2%
• Low income           65.8%

Income Gap Analysis:
Gap between highest and lowest: 48.5 percentage points

4. TOP PERFORMERS ANALYSIS
-------------------------

Top 

In [5]:
import os
from openai import OpenAI
from dotenv import load_dotenv

class RenewableEnergyAssistant:
    """Ask an OpenAI chat model to comment on a country's renewable output.

    The loaded CSV must provide the columns ``country``, ``year`` and
    ``EG.ELC.RNEW.ZS``; these are the only columns this class reads.
    """

    def __init__(self, data_path='data/raw/renewable_energy_data.csv'):
        """Read the API key from the environment/.env file and load the data."""
        # Load environment variables
        load_dotenv()

        # Initialize OpenAI client
        self.client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

        # Load data
        self.df = pd.read_csv(data_path)

    def generate_insight(self, country):
        """Generate insights for a specific country.

        Args:
            country: Name to match exactly against the ``country`` column.

        Returns:
            The model's reply text; ``"No data found for <country>"`` when the
            country has no row with a renewable-output value; or
            ``"Error: <message>"`` when anything fails along the way.  The
            method never raises.
        """
        indicator = 'EG.ELC.RNEW.ZS'
        try:
            # Use the most recent observation that actually has a value, so a
            # trailing empty year never sends "nan%" to the (paid) API.
            observed = (
                self.df[self.df['country'] == country]
                .dropna(subset=[indicator])
                .sort_values('year')
            )

            if observed.empty:
                return f"No data found for {country}"

            latest_value = observed[indicator].iloc[-1]

            prompt = f"Analyze renewable energy data for {country}: Current output is {latest_value:.1f}%"

            completion = self.client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            return completion.choices[0].message.content

        except Exception as e:
            # Deliberate catch-all: callers receive a printable string instead
            # of an exception, whatever went wrong.
            return f"Error: {str(e)}"

# Example usage: request a narrative for the 'World' aggregate and print the
# returned string (model text, a "No data found" notice, or an error message).
if __name__ == "__main__":
    assistant = RenewableEnergyAssistant()
    result = assistant.generate_insight("World")
    print(result)

To analyze the current renewable energy output of 22.9% for the world, it's essential to consider several aspects, such as historical trends, regional differences, types of renewable energy sources, and factors influencing these figures.

### Historical Trends
- **Growth Over Time**: Over the past few decades, there has been a significant increase in the adoption and utilization of renewable energy sources. This growth reflects advancements in technology, policy changes, and increased awareness of environmental issues.
- **Historical Benchmarks**: Compare the current figure of 22.9% with previous years to understand the growth rate. For instance, if the share was 18% five years ago, this indicates a steady rise.

### Regional Differences
- **Leading Regions**: Certain regions or countries, like the European Union, China, and some parts of the United States, have been leading the way in renewable energy adoption.
- **Developing Regions**: In many developing countries, although the penet