In [None]:
from google.colab import files
uploaded = files.upload()

Saving Book2.csv to Book2.csv


In [None]:
import pandas as pd
df = pd.read_csv('Book2.csv')

In [None]:
!pip install pandas scikit-learn numpy



In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

class EnhancedCleanEnergyRecommender:
    def __init__(self, dataset_path):
        self.df = pd.read_csv(dataset_path)
        self.scaler = StandardScaler()
        self.model = RandomForestClassifier(n_estimators=200, random_state=42)
        self.prepare_data()
        self.train_model()

    def train_model(self):
        """Train the RandomForest model on the prepared data."""
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.model.fit(X_train, y_train)
        print(f"Model trained. Accuracy on test set: {self.model.score(X_test, y_test):.2f}")

    def prepare_data(self):
        """Prepare the dataset for training"""
        # Calculate derived features
        self.df['cost_per_kwh'] = (
            self.df['solar_cost_per_watt_inr'] * 1000 +
            self.df['solar_installation_cost_inr_kw']
        ) / (self.df['solar_irradiation_kwh_m2_year'] * 25)  # 25 years lifespan

        self.df['wind_potential'] = self.df['wind_speed_ms']**3  # Wind power is cubic of speed

        # Select and create features for training
        self.features = [
            'monthly_electricity_usage_kwh',
            'solar_irradiation_kwh_m2_year',
            'wind_speed_ms',
            'temperature_avg_c',
            'rainfall_mm',
            'cost_per_kwh',
            'wind_potential'
        ]

        # Create target variable based on existing recommendations and conditions
        conditions = [
            (self.df['wind_speed_ms'] >= 6.5) & (self.df['grid_stability'] == 'high'),
            (self.df['solar_irradiation_kwh_m2_year'] >= 1800) & (self.df['rainfall_mm'] < 400),
            (self.df['temperature_avg_c'] >= 25) & (self.df['grid_stability'] == 'low')
        ]
        choices = ['Wind', 'Solar PV', 'Biogas']
        self.df['recommended_energy'] = np.select(conditions, choices, default='Solar PV')

        # Scale features
        self.X = self.scaler.fit_transform(self.df[self.features])
        self.y = self.df['recommended_energy']

    def calculate_suitability_scores(self, location_data):
        """Calculate weighted suitability scores using advanced metrics"""
        scores = {}

        # Solar PV suitability
        solar_score = (
            self._normalize(location_data['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.4 +  # Solar potential
            (1 - self._normalize(location_data['rainfall_mm'], 0, 600)) * 0.3 +  # Less rain is better
            self._normalize(location_data['temperature_avg_c'], 20, 35) * 0.2 +  # Temperature factor
            (1 if location_data['grid_stability'] == 'high' else
             0.7 if location_data['grid_stability'] == 'medium' else 0.4) * 0.1  # Grid stability
        ) * 100

        # Wind suitability
        wind_score = (
            self._normalize(location_data['wind_speed_ms']**3, 64, 343) * 0.5 +  # Cubic wind power
            (1 - self._normalize(location_data['rainfall_mm'], 0, 600)) * 0.2 +  # Less rain is better
            (1 if location_data['grid_stability'] == 'high' else
             0.7 if location_data['grid_stability'] == 'medium' else 0.4) * 0.3  # Grid stability
        ) * 100

        # Biogas suitability
        biogas_score = (
            self._normalize(location_data['temperature_avg_c'], 15, 35) * 0.4 +  # Temperature factor
            (0.4 if location_data['grid_stability'] == 'high' else
             0.7 if location_data['grid_stability'] == 'medium' else 1) * 0.4 +  # Lower grid stability preferred
            self._normalize(location_data['rainfall_mm'], 0, 600) * 0.2  # More rain is better
        ) * 100

        return {
            'Solar PV': solar_score,
            'Wind': wind_score,
            'Biogas': biogas_score
        }

    def _normalize(self, value, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((value - min_val) / (max_val - min_val), 0, 1)

    def calculate_financial_metrics(self, energy_type, user_data, location_data):
        """Calculate detailed financial metrics including ROI and breakeven"""
        inflation_rate = 0.06  # 6% average inflation
        electricity_price_increase = 0.08  # 8% average annual increase

        # Initial setup costs
        if energy_type == 'Solar PV':
            system_size = self._calculate_solar_system_size(
                user_data['monthly_electricity_usage_kwh'],
                location_data['solar_irradiation_kwh_m2_year']
            )
            initial_cost = self._calculate_solar_costs(system_size, location_data)
            maintenance_percent = location_data['solar_maintenance_cost_percent']
            lifespan = location_data['solar_lifespan_years']

        elif energy_type == 'Wind':
            system_size = self._calculate_wind_system_size(
                user_data['monthly_electricity_usage_kwh'],
                location_data['wind_speed_ms']
            )
            initial_cost = self._calculate_wind_costs(system_size, location_data)
            maintenance_percent = location_data['wind_turbine_maintenance_cost_percent']
            lifespan = location_data['wind_turbine_lifespan_years']

        else:  # Biogas
            initial_cost = (
                location_data['biogas_plant_cost_inr'] +
                location_data['biogas_installation_cost_inr']
            )
            maintenance_percent = location_data['biogas_maintenance_cost_percent']
            lifespan = location_data['biogas_lifespan_years']

        # Calculate annual savings and costs
        annual_maintenance = initial_cost * (maintenance_percent / 100)

        # Ensure 'current_annual_bill' is a float
        current_annual_bill = float(
            user_data['monthly_electricity_usage_kwh'] * 12 *
            location_data['electricity_price_per_kwh_inr']
        )

        # Cap the lifespan for sanity check (e.g., maximum 40 years)
        max_lifespan = 40
        lifespan = min(lifespan, max_lifespan)

        # Calculate NPV of savings over lifespan
        npv_savings = 0
        annual_bill = current_annual_bill

        for year in range(int(lifespan)):
            # Calculate future bills with price increase
            annual_bill *= (1 + electricity_price_increase)

            # Increase annual maintenance costs with inflation
            annual_maintenance *= (1 + inflation_rate)

            # Calculate yearly savings
            yearly_saving = annual_bill - annual_maintenance

            # Avoid extremely large yearly savings by clamping the value
            yearly_saving = min(yearly_saving, 1e9)  # Cap at a reasonable value

            # Calculate NPV of savings
            npv_savings += yearly_saving / ((1 + inflation_rate) ** year)

        # Calculate metrics
        roi = ((npv_savings - initial_cost) / initial_cost) * 100
        simple_payback_months = (initial_cost / (current_annual_bill - annual_maintenance)) * 12

        return {
            'initial_investment': initial_cost,
            'annual_maintenance': annual_maintenance,
            'annual_savings_first_year': current_annual_bill - annual_maintenance,
            'npv_savings': npv_savings,
            'roi_percent': roi,
            'payback_months': simple_payback_months,
            'lifespan_years': lifespan
        }


    def _calculate_solar_system_size(self, monthly_usage, solar_irradiation):
        """Calculate required solar system size in kW"""
        annual_usage = monthly_usage * 12
        system_efficiency = 0.75
        return annual_usage / (solar_irradiation * system_efficiency)

    def _calculate_wind_system_size(self, monthly_usage, wind_speed):
        """Calculate required wind turbine size in kW"""
        annual_usage = monthly_usage * 12
        capacity_factor = 0.35
        return annual_usage / (wind_speed**3 * 8760 * capacity_factor)

    def _calculate_solar_costs(self, system_size, location_data):
        """Calculate total solar installation costs"""
        return (
            system_size * location_data['solar_cost_per_watt_inr'] * 1000 +
            system_size * location_data['solar_installation_cost_inr_kw']
        )

    def _calculate_wind_costs(self, system_size, location_data):
        """Calculate total wind installation costs"""
        return (
            system_size * location_data['wind_turbine_cost_inr_kw'] +
            system_size * location_data['wind_turbine_installation_cost_inr_kw']
        )

    def get_recommendation(self, user_input):
        """Generate comprehensive recommendation based on all factors"""
        location_data = self.df[self.df['State'] == user_input['state']].iloc[0]

        # Calculate suitability scores
        suitability_scores = self.calculate_suitability_scores(location_data)

        # Calculate financial metrics for each option
        financial_metrics = {}
        for energy_type in ['Solar PV', 'Wind', 'Biogas']:
            financial_metrics[energy_type] = self.calculate_financial_metrics(
                energy_type, user_input, location_data
            )

        # Calculate weighted scores considering multiple factors
        final_scores = {}
        for energy_type in suitability_scores:
            if financial_metrics[energy_type]['initial_investment'] <= user_input['budget']:
                final_scores[energy_type] = (
                    suitability_scores[energy_type] * 0.3 +  # Environmental suitability
                    financial_metrics[energy_type]['roi_percent'] * 0.4 +  # ROI
                    (100 - financial_metrics[energy_type]['payback_months'] / 120) * 0.3  # Payback period (normalized to 10 years)
                )
            else:
                final_scores[energy_type] = 0

        # Get best recommendation
        recommended_source = max(final_scores.items(), key=lambda x: x[1])[0]

        return {
            'recommended_source': recommended_source,
            'suitability_score': suitability_scores[recommended_source],
            'financial_metrics': financial_metrics[recommended_source],
            'all_scores': final_scores,
            'explanation': self._generate_recommendation_explanation(
                recommended_source,
                suitability_scores[recommended_source],
                financial_metrics[recommended_source],
                location_data
            )
        }

    def _generate_recommendation_explanation(self, energy_type, suitability_score,
                                          financial_metrics, location_data):
        """Generate detailed explanation for recommendation"""
        explanation = []

        # Environmental suitability explanation
        if energy_type == 'Solar PV':
            explanation.append(
                f"Your location receives good solar irradiation of "
                f"{location_data['solar_irradiation_kwh_m2_year']} kWh/m²/year"
            )
        elif energy_type == 'Wind':
            explanation.append(
                f"Your location has favorable wind speeds of "
                f"{location_data['wind_speed_ms']} m/s"
            )

        # Financial explanation
        explanation.append(
            f"Initial investment: ₹{financial_metrics['initial_investment']:,.2f}"
        )
        explanation.append(
            f"Expected annual savings: ₹{financial_metrics['annual_savings_first_year']:,.2f}"
        )
        explanation.append(
            f"Estimated payback period: {financial_metrics['payback_months']:.1f} months"
        )
        explanation.append(
            f"Projected ROI: {financial_metrics['roi_percent']:.1f}%"
        )

        return " | ".join(explanation)

In [None]:
recommender = EnhancedCleanEnergyRecommender('/content/Book2.csv')
user_input = {
    'state': 'Tamil Nadu',
    'budget': 100000,
    'monthly_electricity_usage_kwh': 10000
}
recommendation = recommender.get_recommendation(user_input)
print(recommendation)

Model trained. Accuracy on test set: 0.83
{'recommended_source': 'Biogas', 'suitability_score': 65.46000000000001, 'financial_metrics': {'initial_investment': 14.0, 'annual_maintenance': 8.640003067185775, 'annual_savings_first_year': inf, 'npv_savings': 15949074683.81639, 'roi_percent': 113921961927.25992, 'payback_months': 0.0, 'lifespan_years': 40.0}, 'all_scores': {'Solar PV': 0, 'Wind': 76586211.84249155, 'Biogas': 45568784820.54197}, 'explanation': 'Initial investment: ₹14.00 | Expected annual savings: ₹inf | Estimated payback period: 0.0 months | Projected ROI: 113921961927.3%'}


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import requests
from datetime import datetime

class EnhancedCleanEnergyRecommender:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()
        self.model = RandomForestClassifier(n_estimators=200, random_state=42)

    def fetch_remote_sensing_data(self, latitude, longitude):
        """
        Fetch real-time remote sensing data from NASA POWER API
        """
        try:
            base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
            params = {
                "parameters": "ALLSKY_SFC_SW_DWN,T2M,WS10M,PRECTOT",
                "community": "RE",
                "longitude": longitude,
                "latitude": latitude,
                "start": datetime.now().strftime("%Y%m%d"),
                "end": datetime.now().strftime("%Y%m%d"),
                "format": "JSON"
            }
            response = requests.get(base_url, params=params)
            data = response.json()

            return {
                'solar_radiation': data['properties']['parameter']['ALLSKY_SFC_SW_DWN'],
                'temperature': data['properties']['parameter']['T2M'],
                'wind_speed': data['properties']['parameter']['WS10M'],
                'precipitation': data['properties']['parameter']['PRECTOT']
            }
        except:
            return None

    def prepare_data(self):
        """Prepare dataset with enhanced features"""
        # Calculate efficiency metrics
        self.df['solar_efficiency'] = (self.df['solar_irradiation_kwh_m2_year'] * 0.15) / 365  # 15% panel efficiency
        self.df['wind_efficiency'] = 0.35 * (self.df['wind_speed_ms'] ** 3) * 8760  # Betz limit and annual hours

        # Calculate cost metrics
        self.df['solar_annual_cost'] = (
            (self.df['solar_cost_per_watt_inr'] * 1000 + self.df['solar_installation_cost_inr_kw']) *
            (self.df['solar_maintenance_cost_percent'] / 100)
        )

        self.df['wind_annual_cost'] = (
            (self.df['wind_turbine_cost_inr_kw'] + self.df['wind_turbine_installation_cost_inr_kw']) *
            (self.df['wind_turbine_maintenance_cost_percent'] / 100)
        )

        # Create target variable based on optimal conditions
        conditions = [
            (self.df['wind_speed_ms'] >= 6.0) & (self.df['grid_stability'] == 'high'),
            (self.df['solar_irradiation_kwh_m2_year'] >= 1800) & (self.df['temperature_avg_c'] >= 25),
            (self.df['rainfall_mm'] >= 100) & (self.df['temperature_avg_c'] >= 20)
        ]
        choices = ['Wind', 'Solar PV', 'Biogas']
        self.df['recommended_energy'] = np.select(conditions, choices, default='Solar PV')

        # Select features for training
        features = [
            'monthly_electricity_usage_kwh',
            'solar_irradiation_kwh_m2_year',
            'wind_speed_ms',
            'temperature_avg_c',
            'rainfall_mm',
            'solar_efficiency',
            'wind_efficiency',
            'solar_annual_cost',
            'wind_annual_cost'
        ]

        self.X = self.scaler.fit_transform(self.df[features])
        self.y = self.df['recommended_energy']

    def calculate_roi(self, energy_type, state_data, user_data):
        """Calculate ROI with enhanced metrics"""
        annual_usage = user_data['monthly_usage'] * 12
        electricity_rate = float(state_data['electricity_price_per_kwh_inr'])
        annual_bill = annual_usage * electricity_rate

        if energy_type == 'Solar PV':
            system_size = annual_usage / (state_data['solar_irradiation_kwh_m2_year'] * 0.15)
            initial_cost = system_size * (state_data['solar_cost_per_watt_inr'] * 1000 +
                                        state_data['solar_installation_cost_inr_kw'])
            maintenance = initial_cost * (state_data['solar_maintenance_cost_percent'] / 100)
            lifespan = state_data['solar_lifespan_years']

        elif energy_type == 'Wind':
            system_size = annual_usage / (0.35 * (state_data['wind_speed_ms'] ** 3) * 8760)
            initial_cost = system_size * (state_data['wind_turbine_cost_inr_kw'] +
                                        state_data['wind_turbine_installation_cost_inr_kw'])
            maintenance = initial_cost * (state_data['wind_turbine_maintenance_cost_percent'] / 100)
            lifespan = state_data['wind_turbine_lifespan_years']

        else:  # Biogas
            initial_cost = (state_data['biogas_plant_cost_inr'] +
                          state_data['biogas_installation_cost_inr'])
            maintenance = initial_cost * (state_data['biogas_maintenance_cost_percent'] / 100)
            lifespan = state_data['biogas_lifespan_years']

        # Calculate cumulative savings
        annual_saving = annual_bill - maintenance
        total_saving = annual_saving * lifespan
        roi = ((total_saving - initial_cost) / initial_cost) * 100
        break_even_months = (initial_cost / annual_saving) * 12

        return {
            'initial_investment': initial_cost,
            'annual_savings': annual_saving,
            'total_savings': total_saving,
            'roi_percentage': roi,
            'break_even_months': break_even_months,
            'lifespan_years': lifespan
        }

    def get_recommendation(self, user_input):
        """Generate comprehensive recommendation"""
        state_data = self.df[self.df['State'] == user_input['state']].iloc[0]

        # Get real-time data if coordinates provided
        remote_data = None
        if 'latitude' in user_input and 'longitude' in user_input:
            remote_data = self.fetch_remote_sensing_data(
                user_input['latitude'],
                user_input['longitude']
            )

        # Calculate ROI for each energy type
        recommendations = {}
        for energy_type in ['Solar PV', 'Wind', 'Biogas']:
            roi_metrics = self.calculate_roi(energy_type, state_data, user_input)

            if roi_metrics['initial_investment'] <= user_input['budget']:
                suitability_score = self.calculate_suitability(
                    energy_type,
                    state_data,
                    remote_data
                )

                recommendations[energy_type] = {
                    'suitability_score': suitability_score,
                    'roi_metrics': roi_metrics
                }

        # Find best recommendation
        best_option = max(recommendations.items(),
                         key=lambda x: (x[1]['suitability_score'] * 0.4 +
                                      x[1]['roi_metrics']['roi_percentage'] * 0.6))

        return {
            'recommended_energy': best_option[0],
            'investment_required': best_option[1]['roi_metrics']['initial_investment'],
            'break_even_months': best_option[1]['roi_metrics']['break_even_months'],
            'total_profit': best_option[1]['roi_metrics']['total_savings'],
            'suitability_score': best_option[1]['suitability_score'],
            'all_options': recommendations
        }

    def calculate_suitability(self, energy_type, state_data, remote_data=None):
        """Calculate suitability score with real-time data if available"""
        base_score = 0

        if energy_type == 'Solar PV':
            solar_score = (
                self.normalize(state_data['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.4 +
                self.normalize(state_data['temperature_avg_c'], 20, 35) * 0.3 +
                (1 - self.normalize(state_data['rainfall_mm'], 0, 600)) * 0.3
            )
            base_score = solar_score * 100

        elif energy_type == 'Wind':
            wind_score = (
                self.normalize(state_data['wind_speed_ms']**3, 64, 343) * 0.5 +
                (1 - self.normalize(state_data['rainfall_mm'], 0, 600)) * 0.3 +
                (1 if state_data['grid_stability'] == 'high' else
                 0.7 if state_data['grid_stability'] == 'medium' else 0.4) * 0.2
            )
            base_score = wind_score * 100

        else:  # Biogas
            biogas_score = (
                self.normalize(state_data['temperature_avg_c'], 15, 35) * 0.4 +
                self.normalize(state_data['rainfall_mm'], 0, 600) * 0.3 +
                (0.4 if state_data['grid_stability'] == 'high' else
                 0.7 if state_data['grid_stability'] == 'medium' else 1) * 0.3
            )
            base_score = biogas_score * 100

        # Adjust score with real-time data if available
        if remote_data:
            real_time_adjustment = self.calculate_realtime_adjustment(
                energy_type,
                remote_data
            )
            base_score = (base_score * 0.7) + (real_time_adjustment * 0.3)

        return base_score

    def normalize(self, value, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((value - min_val) / (max_val - min_val), 0, 1)

    def calculate_realtime_adjustment(self, energy_type, remote_data):
        """Calculate adjustment factor based on real-time conditions"""
        if energy_type == 'Solar PV':
            return (
                self.normalize(remote_data['solar_radiation'], 0, 1000) * 0.5 +
                self.normalize(remote_data['temperature'], 20, 35) * 0.3 +
                (1 - self.normalize(remote_data['precipitation'], 0, 50)) * 0.2
            ) * 100

        elif energy_type == 'Wind':
            return (
                self.normalize(remote_data['wind_speed']**3, 64, 343) * 0.6 +
                (1 - self.normalize(remote_data['precipitation'], 0, 50)) * 0.4
            ) * 100

        else:  # Biogas
            return (
                self.normalize(remote_data['temperature'], 15, 35) * 0.6 +
                self.normalize(remote_data['precipitation'], 0, 50) * 0.4
            ) * 100

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import requests
from datetime import datetime

class EnhancedCleanEnergyRecommender:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()
        self.model = RandomForestClassifier(n_estimators=200, random_state=42)

    def fetch_remote_sensing_data(self, latitude, longitude):
        """
        Fetch real-time remote sensing data from NASA POWER API
        """
        try:
            base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
            params = {
                "parameters": "ALLSKY_SFC_SW_DWN,T2M,WS10M,PRECTOT",
                "community": "RE",
                "longitude": longitude,
                "latitude": latitude,
                "start": datetime.now().strftime("%Y%m%d"),
                "end": datetime.now().strftime("%Y%m%d"),
                "format": "JSON"
            }
            response = requests.get(base_url, params=params)
            data = response.json()

            return {
                'solar_radiation': data['properties']['parameter']['ALLSKY_SFC_SW_DWN'],
                'temperature': data['properties']['parameter']['T2M'],
                'wind_speed': data['properties']['parameter']['WS10M'],
                'precipitation': data['properties']['parameter']['PRECTOT']
            }
        except:
            return None

    def prepare_data(self):
        """Prepare dataset with enhanced features"""
        # Calculate efficiency metrics
        self.df['solar_efficiency'] = (self.df['solar_irradiation_kwh_m2_year'] * 0.15) / 365  # 15% panel efficiency
        self.df['wind_efficiency'] = 0.35 * (self.df['wind_speed_ms'] ** 3) * 8760  # Betz limit and annual hours

        # Calculate cost metrics
        self.df['solar_annual_cost'] = (
            (self.df['solar_cost_per_watt_inr'] * 1000 + self.df['solar_installation_cost_inr_kw']) *
            (self.df['solar_maintenance_cost_percent'] / 100)
        )

        self.df['wind_annual_cost'] = (
            (self.df['wind_turbine_cost_inr_kw'] + self.df['wind_turbine_installation_cost_inr_kw']) *
            (self.df['wind_turbine_maintenance_cost_percent'] / 100)
        )

        # Create target variable based on optimal conditions
        conditions = [
            (self.df['wind_speed_ms'] >= 6.0) & (self.df['grid_stability'] == 'high'),
            (self.df['solar_irradiation_kwh_m2_year'] >= 1800) & (self.df['temperature_avg_c'] >= 25),
            (self.df['rainfall_mm'] >= 100) & (self.df['temperature_avg_c'] >= 20)
        ]
        choices = ['Wind', 'Solar PV', 'Biogas']
        self.df['recommended_energy'] = np.select(conditions, choices, default='Solar PV')

        # Select features for training
        features = [
            'monthly_electricity_usage_kwh',
            'solar_irradiation_kwh_m2_year',
            'wind_speed_ms',
            'temperature_avg_c',
            'rainfall_mm',
            'solar_efficiency',
            'wind_efficiency',
            'solar_annual_cost',
            'wind_annual_cost'
        ]

        self.X = self.scaler.fit_transform(self.df[features])
        self.y = self.df['recommended_energy']

    def calculate_roi(self, energy_type, state_data, user_data):
        """Calculate ROI with enhanced metrics"""
        annual_usage = user_data['monthly_usage'] * 12
        electricity_rate = float(state_data['electricity_price_per_kwh_inr'])
        annual_bill = annual_usage * electricity_rate

        if energy_type == 'Solar PV':
            system_size = annual_usage / (state_data['solar_irradiation_kwh_m2_year'] * 0.15)
            initial_cost = system_size * (state_data['solar_cost_per_watt_inr'] * 1000 +
                                        state_data['solar_installation_cost_inr_kw'])
            maintenance = initial_cost * (state_data['solar_maintenance_cost_percent'] / 100)
            lifespan = state_data['solar_lifespan_years']

        elif energy_type == 'Wind':
            system_size = annual_usage / (0.35 * (state_data['wind_speed_ms'] ** 3) * 8760)
            initial_cost = system_size * (state_data['wind_turbine_cost_inr_kw'] +
                                        state_data['wind_turbine_installation_cost_inr_kw'])
            maintenance = initial_cost * (state_data['wind_turbine_maintenance_cost_percent'] / 100)
            lifespan = state_data['wind_turbine_lifespan_years']

        else:  # Biogas
            initial_cost = (state_data['biogas_plant_cost_inr'] +
                          state_data['biogas_installation_cost_inr'])
            maintenance = initial_cost * (state_data['biogas_maintenance_cost_percent'] / 100)
            lifespan = state_data['biogas_lifespan_years']

        # Calculate cumulative savings
        annual_saving = annual_bill - maintenance
        total_saving = annual_saving * lifespan
        roi = ((total_saving - initial_cost) / initial_cost) * 100
        break_even_months = (initial_cost / annual_saving) * 12

        return {
            'initial_investment': initial_cost,
            'annual_savings': annual_saving,
            'total_savings': total_saving,
            'roi_percentage': roi,
            'break_even_months': break_even_months,
            'lifespan_years': lifespan
        }

    def get_recommendation(self, user_input):
        """Generate comprehensive recommendation"""
        state_data = self.df[self.df['State'] == user_input['state']].iloc[0]

        # Get real-time data if coordinates provided
        remote_data = None
        if 'latitude' in user_input and 'longitude' in user_input:
            remote_data = self.fetch_remote_sensing_data(
                user_input['latitude'],
                user_input['longitude']
            )

        # Calculate ROI for each energy type
        recommendations = {}
        for energy_type in ['Solar PV', 'Wind', 'Biogas']:
            roi_metrics = self.calculate_roi(energy_type, state_data, user_input)

            if roi_metrics['initial_investment'] <= user_input['budget']:
                suitability_score = self.calculate_suitability(
                    energy_type,
                    state_data,
                    remote_data
                )

                recommendations[energy_type] = {
                    'suitability_score': suitability_score,
                    'roi_metrics': roi_metrics
                }

        # Find best recommendation
        best_option = max(recommendations.items(),
                         key=lambda x: (x[1]['suitability_score'] * 0.4 +
                                      x[1]['roi_metrics']['roi_percentage'] * 0.6))

        return {
            'recommended_energy': best_option[0],
            'investment_required': best_option[1]['roi_metrics']['initial_investment'],
            'break_even_months': best_option[1]['roi_metrics']['break_even_months'],
            'total_profit': best_option[1]['roi_metrics']['total_savings'],
            'suitability_score': best_option[1]['suitability_score'],
            'all_options': recommendations
        }

    def calculate_suitability(self, energy_type, state_data, remote_data=None):
        """Calculate suitability score with real-time data if available"""
        base_score = 0

        if energy_type == 'Solar PV':
            solar_score = (
                self.normalize(state_data['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.4 +
                self.normalize(state_data['temperature_avg_c'], 20, 35) * 0.3 +
                (1 - self.normalize(state_data['rainfall_mm'], 0, 600)) * 0.3
            )
            base_score = solar_score * 100

        elif energy_type == 'Wind':
            wind_score = (
                self.normalize(state_data['wind_speed_ms']**3, 64, 343) * 0.5 +
                (1 - self.normalize(state_data['rainfall_mm'], 0, 600)) * 0.3 +
                (1 if state_data['grid_stability'] == 'high' else
                 0.7 if state_data['grid_stability'] == 'medium' else 0.4) * 0.2
            )
            base_score = wind_score * 100

        else:  # Biogas
            biogas_score = (
                self.normalize(state_data['temperature_avg_c'], 15, 35) * 0.4 +
                self.normalize(state_data['rainfall_mm'], 0, 600) * 0.3 +
                (0.4 if state_data['grid_stability'] == 'high' else
                 0.7 if state_data['grid_stability'] == 'medium' else 1) * 0.3
            )
            base_score = biogas_score * 100

        # Adjust score with real-time data if available
        if remote_data:
            real_time_adjustment = self.calculate_realtime_adjustment(
                energy_type,
                remote_data
            )
            base_score = (base_score * 0.7) + (real_time_adjustment * 0.3)

        return base_score

    def normalize(self, value, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((value - min_val) / (max_val - min_val), 0, 1)

    def calculate_realtime_adjustment(self, energy_type, remote_data):
        """Calculate adjustment factor based on real-time conditions"""
        if energy_type == 'Solar PV':
            return (
                self.normalize(remote_data['solar_radiation'], 0, 1000) * 0.5 +
                self.normalize(remote_data['temperature'], 20, 35) * 0.3 +
                (1 - self.normalize(remote_data['precipitation'], 0, 50)) * 0.2
            ) * 100

        elif energy_type == 'Wind':
            return (
                self.normalize(remote_data['wind_speed']**3, 64, 343) * 0.6 +
                (1 - self.normalize(remote_data['precipitation'], 0, 50)) * 0.4
            ) * 100

        else:  # Biogas
            return (
                self.normalize(remote_data['temperature'], 15, 35) * 0.6 +
                self.normalize(remote_data['precipitation'], 0, 50) * 0.4
            ) * 100

In [None]:
# Initialize recommender
recommender = EnhancedCleanEnergyRecommender('/content/Book2.csv')
recommender.prepare_data()

# Get recommendation
user_input = {
    'state': 'Kerala',
    'monthly_usage': 1000,  # kWh
    'budget': 10000,      # INR
    'latitude': 19.076,     # Optional
    'longitude': 72.877     # Optional
}

recommendation = recommender.get_recommendation(user_input)
print(recommendation)

{'recommended_energy': 'Wind', 'investment_required': 2317.2133072407046, 'break_even_months': nan, 'total_profit': nan, 'suitability_score': 30.596899641577057, 'all_options': {'Wind': {'suitability_score': 30.596899641577057, 'roi_metrics': {'initial_investment': 2317.2133072407046, 'annual_savings': nan, 'total_savings': nan, 'roi_percentage': nan, 'break_even_months': nan, 'lifespan_years': 20}}}}


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import requests
from datetime import datetime

class EnhancedCleanEnergyRecommender:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()
        self.model = RandomForestClassifier(n_estimators=200, random_state=42)

    def fetch_remote_sensing_data(self, latitude, longitude):
        """
        Fetch real-time remote sensing data from NASA POWER API
        """
        try:
            base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
            params = {
                "parameters": "ALLSKY_SFC_SW_DWN,T2M,WS10M,PRECTOT",
                "community": "RE",
                "longitude": longitude,
                "latitude": latitude,
                "start": datetime.now().strftime("%Y%m%d"),
                "end": datetime.now().strftime("%Y%m%d"),
                "format": "JSON"
            }
            response = requests.get(base_url, params=params)
            data = response.json()

            return {
                'solar_radiation': data['properties']['parameter']['ALLSKY_SFC_SW_DWN'],
                'temperature': data['properties']['parameter']['T2M'],
                'wind_speed': data['properties']['parameter']['WS10M'],
                'precipitation': data['properties']['parameter']['PRECTOT']
            }
        except:
            return None

    def prepare_data(self):
        """Prepare dataset with enhanced features"""
        # Calculate efficiency metrics with adjusted factors
        self.df['solar_efficiency'] = (self.df['solar_irradiation_kwh_m2_year'] * 0.20) / 365  # Increased panel efficiency
        self.df['wind_efficiency'] = 0.30 * (self.df['wind_speed_ms'] ** 3) * 8760  # Adjusted Betz limit

        # Calculate cost metrics with balanced maintenance costs
        self.df['solar_annual_cost'] = (
            (self.df['solar_cost_per_watt_inr'] * 1000 + self.df['solar_installation_cost_inr_kw']) *
            (self.df['solar_maintenance_cost_percent'] / 100)
        )

        self.df['wind_annual_cost'] = (
            (self.df['wind_turbine_cost_inr_kw'] + self.df['wind_turbine_installation_cost_inr_kw']) *
            (self.df['wind_turbine_maintenance_cost_percent'] / 100)
        )

        # Create target variable with adjusted thresholds
        conditions = [
            (self.df['wind_speed_ms'] >= 5.5) & (self.df['grid_stability'] == 'high'),  # Lowered wind speed threshold
            (self.df['solar_irradiation_kwh_m2_year'] >= 1700) & (self.df['temperature_avg_c'] >= 25),  # Lowered solar threshold
            (self.df['rainfall_mm'] >= 80) & (self.df['temperature_avg_c'] >= 20)  # Adjusted biogas conditions
        ]
        choices = ['Wind', 'Solar PV', 'Biogas']
        self.df['recommended_energy'] = np.select(conditions, choices, default='Solar PV')

        # Select features for training with balanced weights
        features = [
            'monthly_electricity_usage_kwh',
            'solar_irradiation_kwh_m2_year',
            'wind_speed_ms',
            'temperature_avg_c',
            'rainfall_mm',
            'solar_efficiency',
            'wind_efficiency',
            'solar_annual_cost',
            'wind_annual_cost'
        ]

        self.X = self.scaler.fit_transform(self.df[features])
        self.y = self.df['recommended_energy']

    def calculate_roi(self, energy_type, state_data, user_data):
        """Calculate ROI with balanced metrics"""
        annual_usage = user_data['monthly_usage'] * 12
        electricity_rate = float(state_data['electricity_price_per_kwh_inr'])
        annual_bill = annual_usage * electricity_rate

        if energy_type == 'Solar PV':
            system_size = annual_usage / (state_data['solar_irradiation_kwh_m2_year'] * 0.20)  # Adjusted efficiency
            initial_cost = system_size * (state_data['solar_cost_per_watt_inr'] * 1000 +
                                        state_data['solar_installation_cost_inr_kw'])
            maintenance = initial_cost * (state_data['solar_maintenance_cost_percent'] / 100)
            lifespan = state_data['solar_lifespan_years']

        elif energy_type == 'Wind':
            system_size = annual_usage / (0.30 * (state_data['wind_speed_ms'] ** 3) * 8760)  # Adjusted efficiency
            initial_cost = system_size * (state_data['wind_turbine_cost_inr_kw'] +
                                        state_data['wind_turbine_installation_cost_inr_kw'])
            maintenance = initial_cost * (state_data['wind_turbine_maintenance_cost_percent'] / 100)
            lifespan = state_data['wind_turbine_lifespan_years']

        else:  # Biogas
            initial_cost = (state_data['biogas_plant_cost_inr'] +
                          state_data['biogas_installation_cost_inr'])
            maintenance = initial_cost * (state_data['biogas_maintenance_cost_percent'] / 100)
            lifespan = state_data['biogas_lifespan_years']

        # Calculate cumulative savings with adjusted factors
        annual_saving = annual_bill - maintenance
        total_saving = annual_saving * lifespan
        roi = ((total_saving - initial_cost) / initial_cost) * 100
        break_even_months = (initial_cost / annual_saving) * 12

        return {
            'initial_investment': initial_cost,
            'annual_savings': annual_saving,
            'total_savings': total_saving,
            'roi_percentage': roi,
            'break_even_months': break_even_months,
            'lifespan_years': lifespan
        }

    def calculate_suitability(self, energy_type, state_data, remote_data=None):
        """Calculate suitability score with balanced weights"""
        base_score = 0

        if energy_type == 'Solar PV':
            solar_score = (
                self.normalize(state_data['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.35 +  # Reduced weight
                self.normalize(state_data['temperature_avg_c'], 20, 35) * 0.35 +  # Increased weight
                (1 - self.normalize(state_data['rainfall_mm'], 0, 600)) * 0.30
            )
            base_score = solar_score * 100

        elif energy_type == 'Wind':
            wind_score = (
                self.normalize(state_data['wind_speed_ms']**3, 27, 343) * 0.40 +  # Reduced weight and lower threshold
                (1 - self.normalize(state_data['rainfall_mm'], 0, 600)) * 0.30 +
                (1 if state_data['grid_stability'] == 'high' else
                 0.7 if state_data['grid_stability'] == 'medium' else 0.4) * 0.30  # Increased grid stability weight
            )
            base_score = wind_score * 100

        else:  # Biogas
            biogas_score = (
                self.normalize(state_data['temperature_avg_c'], 15, 35) * 0.35 +
                self.normalize(state_data['rainfall_mm'], 0, 600) * 0.35 +
                (0.4 if state_data['grid_stability'] == 'high' else
                 0.7 if state_data['grid_stability'] == 'medium' else 1) * 0.30
            )
            base_score = biogas_score * 100

        # Adjust score with real-time data if available
        if remote_data:
            real_time_adjustment = self.calculate_realtime_adjustment(
                energy_type,
                remote_data
            )
            base_score = (base_score * 0.7) + (real_time_adjustment * 0.3)

        return base_score

    def get_recommendation(self, user_input):
        """Generate comprehensive recommendation with balanced scoring"""
        state_data = self.df[self.df['State'] == user_input['state']].iloc[0]

        # Get real-time data if coordinates provided
        remote_data = None
        if 'latitude' in user_input and 'longitude' in user_input:
            remote_data = self.fetch_remote_sensing_data(
                user_input['latitude'],
                user_input['longitude']
            )

        # Calculate scores for each energy type
        recommendations = {}
        for energy_type in ['Solar PV', 'Wind', 'Biogas']:
            roi_metrics = self.calculate_roi(energy_type, state_data, user_input)

            if roi_metrics['initial_investment'] <= user_input['budget']:
                suitability_score = self.calculate_suitability(
                    energy_type,
                    state_data,
                    remote_data
                )

                # Calculate final score with balanced weights
                final_score = (
                    suitability_score * 0.35 +  # Environmental suitability
                    roi_metrics['roi_percentage'] * 0.35 +  # Financial returns
                    (100 - min(roi_metrics['break_even_months'], 120)) * 0.30  # Payback period (max 10 years)
                )

                recommendations[energy_type] = {
                    'suitability_score': suitability_score,
                    'roi_metrics': roi_metrics,
                    'final_score': final_score
                }

        # Find best recommendation based on final score
        if recommendations:
            best_option = max(recommendations.items(), key=lambda x: x[1]['final_score'])

            return {
                'recommended_energy': best_option[0],
                'investment_required': best_option[1]['roi_metrics']['initial_investment'],
                'break_even_months': best_option[1]['roi_metrics']['break_even_months'],
                'total_profit': best_option[1]['roi_metrics']['total_savings'],
                'suitability_score': best_option[1]['suitability_score'],
                'final_score': best_option[1]['final_score'],
                'all_options': recommendations
            }
        else:
            return {
                'error': 'No suitable recommendations found within budget constraints'
            }

    def normalize(self, value, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((value - min_val) / (max_val - min_val), 0, 1)

    def calculate_realtime_adjustment(self, energy_type, remote_data):
        """Calculate adjustment factor based on real-time conditions with balanced weights"""
        if energy_type == 'Solar PV':
            return (
                self.normalize(remote_data['solar_radiation'], 0, 1000) * 0.40 +
                self.normalize(remote_data['temperature'], 20, 35) * 0.35 +
                (1 - self.normalize(remote_data['precipitation'], 0, 50)) * 0.25
            ) * 100

        elif energy_type == 'Wind':
            return (
                self.normalize(remote_data['wind_speed']**3, 27, 343) * 0.50 +  # Adjusted threshold
                (1 - self.normalize(remote_data['precipitation'], 0, 50)) * 0.50
            ) * 100

        else:  # Biogas
            return (
                self.normalize(remote_data['temperature'], 15, 35) * 0.50 +
                self.normalize(remote_data['precipitation'], 0, 50) * 0.50
            ) * 100

In [None]:
recommender = EnhancedCleanEnergyRecommender('/content/Book2.csv')
recommender.prepare_data()

# Test different scenarios
test_cases = [
    {
        'state': 'Maharashtra',
        'monthly_usage': 1000,
        'budget': 1000000
    },
    {
        'state': 'Tamil Nadu',
        'monthly_usage': 2000,
        'budget': 2000000
    },
    {
        'state': 'Gujarat',
        'monthly_usage': 1500,
        'budget': 1500000
    }
]

for case in test_cases:
    result = recommender.get_recommendation(case)
    print(f"\nState: {case['state']}")
    print(f"Recommended: {result['recommended_energy']}")
    print(f"Suitability Score: {result['suitability_score']:.2f}")
    if 'all_options' in result:
        print("\nAll Options:")
        for energy_type, metrics in result['all_options'].items():
            print(f"{energy_type}: {metrics['final_score']:.2f}")


State: Maharashtra
Recommended: Wind
Suitability Score: 79.38

All Options:
Wind: 90550853.58
Biogas: 22399926.24

State: Tamil Nadu
Recommended: Wind
Suitability Score: 70.81

All Options:
Wind: 146819804.34
Biogas: 47999936.31

State: Gujarat
Recommended: Wind
Suitability Score: 95.46

All Options:
Wind: 156472556.72
Biogas: 33599927.47


In [None]:
from google.colab import files
uploaded = files.upload()

Saving Book3.csv to Book3 (4).csv


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from datetime import datetime

class EnhancedCleanEnergyRecommender:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()

        # Updated electricity hike rates
        self.electricity_hikes = {
            'Tamil Nadu': 42.0, 'Maharashtra': 28.0, 'Goa': 23.30,
            'West Bengal': 3.0, 'Madhya Pradesh': 4.0, 'Bihar': 4.0,
            'Odisha': 10.0, 'Punjab': 11.10, 'Haryana': 20.40,
            'Rajasthan': 20.70, 'Kerala': 19.0, 'Andhra Pradesh': 16.0,
            'Karnataka': 13.30, 'Chhattisgarh': 18.40, 'Uttar Pradesh': 17.0
        }
        self.default_hike = 15.0  # Average hike for states without specific data

        # Clean and preprocess data
        self._preprocess_data()

    def _preprocess_data(self):
        """Clean and preprocess the dataset"""
        # Convert string numbers to float
        for col in self.df.columns:
            if self.df[col].dtype == 'object':
                self.df[col] = self.df[col].replace({',': ''}, regex=True)
                try:
                    self.df[col] = pd.to_numeric(self.df[col])
                except:
                    pass  # Keep as string if conversion fails

    def calculate_suitability(self, energy_type, state_data, remote_data=None):
        """Calculate suitability score with adjusted parameters for updated dataset"""
        if energy_type == 'Solar PV':
            # Updated solar scoring with new thresholds
            irradiation_score = self.normalize(state_data['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.45
            temp_score = self.normalize(state_data['temperature_avg_c'], 15, 35) * 0.35
            rainfall_penalty = max(0, 1 - self.normalize(state_data['rainfall_mm'], 0, 400)) * 0.20

            base_score = (irradiation_score + temp_score + rainfall_penalty) * 100

        elif energy_type == 'Wind':
            # Adjusted wind speed thresholds based on new data
            wind_power = (state_data['wind_speed_ms'] ** 3)
            wind_score = self.normalize(wind_power, 27, 512) * 0.6  # Updated for wind speeds 3-8 m/s

            grid_factor = {'high': 1.0, 'medium': 0.7, 'low': 0.4}
            stability_score = grid_factor[state_data['grid_stability']] * 0.4

            base_score = (wind_score + stability_score) * 100

        else:  # Biogas
            temp_score = self.normalize(state_data['temperature_avg_c'], 15, 35) * 0.4
            rainfall_score = self.normalize(state_data['rainfall_mm'], 50, 600) * 0.4

            grid_factor = {'high': 0.4, 'medium': 0.7, 'low': 1.0}
            stability_score = grid_factor[state_data['grid_stability']] * 0.2

            base_score = (temp_score + rainfall_score + stability_score) * 100

        return max(0, min(100, base_score))

    def calculate_roi(self, energy_type, state_data, user_data):
        """Calculate ROI with updated costs and parameters"""
        annual_usage = user_data['monthly_usage'] * 12
        electricity_rate = float(user_data.get('electricity_rate', 8.0))  # Default rate if not provided
        annual_hike_rate = self.electricity_hikes.get(state_data['State'], self.default_hike) / 100

        if energy_type == 'Solar PV':
            # Updated solar calculations with new cost structure
            system_size = annual_usage / (state_data['solar_irradiation_kwh_m2_year'] * 0.15)
            initial_cost = system_size * (state_data['solar_cost_per_watt_inr'] * 1000 +
                                        state_data['solar_installation_cost_inr_kw'])
            maintenance = initial_cost * (state_data['solar_maintenance_cost_percent'] / 100)
            lifespan = int(state_data['solar_lifespan_years'])   # Conversion to integer

        elif energy_type == 'Wind':
            # Updated wind calculations with new installation costs
            system_size = annual_usage / (0.35 * (state_data['wind_speed_ms'] ** 3) * 8760)
            initial_cost = system_size * (state_data['wind_turbine_cost_inr_kw'] +
                                        state_data['wind_turbine_installation_cost_inr_kw'])
            maintenance = initial_cost * (state_data['wind_turbine_maintenance_cost_percent'] / 100)
            lifespan = int(state_data['wind_turbine_lifespan_years'])   # Conversion to integer

        else:  # Biogas
            initial_cost = (state_data['biogas_plant_cost_inr'] * 1000000 +  # Converting to INR
                          state_data['biogas_installation_cost_inr'] * 1000)
            maintenance = initial_cost * (state_data['biogas_maintenance_cost_percent'] / 100)
            lifespan = int(state_data['biogas_lifespan_years'])   # Conversion to integer


        # Calculate cumulative savings with compound growth
        total_saving = 0
        annual_bill = annual_usage * electricity_rate

        for year in range(lifespan):
            annual_bill *= (1 + annual_hike_rate)
            annual_saving = annual_bill - maintenance
            total_saving += annual_saving

        roi = ((total_saving - initial_cost) / initial_cost) * 100
        break_even_months = (initial_cost / (total_saving / lifespan)) * 12

        return {
            'initial_investment': round(initial_cost, 2),
            'annual_savings': round(annual_saving, 2),
            'total_savings': round(total_saving, 2),
            'roi_percentage': round(roi, 2),
            'break_even_months': round(break_even_months, 1),
            'lifespan_years': lifespan
        }

    def normalize(self, value, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((value - min_val) / (max_val - min_val), 0, 1)

    def get_recommendation(self, user_input):
        """Generate comprehensive recommendation with all options"""
        try:
            state_data = self.df[self.df['State'] == user_input['state']].iloc[0]
        except (IndexError, KeyError):
            return {'error': 'State not found in database'}

        recommendations = {}
        for energy_type in ['Solar PV', 'Wind', 'Biogas']:
            roi_metrics = self.calculate_roi(energy_type, state_data, user_input)

            if roi_metrics['initial_investment'] <= user_input['budget']:
                suitability_score = self.calculate_suitability(energy_type, state_data)

                # Combined score calculation
                combined_score = (
                    suitability_score * 0.4 +
                    min(100, roi_metrics['roi_percentage']) * 0.6
                )

                recommendations[energy_type] = {
                    'suitability_score': round(suitability_score, 2),
                    'roi_metrics': roi_metrics,
                    'combined_score': round(combined_score, 2)
                }

        if not recommendations:
            return {'error': 'No viable options within budget'}

        best_option = max(recommendations.items(),
                         key=lambda x: x[1]['combined_score'])

        return {
            'recommended_energy': best_option[0],
            'investment_required': best_option[1]['roi_metrics']['initial_investment'],
            'break_even_months': best_option[1]['roi_metrics']['break_even_months'],
            'total_profit': best_option[1]['roi_metrics']['total_savings'],
            'suitability_score': best_option[1]['suitability_score'],
            'all_options': recommendations
        }

# Test function
def test_recommender(csv_file_path):
    # Initialize the recommender
    recommender = EnhancedCleanEnergyRecommender(csv_file_path)

    # Debug: Print available states
    print("Available states in dataset:", recommender.df['State'].tolist())

    # Test case 1: High solar potential state
    test_input_1 = {
        'state': 'Rajasthan',
        'monthly_usage': 5000,
        'budget': 160000,
        'electricity_rate': 8.0
    }

    # Test case 2: High wind potential state
    test_input_2 = {
        'state': 'Gujarat',
        'monthly_usage': 4000,
        'budget': 150000,
        'electricity_rate': 7.5
    }

    test_input_3 = {
        'state': 'Tamil Nadu',
        'monthly_usage': 4000,
        'budget': 120000,
        'electricity_rate': 6.5
    }

    # Get recommendations
    print("\nTesting Rajasthan case:")
    result_1 = recommender.get_recommendation(test_input_1)
    print("\nTesting Gujarat case:")
    result_2 = recommender.get_recommendation(test_input_2)
    print("\nTesting Tamil Nadu case:")
    result_3 = recommender.get_recommendation(test_input_3)

    print("\nResults for Rajasthan:")
    print(result_1)
    print("\nResults for Gujarat:")
    print(result_2)
    print("\nResults for Tamil Nadu:")
    print(result_3)

if __name__ == "__main__":
    test_recommender('/content/Book3 (4).csv')

Available states in dataset: ['Andhra Pradesh', 'Arunachal Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Goa', 'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jharkhand', 'Karnataka', 'Kerala', 'Madhya Pradesh', 'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Punjab', 'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Telangana', 'Tripura', 'Uttar Pradesh', 'Uttarakhand', 'West Bengal']

Testing Rajasthan case:

Testing Gujarat case:

Testing Tamil Nadu case:

Results for Rajasthan:
{'recommended_energy': 'Wind', 'investment_required': 108719.29, 'break_even_months': 0.4, 'total_profit': 43927990.2, 'suitability_score': 28.0, 'all_options': {'Wind': {'suitability_score': 28.0, 'roi_metrics': {'initial_investment': 108719.29, 'annual_savings': 8047824.2, 'total_savings': 43927990.2, 'roi_percentage': 40304.97, 'break_even_months': 0.4, 'lifespan_years': 15}, 'combined_score': 71.2}}}

Results for Gujarat:
{'recommended_energy': 'Wind', 'investment_required': 4540.73, 'break_even_m

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from datetime import datetime

class EnhancedCleanEnergyRecommender:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()

        # More realistic electricity hike rates
        self.electricity_hikes = {
            'Tamil Nadu': 5.0, 'Maharashtra': 4.5, 'Goa': 4.0,
            'West Bengal': 3.0, 'Madhya Pradesh': 4.0, 'Bihar': 4.0,
            'Odisha': 3.5, 'Punjab': 4.1, 'Haryana': 4.4,
            'Rajasthan': 4.7, 'Kerala': 4.0, 'Andhra Pradesh': 4.0,
            'Karnataka': 3.8, 'Chhattisgarh': 4.2, 'Uttar Pradesh': 4.0,
            'Gujarat': 4.2
        }
        self.default_hike = 4.0

        self._preprocess_data()

    def calculate_roi(self, energy_type, state_data, user_data):
        """Calculate ROI with more realistic parameters"""
        annual_usage = user_data['monthly_usage'] * 12
        electricity_rate = float(user_data.get('electricity_rate', 8.0))
        annual_hike_rate = self.electricity_hikes.get(state_data['State'], self.default_hike) / 100

        if energy_type == 'Solar PV':
            capacity_factor = 0.18
            system_size = (annual_usage / (365 * 24 * capacity_factor))
            initial_cost = system_size * state_data['solar_cost_per_watt_inr']
            installation_cost = system_size * state_data['solar_installation_cost_inr_kw']
            total_cost = initial_cost + installation_cost
            maintenance = total_cost * (state_data['solar_maintenance_cost_percent'] / 100)
            lifespan = int(state_data['solar_lifespan_years'])

        elif energy_type == 'Wind':
            capacity_factor = 0.25 * (state_data['wind_speed_ms'] / 5.5)  # Adjusted for wind speed
            system_size = (annual_usage / (365 * 24 * capacity_factor))
            initial_cost = system_size * state_data['wind_turbine_cost_inr_kw']
            installation_cost = system_size * state_data['wind_turbine_installation_cost_inr_kw']
            total_cost = initial_cost + installation_cost
            maintenance = total_cost * (state_data['wind_turbine_maintenance_cost_percent'] / 100)
            lifespan = int(state_data['wind_turbine_lifespan_years'])

        else:  # Biogas
            capacity_factor = 0.5 * (1 + state_data['rainfall_mm'] / 1000)  # Adjusted for rainfall
            system_size = (annual_usage / (365 * 24 * capacity_factor))
            total_cost = state_data['biogas_plant_cost_inr'] + state_data['biogas_installation_cost_inr']
            maintenance = total_cost * (state_data['biogas_maintenance_cost_percent'] / 100)
            lifespan = int(state_data['biogas_lifespan_years'])

        # Calculate realistic savings with discount rate
        discount_rate = 0.08  # 8% discount rate
        total_saving = 0
        annual_bill = annual_usage * electricity_rate

        for year in range(lifespan):
            annual_bill *= (1 + annual_hike_rate)
            annual_saving = annual_bill - maintenance
            total_saving += annual_saving / ((1 + discount_rate) ** year)

        roi = ((total_saving - total_cost) / total_cost) * 100
        break_even_months = (total_cost / (annual_saving / 12))

        return {
            'initial_investment': round(total_cost, 2),
            'annual_savings': round(annual_saving, 2),
            'total_savings': round(total_saving, 2),
            'roi_percentage': round(roi, 2),
            'break_even_months': round(break_even_months, 1),
            'lifespan_years': lifespan
        }

    def calculate_suitability(self, energy_type, state_data):
        """Calculate suitability score with refined parameters"""
        if energy_type == 'Solar PV':
            irradiation_score = self.normalize(state_data['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.5
            temp_penalty = abs(state_data['temperature_avg_c'] - 25) / 20  # Optimal temp around 25°C
            temp_score = (1 - temp_penalty) * 0.3
            rainfall_penalty = self.normalize(state_data['rainfall_mm'], 0, 500) * 0.2

            base_score = (irradiation_score + temp_score - rainfall_penalty) * 100

        elif energy_type == 'Wind':
            wind_score = self.normalize(state_data['wind_speed_ms'], 3, 8) * 0.6
            grid_factor = {'high': 1.0, 'medium': 0.7, 'low': 0.4}
            stability_score = grid_factor[state_data['grid_stability']] * 0.4

            base_score = (wind_score + stability_score) * 100

        else:  # Biogas
            rainfall_score = self.normalize(state_data['rainfall_mm'], 100, 600) * 0.4
            temp_score = self.normalize(state_data['temperature_avg_c'], 20, 35) * 0.4
            grid_factor = {'high': 0.4, 'medium': 0.7, 'low': 1.0}
            stability_score = grid_factor[state_data['grid_stability']] * 0.2

            base_score = (rainfall_score + temp_score + stability_score) * 100

        return max(0, min(100, base_score))

    # Rest of the class methods remain the same
    def _preprocess_data(self):
        """Clean and preprocess the dataset"""
        for col in self.df.columns:
            if self.df[col].dtype == 'object':
                self.df[col] = self.df[col].replace({',': ''}, regex=True)
                try:
                    self.df[col] = pd.to_numeric(self.df[col])
                except:
                    pass

    def normalize(self, value, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((value - min_val) / (max_val - min_val), 0, 1)

    def get_recommendation(self, user_input):
        """Generate comprehensive recommendation with balanced scoring"""
        try:
            state_data = self.df[self.df['State'] == user_input['state']].iloc[0]
        except (IndexError, KeyError):
            return {'error': 'State not found in database'}

        recommendations = {}
        for energy_type in ['Solar PV', 'Wind', 'Biogas']:
            try:
                roi_metrics = self.calculate_roi(energy_type, state_data, user_input)

                if roi_metrics['initial_investment'] <= user_input['budget']:
                    suitability_score = self.calculate_suitability(energy_type, state_data)

                    # Balanced scoring system
                    roi_score = min(30, roi_metrics['roi_percentage']) / 30 * 100  # Cap ROI at 30%
                    breakeven_score = min(60, roi_metrics['break_even_months']) / 60 * 100  # Cap at 60 months

                    combined_score = (
                        suitability_score * 0.4 +
                        roi_score * 0.4 +
                        (100 - breakeven_score) * 0.2  # Lower break-even period is better
                    )

                    recommendations[energy_type] = {
                        'suitability_score': round(suitability_score, 2),
                        'roi_metrics': roi_metrics,
                        'combined_score': round(combined_score, 2)
                    }
            except Exception as e:
                print(f"Error calculating for {energy_type}: {str(e)}")

        if not recommendations:
            return {'error': 'No viable options within budget'}

        best_option = max(recommendations.items(),
                         key=lambda x: x[1]['combined_score'])

        return {
            'recommended_energy': best_option[0],
            'investment_required': best_option[1]['roi_metrics']['initial_investment'],
            'break_even_months': best_option[1]['roi_metrics']['break_even_months'],
            'total_profit': best_option[1]['roi_metrics']['total_savings'],
            'suitability_score': best_option[1]['suitability_score'],
            'all_options': recommendations
        }

def test_recommender():
    # Create sample dataset with more realistic values
    data = {
        'State': ['Rajasthan', 'Gujarat', 'Tamil Nadu', 'Kerala'],
        'monthly_electricity_usage_kwh': [5000000, 6000000, 4000000, 3000000],
        'solar_irradiation_kwh_m2_year': [2100, 2000, 1900, 1800],
        'wind_speed_ms': [3, 8, 3, 6],
        'temperature_avg_c': [32, 32, 30, 28],
        'grid_stability': ['medium', 'high', 'low', 'medium'],
        'rainfall_mm': [79.52, 90.8, 583.8, 486.7],
        # More realistic solar costs
        'solar_cost_per_watt_inr': [45000, 47000, 46000, 48000],
        'solar_installation_cost_inr_kw': [15000, 14000, 15500, 16000],
        'solar_maintenance_cost_percent': [2.5, 2.5, 2.5, 2.5],
        'solar_lifespan_years': [25, 25, 25, 25],
        # More realistic wind costs
        'wind_turbine_cost_inr_kw': [65000, 63000, 66000, 68000],
        'wind_turbine_installation_cost_inr_kw': [20000, 19000, 21000, 22000],
        'wind_turbine_maintenance_cost_percent': [3.5, 3.5, 3.5, 4.0],
        'wind_turbine_lifespan_years': [20, 20, 18, 20],
        # More realistic biogas costs
        'biogas_plant_cost_inr': [85000, 82000, 86000, 88000],
        'biogas_installation_cost_inr': [25000, 24000, 26000, 27000],
        'biogas_maintenance_cost_percent': [4.0, 4.0, 4.0, 4.5],
        'biogas_lifespan_years': [15, 15, 15, 15]
    }

    df = pd.DataFrame(data)
    df.to_csv('/content/Book3 (4).csv', index=False)

    recommender = EnhancedCleanEnergyRecommender('/content/Book3 (4).csv')

    test_cases = [
        {
            'state': 'Rajasthan',  # High solar potential
            'monthly_usage': 5000,
            'budget': 1000000,
            'electricity_rate': 8.0
        },
        {
            'state': 'Gujarat',    # High wind potential
            'monthly_usage': 4000,
            'budget': 1000000,
            'electricity_rate': 7.5
        },
        {
            'state': 'Tamil Nadu', # Balanced conditions
            'monthly_usage': 4000,
            'budget': 1000000,
            'electricity_rate': 6.5
        },
        {
            'state': 'Kerala',     # High rainfall, good for biogas
            'monthly_usage': 3000,
            'budget': 800000,
            'electricity_rate': 7.0
        }
    ]

    for i, test_input in enumerate(test_cases, 1):
        print(f"\nTest case {i} - {test_input['state']}:")
        result = recommender.get_recommendation(test_input)
        print(f"Recommended Energy: {result['recommended_energy']}")
        print(f"Investment Required: ₹{result['investment_required']:,.2f}")
        print(f"Break-even Period: {result['break_even_months']:.1f} months")
        print(f"Suitability Score: {result['suitability_score']:.1f}")
        print("\nAll Options:")
        for energy_type, details in result['all_options'].items():
            print(f"\n{energy_type}:")
            print(f"Suitability Score: {details['suitability_score']:.1f}")
            print(f"Combined Score: {details['combined_score']:.1f}")
            print(f"ROI: {details['roi_metrics']['roi_percentage']:.1f}%")

if __name__ == "__main__":
    test_recommender()


Test case 1 - Rajasthan:
Recommended Energy: Biogas
Investment Required: ₹110,000.00
Break-even Period: 1.4 months
Suitability Score: 46.0

All Options:

Biogas:
Suitability Score: 46.0
Combined Score: 77.9
ROI: 5427.7%

Test case 2 - Gujarat:
Recommended Energy: Biogas
Investment Required: ₹106,000.00
Break-even Period: 1.9 months
Suitability Score: 40.0

All Options:

Biogas:
Suitability Score: 40.0
Combined Score: 75.4
ROI: 4043.8%

Test case 3 - Tamil Nadu:
Recommended Energy: Biogas
Investment Required: ₹112,000.00
Break-even Period: 2.1 months
Suitability Score: 85.4

All Options:

Biogas:
Suitability Score: 85.4
Combined Score: 93.5
ROI: 3492.0%

Test case 4 - Kerala:
Recommended Energy: Biogas
Investment Required: ₹115,000.00
Break-even Period: 3.1 months
Suitability Score: 66.3

All Options:

Biogas:
Suitability Score: 66.3
Combined Score: 85.5
ROI: 2518.2%


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import requests
from datetime import datetime

class EnhancedCleanEnergyRecommender:
    def _init_(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()
        self.model = RandomForestClassifier(n_estimators=200, random_state=42)

    def fetch_remote_sensing_data(self, latitude, longitude):
        """
        Fetch real-time remote sensing data from NASA POWER API
        """
        try:
            base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
            params = {
                "parameters": "ALLSKY_SFC_SW_DWN,T2M,WS10M,PRECTOT",
                "community": "RE",
                "longitude": longitude,
                "latitude": latitude,
                "start": datetime.now().strftime("%Y%m%d"),
                "end": datetime.now().strftime("%Y%m%d"),
                "format": "JSON"
            }
            response = requests.get(base_url, params=params)
            data = response.json()

            return {
                'solar_radiation': data['properties']['parameter']['ALLSKY_SFC_SW_DWN'],
                'temperature': data['properties']['parameter']['T2M'],
                'wind_speed': data['properties']['parameter']['WS10M'],
                'precipitation': data['properties']['parameter']['PRECTOT']
            }
        except Exception as e:
            print(f"Error fetching remote sensing data: {e}") # print the error
            return None

    def prepare_data(self):
        """Prepare dataset with enhanced features"""
        # Calculate efficiency metrics with adjusted factors
        self.df['solar_efficiency'] = (self.df['solar_irradiation_kwh_m2_year'] * 0.20) / 365  # Increased panel efficiency
        self.df['wind_efficiency'] = 0.30 * (self.df['wind_speed_ms'] ** 3) * 8760  # Adjusted Betz limit

        # Calculate cost metrics with balanced maintenance costs
        self.df['solar_annual_cost'] = (
            (self.df['solar_cost_per_watt_inr'] * 1000 + self.df['solar_installation_cost_inr_kw']) *
            (self.df['solar_maintenance_cost_percent'] / 100)
        )

        self.df['wind_annual_cost'] = (
            (self.df['wind_turbine_cost_inr_kw'] + self.df['wind_turbine_installation_cost_inr_kw']) *
            (self.df['wind_turbine_maintenance_cost_percent'] / 100)
        )

        # Create target variable with adjusted thresholds
        conditions = [
            (self.df['wind_speed_ms'] >= 5.0),
            (self.df['solar_irradiation_kwh_m2_year'] >= 1700) & (self.df['temperature_avg_c'] >= 25),
            (self.df['rainfall_mm'] >= 80) & (self.df['temperature_avg_c'] >= 20)
        ]
        choices = ['Wind', 'Solar PV', 'Biogas']
        self.df['recommended_energy'] = np.select(conditions, choices, default='Solar PV')

        # Select features for training with balanced weights
        features = [
            'monthly_electricity_usage_kwh',
            'solar_irradiation_kwh_m2_year',
            'wind_speed_ms',
            'temperature_avg_c',
            'rainfall_mm',
            'solar_efficiency',
            'wind_efficiency',
            'solar_annual_cost',
            'wind_annual_cost'
        ]

        self.X = self.scaler.fit_transform(self.df[features])
        self.y = self.df['recommended_energy']

    def calculate_roi(self, energy_type, state_data, user_data):
        """Calculate ROI with balanced metrics"""
        annual_usage = user_data['monthly_usage'] * 12
        electricity_rate = float(state_data['electricity_price_per_kwh_inr'])
        annual_bill = annual_usage * electricity_rate
        try:
            if energy_type == 'Solar PV':
                system_size = annual_usage / (state_data['solar_irradiation_kwh_m2_year'] * 0.20)  # Adjusted efficiency
                initial_cost = system_size * (state_data['solar_cost_per_watt_inr'] * 1000 +
                                                state_data['solar_installation_cost_inr_kw'])
                maintenance = initial_cost * (state_data['solar_maintenance_cost_percent'] / 100)
                lifespan = state_data['solar_lifespan_years']

            elif energy_type == 'Wind':
                system_size = annual_usage / (0.30 * (state_data['wind_speed_ms'] ** 3) * 8760)  # Adjusted efficiency
                initial_cost = system_size * (state_data['wind_turbine_cost_inr_kw'] +
                                                state_data['wind_turbine_installation_cost_inr_kw'])
                maintenance = initial_cost * (state_data['wind_turbine_maintenance_cost_percent'] / 100)
                lifespan = state_data['wind_turbine_lifespan_years']

            else:  # Biogas
                initial_cost = (state_data['biogas_plant_cost_inr'] +
                                state_data['biogas_installation_cost_inr'])
                maintenance = initial_cost * (state_data['biogas_maintenance_cost_percent'] / 100)
                lifespan = state_data['biogas_lifespan_years']

            # Calculate cumulative savings with adjusted factors
            annual_saving = annual_bill - maintenance
            total_saving = annual_saving * lifespan
            roi = ((total_saving - initial_cost) / initial_cost) * 100
            break_even_months = (initial_cost / annual_saving) * 12

            return {
                'initial_investment': initial_cost,
                'annual_savings': annual_saving,
                'total_savings': total_saving,
                'roi_percentage': roi,
                'break_even_months': break_even_months,
                'lifespan_years': lifespan
            }
        except Exception as e:
            print(f"ROI error: {e}")
            return {
                'initial_investment': 0,
                'annual_savings': 0,
                'total_savings': 0,
                'roi_percentage': 0,
                'break_even_months': 1200,
                'lifespan_years': 0
            }

    def calculate_suitability(self, energy_type, state_data, remote_data=None):
        """Calculate suitability score with balanced weights"""
        base_score = 0

        if energy_type == 'Solar PV':
            solar_score = (
                self.normalize(state_data['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.55 +  # Drastically Increase weight for solar irradiation
                self.normalize(state_data['temperature_avg_c'], 20, 35) * 0.30 +
                (1 - self.normalize(state_data['rainfall_mm'], 0, 600)) * 0.15 #Reduce Rainfall impact
            )
            base_score = solar_score * 100

        elif energy_type == 'Wind':
            wind_score = (
                self.normalize(state_data['wind_speed_ms']**3, 27, 343) * 0.15 +  # Decrease weight for wind speed
                (1 - self.normalize(state_data['rainfall_mm'], 0, 600)) * 0.40 + #Increase rainfall impact
                (1 if state_data['grid_stability'] == 'high' else
                 0.7 if state_data['grid_stability'] == 'medium' else 0.4) * 0.45 #Increase grid stability impact
            )
            base_score = wind_score * 100

        else:  # Biogas
            biogas_score = (
                self.normalize(state_data['temperature_avg_c'], 15, 35) * 0.40 +  # Increase weight for temperature
                self.normalize(state_data['rainfall_mm'], 0, 600) * 0.50 + #Major Impact
                (0.4 if state_data['grid_stability'] == 'high' else
                 0.7 if state_data['grid_stability'] == 'medium' else 1) * 0.10 #Reduce
            )
            base_score = biogas_score * 100

        # Adjust score with real-time data if available
        if remote_data:
            real_time_adjustment = self.calculate_realtime_adjustment(
                energy_type,
                remote_data
            )
            base_score = (base_score * 0.7) + (real_time_adjustment * 0.3)

        return base_score

    def get_recommendation(self, user_input):
        """Generate comprehensive recommendation with balanced scoring"""
        state_data = self.df[self.df['State'] == user_input['state']].iloc[0]

        # Get real-time data if coordinates provided
        remote_data = None
        if 'latitude' in user_input and 'longitude' in user_input:
            remote_data = self.fetch_remote_sensing_data(
                user_input['latitude'],
                user_input['longitude']
            )

        # Calculate scores for each energy type
        recommendations = {}
        for energy_type in ['Solar PV', 'Wind', 'Biogas']:
            if pd.isna(state_data['biogas_plant_cost_inr']) and energy_type == 'Biogas':
              continue #Skip if no biogas data

            roi_metrics = self.calculate_roi(energy_type, state_data, user_input)

            if roi_metrics['initial_investment'] <= user_input['budget']:
                suitability_score = self.calculate_suitability(
                    energy_type,
                    state_data,
                    remote_data
                )

                # Calculate final score with balanced weights
                final_score = (
                    suitability_score * 0.35 +  # Environmental suitability
                    roi_metrics['roi_percentage'] * 0.35 +  # Financial returns
                    (100 - min(roi_metrics['break_even_months'], 120)) * 0.30  # Payback period (max 10 years)
                )

                recommendations[energy_type] = {
                    'suitability_score': suitability_score,
                    'roi_metrics': roi_metrics,
                    'final_score': final_score
                }

        # Find best recommendation based on final score
        if recommendations:
            best_option = max(recommendations.items(), key=lambda x: x[1]['final_score'])

            return {
                'recommended_energy': best_option[0],
                'investment_required': best_option[1]['roi_metrics']['initial_investment'],
                'break_even_months': best_option[1]['roi_metrics']['break_even_months'],
                'total_profit': best_option[1]['roi_metrics']['total_savings'],
                'suitability_score': best_option[1]['suitability_score'],
                'final_score': best_option[1]['final_score'],
                'all_options': recommendations
            }
        else:
            return {
                'error': 'No suitable recommendations found within budget constraints'
            }

    def normalize(self, value, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((value - min_val) / (max_val - min_val), 0, 1)

    def calculate_realtime_adjustment(self, energy_type, remote_data):
        """Calculate adjustment factor based on real-time conditions with balanced weights"""
        if energy_type == 'Solar PV':
            return (
                self.normalize(remote_data['solar_radiation'], 0, 1000) * 0.40 +
                self.normalize(remote_data['temperature'], 20, 35) * 0.35 +
                (1 - self.normalize(remote_data['precipitation'], 0, 50)) * 0.25
            ) * 100

        elif energy_type == 'Wind':
            return (
                self.normalize(remote_data['wind_speed']**3, 27, 343) * 0.50 +  # Adjusted threshold
                (1 - self.normalize(remote_data['precipitation'], 0, 50)) * 0.50
            ) * 100

        else:  # Biogas
            return (
                self.normalize(remote_data['temperature'], 15, 35) * 0.50 +
                self.normalize(remote_data['precipitation'], 0, 50) * 0.50
            ) * 100

# Test different scenarios
recommender = EnhancedCleanEnergyRecommender('/content/Book3.csv')
recommender.prepare_data()

test_cases = [
    {
        'state': 'Maharashtra',
        'monthly_usage': 1000,
        'budget': 1000000
    },
    {
        'state': 'Tamil Nadu',
        'monthly_usage': 2000,
        'budget': 2000000
    },
    {
        'state': 'Gujarat',
        'monthly_usage': 1500,
        'budget': 1500000
    },
        {
        'state': 'Bihar',
        'monthly_usage': 1500,
        'budget': 1500000
    },
        {
        'state': 'Rajasthan',
        'monthly_usage': 1500,
        'budget': 1500000
    }
]

for case in test_cases:
    state = case['state']
    state_data = recommender.df[recommender.df['State'] == state].iloc[0]

    print(f"\nState: {state}")
    print(f"  Wind Speed: {state_data['wind_speed_ms']}")
    print(f"  Solar Irradiation: {state_data['solar_irradiation_kwh_m2_year']}")
    print(f"  Temperature: {state_data['temperature_avg_c']}")
    print(f"  Rainfall: {state_data['rainfall_mm']}")
    print(f"  Grid Stability: {state_data['grid_stability']}")
    print(f"  Solar cost per watt: {state_data['solar_cost_per_watt_inr']}")
    print(f"  Wind Turbine Cost: {state_data['wind_turbine_cost_inr_kw']}")
    print(f"  Biogas Plant Cost: {state_data['biogas_plant_cost_inr']}")

    result = recommender.get_recommendation(case)
    print(f"  Recommended: {result['recommended_energy']}")
    print(f"  Suitability Score: {result['suitability_score']:.2f}")
    if 'all_options' in result:
        print("\n  All Options:")
        for energy_type, metrics in result['all_options'].items():
            print(f"    {energy_type}: {metrics['final_score']:.2f}")
    elif 'error' in result:
        print(f"Error: {result['error']}")

TypeError: EnhancedCleanEnergyRecommender() takes no arguments

In [None]:
from google.colab import files
uploaded = files.upload()

Saving Updated_Book3.csv to Updated_Book3.csv


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import requests
from datetime import datetime

class MLEnergyRecommender:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.prepare_data()
        self.train_model()

    def prepare_data(self):
        """Prepare dataset with features and target variable"""
        # Calculate advanced features
        self.df['solar_potential'] = (
            self.df['solar_irradiation_kwh_m2_year'] *
            (35 - abs(30 - self.df['temperature_avg_c'])) / 35 *
            (1 - self.df['rainfall_mm']/1000)
        )

        self.df['wind_potential'] = (
            (self.df['wind_speed_ms'] ** 3) *
            (1 - self.df['rainfall_mm']/1000)
        )

        self.df['biogas_potential'] = (
            (self.df['temperature_avg_c'] / 35) *
            (self.df['rainfall_mm'] / 600) *
            (0.8 if self.df['grid_stability'] == 'high'
             else 1.0 if self.df['grid_stability'] == 'medium'
             else 1.2)
        )

        # Calculate cost-effectiveness scores
        self.df['solar_cost_effectiveness'] = (
            1000000 / (self.df['solar_cost_per_watt_inr'] * 1000 +
                      self.df['solar_installation_cost_inr_kw'])
        ) * self.df['solar_lifespan_years']

        self.df['wind_cost_effectiveness'] = (
            1000000 / (self.df['wind_turbine_cost_inr_kw'] +
                      self.df['wind_turbine_installation_cost_inr_kw'])
        ) * self.df['wind_turbine_lifespan_years']

        self.df['biogas_cost_effectiveness'] = (
            1000000 / (self.df['biogas_plant_cost_inr'] +
                      self.df['biogas_installation_cost_inr'])
        ) * self.df['biogas_lifespan_years']

        # Select features for training
        self.features = [
            'monthly_electricity_usage_kwh',
            'solar_irradiation_kwh_m2_year',
            'wind_speed_ms',
            'temperature_avg_c',
            'rainfall_mm',
            'solar_potential',
            'wind_potential',
            'biogas_potential',
            'solar_cost_effectiveness',
            'wind_cost_effectiveness',
            'biogas_cost_effectiveness'
        ]

        # Convert grid stability to numeric
        stability_map = {'low': 0, 'medium': 1, 'high': 2}
        self.df['grid_stability_numeric'] = self.df['grid_stability'].map(stability_map)
        self.features.append('grid_stability_numeric')

        # Prepare target variable
        self.target = 'recommended_energy_alternative'

        # Handle any missing values
        self.df[self.features] = self.df[self.features].fillna(0)
        self.df[self.target] = self.df[self.target].fillna('Solar')

    def train_model(self):
        """Train the Random Forest model"""
        X = self.df[self.features]
        y = self.df[self.target]

        # Scale the features
        X_scaled = self.scaler.fit_transform(X)

        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42
        )

        # Train the model
        self.model.fit(X_train, y_train)

        # Print model performance
        y_pred = self.model.predict(X_test)
        print("\nModel Performance:")
        print(classification_report(y_test, y_pred))

    def get_recommendation(self, user_input):
        """Generate recommendation based on user input using the trained model"""
        # Prepare input data
        input_data = pd.DataFrame({
            'monthly_electricity_usage_kwh': [user_input['monthly_usage'] * 1000000],
            'solar_irradiation_kwh_m2_year': [self.df[self.df['State'] == user_input['state']]['solar_irradiation_kwh_m2_year'].iloc[0]],
            'wind_speed_ms': [self.df[self.df['State'] == user_input['state']]['wind_speed_ms'].iloc[0]],
            'temperature_avg_c': [self.df[self.df['State'] == user_input['state']]['temperature_avg_c'].iloc[0]],
            'rainfall_mm': [self.df[self.df['State'] == user_input['state']]['rainfall_mm'].iloc[0]],
            'grid_stability_numeric': [{'low': 0, 'medium': 1, 'high': 2}[self.df[self.df['State'] == user_input['state']]['grid_stability'].iloc[0]]]
        })

        # Calculate potentials and cost-effectiveness
        state_data = self.df[self.df['State'] == user_input['state']].iloc[0]

        input_data['solar_potential'] = (
            input_data['solar_irradiation_kwh_m2_year'] *
            (35 - abs(30 - input_data['temperature_avg_c'])) / 35 *
            (1 - input_data['rainfall_mm']/1000)
        )

        input_data['wind_potential'] = (
            (input_data['wind_speed_ms'] ** 3) *
            (1 - input_data['rainfall_mm']/1000) *
            (1.2 if state_data['grid_stability'] == 'high'
             else 1.0 if state_data['grid_stability'] == 'medium'
             else 0.8)
        )

        input_data['biogas_potential'] = (
            (input_data['temperature_avg_c'] / 35) *
            (input_data['rainfall_mm'] / 600) *
            (0.8 if state_data['grid_stability'] == 'high'
             else 1.0 if state_data['grid_stability'] == 'medium'
             else 1.2)
        )

        input_data['solar_cost_effectiveness'] = (
            1000000 / (state_data['solar_cost_per_watt_inr'] * 1000 +
                      state_data['solar_installation_cost_inr_kw'])
        ) * state_data['solar_lifespan_years']

        input_data['wind_cost_effectiveness'] = (
            1000000 / (state_data['wind_turbine_cost_inr_kw'] +
                      state_data['wind_turbine_installation_cost_inr_kw'])
        ) * state_data['wind_turbine_lifespan_years']

        input_data['biogas_cost_effectiveness'] = (
            1000000 / (state_data['biogas_plant_cost_inr'] +
                      state_data['biogas_installation_cost_inr'])
        ) * state_data['biogas_lifespan_years']

        # Scale the input data
        input_scaled = self.scaler.transform(input_data[self.features])

        # Get prediction and probabilities
        prediction = self.model.predict(input_scaled)[0]
        probabilities = self.model.predict_proba(input_scaled)[0]

        # Get confidence scores for each option
        energy_types = self.model.classes_
        confidence_scores = {energy_type: prob * 100 for energy_type, prob in zip(energy_types, probabilities)}

        return {
            'recommended_energy': prediction,
            'confidence_scores': confidence_scores,
            'feature_importance': dict(zip(self.features, self.model.feature_importances_))
        }

# Example usage:
if __name__ == "__main__":
    # Initialize and train the model
    recommender = MLEnergyRecommender('/content/Updated_Book3.csv')

    # Test cases
    test_cases = [
        {
            'state': 'Maharashtra',
            'monthly_usage': 1000,
            'budget': 1000000
        },
        {
            'state': 'Gujarat',
            'monthly_usage': 1500,
            'budget': 1500000
        },
        {
            'state': 'Tamil Nadu',
            'monthly_usage': 2000,
            'budget': 2000000
        }
    ]

    # Test the model
    print("\nTest Predictions:")
    for case in test_cases:
        result = recommender.get_recommendation(case)
        print(f"\nState: {case['state']}")
        print(f"Recommended Energy: {result['recommended_energy']}")
        print("Confidence Scores:")
        for energy_type, score in result['confidence_scores'].items():
            print(f"  {energy_type}: {score:.2f}%")

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

class EnhancedCleanEnergyRecommender:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()
        self.model = None
        self.features = None
        self._preprocess_data()
        self.train_model()

    def _preprocess_data(self):
        """Clean and preprocess the dataset"""
        # Convert string numbers to float
        for col in self.df.columns:
            if self.df[col].dtype == 'object':
                try:
                    self.df[col] = self.df[col].replace({',': ''}, regex=True)
                    self.df[col] = pd.to_numeric(self.df[col])
                except:
                    pass

        # Calculate sustainability scores for each energy type
        self.df['solar_sustainability'] = self._calculate_solar_sustainability()
        self.df['wind_sustainability'] = self._calculate_wind_sustainability()
        self.df['biogas_sustainability'] = self._calculate_biogas_sustainability()

        # Define features for the model
        self.features = [
            'monthly_electricity_usage_kwh',
            'solar_irradiation_kwh_m2_year',
            'wind_speed_ms',
            'temperature_avg_c',
            'rainfall_mm',
            'solar_sustainability',
            'wind_sustainability',
            'biogas_sustainability'
        ]

    def _calculate_solar_sustainability(self):
        """Calculate solar sustainability score"""
        return (
            self.normalize(self.df['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.45 +
            self.normalize(self.df['temperature_avg_c'], 15, 35) * 0.35 +
            (1 - self.normalize(self.df['rainfall_mm'], 0, 400)) * 0.20
        ) * 100

    def _calculate_wind_sustainability(self):
        """Calculate wind sustainability score"""
        wind_power = self.df['wind_speed_ms'] ** 3
        wind_score = self.normalize(wind_power, 27, 512) * 0.6

        # Convert grid stability to numeric scores
        grid_scores = self.df['grid_stability'].map({'high': 1.0, 'medium': 0.7, 'low': 0.4})
        stability_score = grid_scores * 0.4

        return (wind_score + stability_score) * 100

    def _calculate_biogas_sustainability(self):
        """Calculate biogas sustainability score"""
        temp_score = self.normalize(self.df['temperature_avg_c'], 15, 35) * 0.4
        rainfall_score = self.normalize(self.df['rainfall_mm'], 50, 600) * 0.4

        grid_scores = self.df['grid_stability'].map({'high': 0.4, 'medium': 0.7, 'low': 1.0})
        stability_score = grid_scores * 0.2

        return (temp_score + rainfall_score + stability_score) * 100

    def normalize(self, values, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((values - min_val) / (max_val - min_val), 0, 1)

    def train_model(self):
        """Train the Random Forest model"""
        # Prepare features and target
        X = self.df[self.features]
        y = self.df['recommended_energy_alternative']

        # Scale features
        X_scaled = self.scaler.fit_transform(X)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42
        )

        # Train model
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.model.fit(X_train, y_train)

        # Print model performance
        y_pred = self.model.predict(X_test)
        print("\nModel Performance:")
        print(classification_report(y_test, y_pred))

    def get_recommendation(self, user_input):
        """Generate recommendation based on user input"""
        # Get state data
        state_data = self.df[self.df['State'] == user_input['State']].iloc[0]

        # Prepare input features
        input_features = pd.DataFrame({
            'monthly_electricity_usage_kwh': [user_input['monthly_usage'] * 1000],
            'solar_irradiation_kwh_m2_year': [state_data['solar_irradiation_kwh_m2_year']],
            'wind_speed_ms': [state_data['wind_speed_ms']],
            'temperature_avg_c': [state_data['temperature_avg_c']],
            'rainfall_mm': [state_data['rainfall_mm']],
            'solar_sustainability': [self._calculate_solar_sustainability().iloc[
                self.df[self.df['State'] == user_input['State']].index[0]
            ]],
            'wind_sustainability': [self._calculate_wind_sustainability().iloc[
                self.df[self.df['State'] == user_input['State']].index[0]
            ]],
            'biogas_sustainability': [self._calculate_biogas_sustainability().iloc[
                self.df[self.df['State'] == user_input['State']].index[0]
            ]]
        })

        # Scale input features
        input_scaled = self.scaler.transform(input_features)

        # Get prediction and probabilities
        prediction = self.model.predict(input_scaled)[0]
        probabilities = self.model.predict_proba(input_scaled)[0]

        # Get confidence scores
        energy_types = self.model.classes_
        confidence_scores = {
            energy_type: prob * 100
            for energy_type, prob in zip(energy_types, probabilities)
        }

        return {
            'recommended_energy': prediction,
            'confidence_scores': confidence_scores,
            'sustainability_scores': {
                'solar': input_features['solar_sustainability'].iloc[0],
                'wind': input_features['wind_sustainability'].iloc[0],
                'biogas': input_features['biogas_sustainability'].iloc[0]
            }
        }

def test_recommender(csv_file_path):
    # Initialize the recommender
    recommender = EnhancedCleanEnergyRecommender(csv_file_path)

    # Test cases
    test_cases = [
        {
            'State': 'Rajasthan',
            'monthly_usage': 5000,
            'budget': 10000000,
            'electricity_rate': 8.0,
        },
        {
            'State': 'Tamil Nadu',
            'monthly_usage': 5000,
            'budget': 10000000,
            'electricity_rate': 8.0
        },
        {
            'State': 'Gujarat',
            'monthly_usage': 5000,
            'budget': 100000,
            'electricity_rate': 10.0
        }
    ]

    for test_input in test_cases:
        print(f"\nTest Case for {test_input['State']}:")
        result = recommender.get_recommendation(test_input)
        print(f"Recommended Energy: {result['recommended_energy']}")
        print("\nConfidence Scores:")
        for energy_type, score in result['confidence_scores'].items():
            print(f"{energy_type}: {score:.2f}%")
        print("\nSustainability Scores:")
        for energy_type, score in result['sustainability_scores'].items():
            print(f"{energy_type}: {score:.2f}")

if __name__ == "__main__":
    test_recommender("Updated_Book3.csv")


Model Performance:
              precision    recall  f1-score   support

      Biogas       1.00      1.00      1.00         2
       Solar       1.00      0.75      0.86         4
        Wind       0.00      0.00      0.00         0

    accuracy                           0.83         6
   macro avg       0.67      0.58      0.62         6
weighted avg       1.00      0.83      0.90         6


Test Case for Rajasthan:
Recommended Energy: Solar

Confidence Scores:
Biogas: 8.00%
Solar: 91.00%
Wind: 1.00%

Sustainability Scores:
solar: 90.77
wind: 28.00
biogas: 50.15

Test Case for Tamil Nadu:
Recommended Energy: Solar

Confidence Scores:
Biogas: 13.00%
Solar: 86.00%
Wind: 1.00%

Sustainability Scores:
solar: 56.25
wind: 16.00
biogas: 88.82

Test Case for Gujarat:
Recommended Energy: Solar

Confidence Scores:
Biogas: 7.00%
Solar: 82.00%
Wind: 11.00%

Sustainability Scores:
solar: 82.71
wind: 100.00
biogas: 44.97


In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.utils import resample

class EnhancedCleanEnergyRecommender:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)
        self.scaler = StandardScaler()
        self.model = None
        self.features = None
        self._preprocess_data()
        self.train_model()

    def _preprocess_data(self):
        """Clean and preprocess the dataset"""
        # Convert string numbers to float
        for col in self.df.columns:
            if self.df[col].dtype == 'object':
                try:
                    self.df[col] = self.df[col].replace({',': ''}, regex=True)
                    self.df[col] = pd.to_numeric(self.df[col])
                except:
                    pass

        # Create more distinctive features for each energy type
        self.df['solar_viability'] = self._calculate_solar_viability()
        self.df['wind_viability'] = self._calculate_wind_viability()
        self.df['biogas_viability'] = self._calculate_biogas_viability()

        # Create economic features
        self.df['solar_cost_effectiveness'] = self._calculate_solar_cost_effectiveness()
        self.df['wind_cost_effectiveness'] = self._calculate_wind_cost_effectiveness()
        self.df['biogas_cost_effectiveness'] = self._calculate_biogas_cost_effectiveness()

        # Define features for the model
        self.features = [
            'solar_irradiation_kwh_m2_year',
            'wind_speed_ms',
            'temperature_avg_c',
            'rainfall_mm',
            'solar_viability',
            'wind_viability',
            'biogas_viability',
            'solar_cost_effectiveness',
            'wind_cost_effectiveness',
            'biogas_cost_effectiveness'
        ]

    def _calculate_solar_viability(self):
        """Calculate enhanced solar viability score"""
        return (
            self.normalize(self.df['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.5 +
            self.normalize(self.df['temperature_avg_c'], 15, 35) * 0.3 +
            (1 - self.normalize(self.df['rainfall_mm'], 0, 400)) * 0.2
        ) * 100

    def _calculate_wind_viability(self):
        """Calculate enhanced wind viability score"""
        # Exponential scaling for wind speed to better capture power generation potential
        wind_power = np.power(self.df['wind_speed_ms'], 3)
        wind_score = self.normalize(wind_power, 27, 512) * 0.7

        grid_scores = self.df['grid_stability'].map({'high': 1.0, 'medium': 0.7, 'low': 0.4})
        stability_score = grid_scores * 0.3

        return (wind_score + stability_score) * 100

    def _calculate_biogas_viability(self):
        """Calculate enhanced biogas viability score"""
        temp_score = self.normalize(self.df['temperature_avg_c'], 15, 35) * 0.4
        rainfall_score = self.normalize(self.df['rainfall_mm'], 50, 600) * 0.4

        grid_scores = self.df['grid_stability'].map({'high': 0.4, 'medium': 0.7, 'low': 1.0})
        stability_score = grid_scores * 0.2

        return (temp_score + rainfall_score + stability_score) * 100

    def _calculate_solar_cost_effectiveness(self):
        """Calculate solar cost effectiveness"""
        return 100 * (1 - self.normalize(
            self.df['solar_cost_per_watt_inr'] * 1000 + self.df['solar_installation_cost_inr_kw'],
            50000, 150000
        ))

    def _calculate_wind_cost_effectiveness(self):
        """Calculate wind cost effectiveness"""
        return 100 * (1 - self.normalize(
            self.df['wind_turbine_cost_inr_kw'] + self.df['wind_turbine_installation_cost_inr_kw'],
            100000, 200000
        ))

    def _calculate_biogas_cost_effectiveness(self):
        """Calculate biogas cost effectiveness"""
        return 100 * (1 - self.normalize(
            self.df['biogas_plant_cost_inr'] + self.df['biogas_installation_cost_inr'],
            30000, 100000
        ))

    def normalize(self, values, min_val, max_val):
        """Normalize values between 0 and 1"""
        return np.clip((values - min_val) / (max_val - min_val), 0, 1)

    def determine_optimal_energy(self, row):
        """Determine the optimal energy source based on viability scores"""
        scores = {
            'Solar': row['solar_viability'] * row['solar_cost_effectiveness'],
            'Wind': row['wind_viability'] * row['wind_cost_effectiveness'],
            'Biogas': row['biogas_viability'] * row['biogas_cost_effectiveness']
        }
        return max(scores.items(), key=lambda x: x[1])[0]

    def train_model(self):
        """Train the Random Forest model with balanced dataset"""
        # Create balanced target variable
        self.df['recommended_energy'] = self.df.apply(self.determine_optimal_energy, axis=1)

        # Prepare features and target
        X = self.df[self.features]
        y = self.df['recommended_energy']

        # Scale features
        X_scaled = self.scaler.fit_transform(X)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42, stratify=y
        )

        # Train model with balanced class weights
        self.model = RandomForestClassifier(
            n_estimators=100,
            class_weight='balanced',
            random_state=42
        )
        self.model.fit(X_train, y_train)

        # Print model performance
        y_pred = self.model.predict(X_test)
        print("\nModel Performance:")
        print(classification_report(y_test, y_pred))

    def get_recommendation(self, user_input):
        """Generate recommendation based on user input"""
        # Get state data
        state_data = self.df[self.df['State'] == user_input['State']].iloc[0]

        # Calculate viability scores for input
        solar_viability = (
            self.normalize(state_data['solar_irradiation_kwh_m2_year'], 1500, 2100) * 0.5 +
            self.normalize(state_data['temperature_avg_c'], 15, 35) * 0.3 +
            (1 - self.normalize(state_data['rainfall_mm'], 0, 400)) * 0.2
        ) * 100

        wind_power = np.power(state_data['wind_speed_ms'], 3)
        wind_viability = (
            self.normalize(wind_power, 27, 512) * 0.7 +
            {'high': 1.0, 'medium': 0.7, 'low': 0.4}[state_data['grid_stability']] * 0.3
        ) * 100

        biogas_viability = (
            self.normalize(state_data['temperature_avg_c'], 15, 35) * 0.4 +
            self.normalize(state_data['rainfall_mm'], 50, 600) * 0.4 +
            {'high': 0.4, 'medium': 0.7, 'low': 1.0}[state_data['grid_stability']] * 0.2
        ) * 100

        # Prepare input features
        input_features = pd.DataFrame({
            'solar_irradiation_kwh_m2_year': [state_data['solar_irradiation_kwh_m2_year']],
            'wind_speed_ms': [state_data['wind_speed_ms']],
            'temperature_avg_c': [state_data['temperature_avg_c']],
            'rainfall_mm': [state_data['rainfall_mm']],
            'solar_viability': [solar_viability],
            'wind_viability': [wind_viability],
            'biogas_viability': [biogas_viability],
            'solar_cost_effectiveness': [self._calculate_solar_cost_effectiveness().iloc[
                self.df[self.df['State'] == user_input['State']].index[0]
            ]],
            'wind_cost_effectiveness': [self._calculate_wind_cost_effectiveness().iloc[
                self.df[self.df['State'] == user_input['State']].index[0]
            ]],
            'biogas_cost_effectiveness': [self._calculate_biogas_cost_effectiveness().iloc[
                self.df[self.df['State'] == user_input['State']].index[0]
            ]]
        })

        # Scale input features
        input_scaled = self.scaler.transform(input_features)

        # Get prediction and probabilities
        prediction = self.model.predict(input_scaled)[0]
        probabilities = self.model.predict_proba(input_scaled)[0]

        # Get confidence scores
        energy_types = self.model.classes_
        confidence_scores = {
            energy_type: prob * 100
            for energy_type, prob in zip(energy_types, probabilities)
        }

        return {
            'recommended_energy': prediction,
            'confidence_scores': confidence_scores,
            'viability_scores': {
                'solar': solar_viability,
                'wind': wind_viability,
                'biogas': biogas_viability
            }
        }

def test_recommender(csv_file_path):
    # Initialize the recommender
    recommender = EnhancedCleanEnergyRecommender(csv_file_path)

    # Test cases
    test_cases = [
        {
            'State': 'Rajasthan',  # High solar potential
            'monthly_usage': 5000,
            'budget': 10000000,
            'electricity_rate': 8.0,
        },
        {
            'State': 'Gujarat',    # High wind potential
            'monthly_usage': 5000,
            'budget': 10000000,
            'electricity_rate': 8.0
        },
        {
            'State': 'Kerala',     # High rainfall, good for biogas
            'monthly_usage': 5000,
            'budget': 10000000,
            'electricity_rate': 8.0
        }
    ]

    for test_input in test_cases:
        print(f"\nTest Case for {test_input['State']}:")
        result = recommender.get_recommendation(test_input)
        print(f"Recommended Energy: {result['recommended_energy']}")
        print("\nConfidence Scores:")
        for energy_type, score in result['confidence_scores'].items():
            print(f"{energy_type}: {score:.2f}%")
        print("\nViability Scores:")
        for energy_type, score in result['viability_scores'].items():
            print(f"{energy_type}: {score:.2f}")

if __name__ == "__main__":
    test_recommender("Updated_Book3.csv")


Model Performance:
              precision    recall  f1-score   support

      Biogas       1.00      1.00      1.00         6

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6


Test Case for Rajasthan:
Recommended Energy: Biogas

Confidence Scores:
Biogas: 89.00%
Wind: 11.00%

Viability Scores:
solar: 91.52
wind: 21.00
biogas: 50.15

Test Case for Gujarat:
Recommended Energy: Wind

Confidence Scores:
Biogas: 40.00%
Wind: 60.00%

Viability Scores:
solar: 82.63
wind: 100.00
biogas: 44.97

Test Case for Kerala:
Recommended Energy: Biogas

Confidence Scores:
Biogas: 94.00%
Wind: 6.00%

Viability Scores:
solar: 44.50
wind: 48.28
biogas: 71.76
