# In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA

# Read the energy dataset from disk and index it by its 'Year' column
csv_file_path = 'D:/Jupyter Notebook/final_dataset_1.csv'
energy_data = pd.read_csv(csv_file_path).set_index('Year')

# Distinct values of the 'country' column, in order of first appearance
countries = energy_data['country'].unique()

# Generation column used for each supported energy type.
# NOTE(review): 'Wind Generation (TWh)' is inferred from the naming of the
# solar and hydro columns -- confirm it matches the dataset header.
_GENERATION_COLUMNS = {
    "solar": 'Solar Generation (TWh)',
    "wind": 'Wind Generation (TWh)',
    "hydro": 'Hydro Generation (TWh)',
}


def _forecast_or_zeros(train_data, column, forecast_years):
    """Fit ARIMA(7, 1, 0) on ``train_data[column]`` and forecast ahead.

    Best effort: any failure (missing column, model fitting error, ...) is
    printed and an all-zero series indexed by ``forecast_years`` is returned
    instead, so one bad series does not abort the whole run.
    """
    try:
        model = ARIMA(train_data[column], order=(7, 1, 0))  # Example order, you need to tune this
        predictions = model.fit().forecast(steps=len(forecast_years))
        # The forecast simply continues from the last training observation;
        # relabel it with the forecast years explicitly.
        # NOTE(review): this assumes the training data ends at 2020 -- verify
        # for countries whose history stops earlier.
        predictions.index = pd.Index(forecast_years)
        return predictions
    except Exception as e:
        print(f"Error encountered for country data: {e}")
        # Build the zero series explicitly rather than relying on the default
        # dtype of a data-less Series followed by fillna.
        return pd.Series(0.0, index=forecast_years)


# Function to predict energy generation and consumption for a given country and energy type
def predict_energy_and_consumption(country_data, energy_type):
    """Forecast generation and consumption for 2021-2027 for one country.

    Args:
        country_data: DataFrame for a single country; it is label-sliced with
            ``.loc[:2020]``, so it is expected to be indexed by year.
        energy_type: One of ``"solar"``, ``"wind"`` or ``"hydro"``.

    Returns:
        A ``(energy_predictions, consumption_predictions)`` tuple of Series
        indexed by the years 2021..2027. A series is all zeros when its model
        could not be fitted (the error is printed).

    Raises:
        ValueError: If ``energy_type`` is not a supported energy type.
    """
    # Validate before any modelling so that a bad argument is reported to the
    # caller instead of being swallowed by the best-effort error handling.
    if energy_type not in _GENERATION_COLUMNS:
        raise ValueError("Invalid energy type. Choose from 'solar', 'wind', or 'hydro'.")
    energy_column = _GENERATION_COLUMNS[energy_type]

    # str.replace is case-sensitive: the column names spell "Generation" with
    # a capital G, so the replacement must match that spelling.
    # NOTE(review): assumes consumption columns mirror the generation names
    # (e.g. 'Solar Consumption (TWh)') -- confirm against the dataset.
    consumption_column = energy_column.replace("Generation", "Consumption")

    # Use data up to and including 2020 for training
    train_data = country_data.loc[:2020]

    # Define forecast years
    forecast_years = np.arange(2021, 2028)

    # Fit each series independently so that a failure on one (for example a
    # missing consumption column) does not discard the other forecast.
    energy_predictions = _forecast_or_zeros(train_data, energy_column, forecast_years)
    consumption_predictions = _forecast_or_zeros(train_data, consumption_column, forecast_years)

    return energy_predictions, consumption_predictions

# Forecast every (country, energy type) pair and flatten the results into one
# record per forecast year.
# NOTE: each record carries only the two prediction columns of its own energy
# type, so the assembled frame holds NaN in the columns of the other types.
predicted_data = []

for country in countries:
    country_data = energy_data.loc[energy_data['country'] == country]

    for energy_type in ("solar", "wind", "hydro"):
        generation_key = f'predicted_{energy_type}_generation'
        consumption_key = f'predicted_{energy_type}_consumption'
        energy_predictions, consumption_predictions = predict_energy_and_consumption(
            country_data, energy_type
        )

        predicted_data.extend(
            {
                'country': country,
                'Year': year,
                generation_key: energy_predictions.loc[year],
                consumption_key: consumption_predictions.loc[year],
            }
            for year in range(2021, 2028)
        )

# Assemble the records into a table and show it
predicted_df = pd.DataFrame(predicted_data)
print(predicted_df)

# Write the predictions to a CSV file (without the row index)
predicted_df.to_csv('new_predicted_energy_and_consumption.csv', index=False)
