In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
import os

In [2]:
def forecast_models(country):
    # Load the dataset
    file_path = f'country_dataset/{country}.csv'
    df = pd.read_csv(file_path)
    
    # Extract relevant columns
    X = df[['Year']]
    y = df['Total renewable water resources per capita (m3/inhab/year)']

    # Initialize models
    models = {
        'Linear Regression': LinearRegression(),
        'Decision Tree': DecisionTreeRegressor(random_state=123),
        'Random Forest': RandomForestRegressor(random_state=123),
        'XGBoost': xgb.XGBRegressor(random_state=123)
    }

    # Prepare future years DataFrame
    future_years = pd.DataFrame({
        'Year': [2022, 2023, 2024]
    })

    plt.figure(figsize=(14, 8))

    # Plot historical data
    plt.plot(X, y, label='Historical Data', marker='o')

    for name, model in models.items():
        model.fit(X, y)
        future_predictions = model.predict(future_years)
        
        # Plot model predictions
        plt.plot(future_years['Year'], future_predictions, label=f'{name} Predictions', marker='o', linestyle='--')

        # Optionally, evaluate the model
        y_train_pred = model.predict(X)
        mse = mean_squared_error(y, y_train_pred)
        print(f'{name} Mean Squared Error on training data: {mse:.2f}')

    # Time Series Forecast Model: ARIMA
    arima_model = ARIMA(y, order=(1, 1, 1))  # Adjust order as needed
    arima_fit = arima_model.fit()
    arima_forecast = arima_fit.predict(start=len(y), end=len(y) + len(future_years) - 1, typ='levels')

    # Plot ARIMA predictions
    plt.plot(future_years['Year'], arima_forecast, label='ARIMA Predictions', marker='o', linestyle='--', color='magenta')

    # Customize the plot
    plt.xlabel('Year')
    plt.ylabel('Renewable Water Resources (m3/inhab/year)')
    plt.title('Comparison of Forecasting Models for Renewable Water Resources')
    plt.legend()
    plt.grid(True)
    plt.show()


    for name, model in models.items():
        future_predictions = model.predict(future_years)
        
        print(f'{name} Predictions for 2022, 2023, 2024:')
        for year, pred in zip(future_years['Year'], future_predictions):
            print(f'  {year}: {pred:.2f} m3/inhab/year')
    
    # Print ARIMA predictions
    print('ARIMA Predictions for 2022, 2023, 2024:')
    for year, pred in zip(future_years['Year'], arima_forecast):
        print(f'  {year}: {pred:.2f} m3/inhab/year')



In [3]:
def model(country):
    # Load the dataset
    file_path = f'dataset/{country}.csv'
    if not os.path.isfile(file_path):
        return 'Country not found'
    df = pd.read_csv(file_path)
    
    # Extract relevant columns
    X = df[['Year']]
    y = df['Total renewable water resources per capita (m3/inhab/year)']

    # Prepare future years DataFrame
    future_years = pd.DataFrame({
        'Year': [2022, 2023, 2024, 2025]
    })

    # Time Series Forecast Model: ARIMA
    arima_model = ARIMA(y, order=(1, 1, 1))  # Adjust order as needed
    arima_fit = arima_model.fit()
    arima_forecast = arima_fit.predict(start=len(y), end=len(y) + len(future_years) - 1, typ='levels')
    
    prediction_dict = {
        'Country': [country] * len(future_years),
        'Year': [year for year in future_years['Year']],
        'Total renewable water resources per capita (m3/inhab/year)': [pred for pred in arima_forecast]
    }

    # Write the predictions to a CSV file
    CSV_FILE_PATH = './COUNTRY.csv'
    prediction_df = pd.DataFrame(prediction_dict)

    # Append to the CSV file if it exists, otherwise create it
    if os.path.isfile(CSV_FILE_PATH):
        # Append mode with header only if file is empty
        prediction_df.to_csv(CSV_FILE_PATH, mode='a', header=False, index=False)
    else:
        # Create file and write header
        prediction_df.to_csv(CSV_FILE_PATH, mode='w', header=True, index=False)

    print('Successfully wrote predictions to CSV file')
    

In [4]:
# Read the csv file
df = pd.read_csv('../dataset/new_dataset.csv')
# Get the unique countries
countries = df['Country'].unique()
for country in countries:
    print(f'Forecasting models for {country}')
    model(country)

Forecasting models for Afghanistan
Forecasting models for Albania
Forecasting models for Algeria
Forecasting models for Angola
Forecasting models for Antigua and Barbuda
Forecasting models for Argentina
Forecasting models for Armenia
Forecasting models for Australia
Forecasting models for Austria
Forecasting models for Azerbaijan
Forecasting models for Bahrain
Forecasting models for Bangladesh
Forecasting models for Barbados
Forecasting models for Belarus
Forecasting models for Belgium
Forecasting models for Belize
Forecasting models for Benin
Forecasting models for Bhutan
Forecasting models for Bosnia and Herzegovina
Forecasting models for Botswana
Forecasting models for Brazil
Forecasting models for Brunei Darussalam
Forecasting models for Bulgaria
Forecasting models for Burkina Faso
Forecasting models for Burundi
Forecasting models for Cabo Verde
Forecasting models for Cambodia
Forecasting models for Cameroon
Forecasting models for Canada
Forecasting models for Central African Repub