In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

class BiomassForecast:
    """Forecast per-site biomass for the years listed in ``FORECAST_YEARS``.

    The pipeline (see ``run``) loads a wide biomass history table and a
    site-to-site distance matrix, builds lagged neighbourhood features, fits a
    ``Ridge`` regression on the historical years and writes the predictions
    for the forecast years to a CSV file.
    """

    # Years that are appended as zero-filled placeholders in ``load_data`` and
    # later predicted by the model.
    FORECAST_YEARS = (2018, 2019)
    # Locality features of year Y are the neighbourhood values of year
    # Y - LAG_YEARS.
    LAG_YEARS = 2

    def __init__(self, biomass_file, distance_file):
        """Store the input locations and initialise empty pipeline state.

        Args:
            biomass_file: Path of the biomass history CSV.
            distance_file: Path of the distance matrix CSV.
        """
        self.biomass_file = biomass_file
        self.distance_file = distance_file
        self.start_year = 2010
        # Number of neighbours used per site; the site's own entry is added on
        # top of this, giving ``locality_size + 1`` feature columns.
        self.locality_size = 8
        self.biomass_data = None
        self.distance_matrix = None
        self.locality_data = None
        # Snapshot of ``locality_data`` taken before encoding/scaling.
        self.locality_data_copy = None
        self.scalers = {}

    def load_data(self):
        """Read both CSV files and reshape the history into long format.

        The ``Index`` column is renamed to ``Site_no``, a zero-filled column is
        added for every forecast year, and the table is melted to one row per
        (site, year) with columns ``Site_no``, ``Latitude``, ``Longitude``,
        ``Year`` and ``Value``.
        """
        history = pd.read_csv(self.biomass_file)
        history.rename(columns={"Index": "Site_no"}, inplace=True)
        for year in self.FORECAST_YEARS:
            # Placeholder targets; these rows become the prediction set.
            history[str(year)] = 0
        self.biomass_data = pd.melt(
            history,
            id_vars=['Site_no', 'Latitude', 'Longitude'],
            var_name='Year',
            value_name='Value',
        ).astype({'Year': 'Int16'})
        self.distance_matrix = pd.read_csv(self.distance_file, index_col=0)

    def locality_index(self, distance):
        """Return the positions of the ``locality_size + 1`` smallest distances.

        Args:
            distance: 1-D array-like of distances (a Series, array or list).

        Returns:
            numpy.ndarray: Positions sorted by increasing distance.
        """
        # ``np.asarray`` lets plain arrays and lists work as well as a Series.
        order = np.argsort(np.asarray(distance))
        return order[:self.locality_size + 1]

    def compute_locality_values(self):
        """Attach lagged neighbourhood values as columns ``p0`` .. ``p{k}``.

        For every row whose year is at least ``LAG_YEARS`` after
        ``start_year``, the features are the values recorded ``LAG_YEARS``
        earlier at the sites selected by ``locality_index``.  Earlier rows keep
        all-zero features.  The result is stored in ``locality_data``.

        Note: the distance matrix is indexed positionally with ``Site_no``, so
        site numbers are assumed to be 0-based row positions of that matrix.
        The values of one row are ordered by the order in which the sites
        appear in ``biomass_data``, not by distance.

        Raises:
            ValueError: If a lagged year is missing from the data or a
                neighbourhood does not yield ``locality_size + 1`` values.
        """
        data = self.biomass_data
        width = self.locality_size + 1
        locality_values = np.zeros((data.shape[0], width))

        plain = pd.DataFrame({
            'site': data['Site_no'].astype('int64').to_numpy(),
            'year': data['Year'].astype('int64').to_numpy(),
            'value': data['Value'].to_numpy(),
        })
        # One site x year lookup table replaces a full-table filter per row.
        table = plain.pivot(index='site', columns='year', values='value')
        # Keep the sites in their order of appearance so the feature order
        # matches a row-order filter of the long table.
        table = table.reindex(pd.unique(plain['site']))
        table_sites = table.index.to_numpy()
        table_values = table.to_numpy()
        year_column = {int(year): col for col, year in enumerate(table.columns)}
        row_years = plain['year'].to_numpy()

        for site, positions in plain.groupby('site').indices.items():
            neighbours = self.locality_index(self.distance_matrix.iloc[int(site)])
            # Rows: neighbouring sites, columns: years.
            block = table_values[np.isin(table_sites, neighbours)]
            for position in positions:
                year = int(row_years[position])
                if year - self.start_year < self.LAG_YEARS:
                    continue
                lagged_year = year - self.LAG_YEARS
                if lagged_year not in year_column:
                    raise ValueError(
                        f"no biomass values available for lagged year {lagged_year}")
                locality_values[position] = block[:, year_column[lagged_year]]

        columns = [f"p{i}" for i in range(width)]
        locality_df = pd.DataFrame(locality_values, columns=columns, index=data.index)
        self.locality_data = pd.concat([data, locality_df], axis=1)

    def preprocess_data(self):
        """One-hot encode sites, shift years to offsets and scale coordinates.

        A copy of the untouched feature table is kept in
        ``locality_data_copy``; the fitted scalers are kept in ``scalers``
        under the column names.
        """
        self.locality_data_copy = self.locality_data.copy()
        self.locality_data = pd.get_dummies(self.locality_data, columns=['Site_no'])
        self.locality_data['Year'] = self.locality_data['Year'] - self.start_year

        for column in ('Latitude', 'Longitude'):
            scaler = StandardScaler()
            self.scalers[column] = scaler
            self.locality_data[column] = scaler.fit_transform(
                self.locality_data[column].values.reshape(-1, 1))

    def split_data(self):
        """Split the feature table into training data and forecast rows.

        Rows whose year offset belongs to a forecast year form the test set;
        all other rows form the training set.

        Returns:
            tuple: ``(X_train, y_train, X_test)``.
        """
        forecast_offsets = [year - self.start_year for year in self.FORECAST_YEARS]
        is_forecast = self.locality_data['Year'].isin(forecast_offsets)
        train = self.locality_data[~is_forecast]
        test = self.locality_data[is_forecast]
        y_train = train['Value']
        X_train = train.drop('Value', axis=1)
        X_test = test.drop('Value', axis=1)
        return X_train, y_train, X_test

    def train_model(self, X_train, y_train):
        """Fit and return a ``Ridge`` model with default settings."""
        model = Ridge()
        model.fit(X_train, y_train)
        return model

    def predict(self, model, X_test):
        """Return ``model``'s predictions for ``X_test``."""
        return model.predict(X_test)

    def save_forecast(self, predictions, output_file, output_dir=r'./Biomass Results/'):
        """Write the predictions as one row per site to a CSV file.

        Args:
            predictions: Flat sequence holding the values of all sites for the
                first forecast year followed by those for the next one (the
                row order produced by ``split_data`` on the melted table).
            output_file: Name of the CSV file to create.
            output_dir: Directory that receives the file; it is created when
                it does not exist yet.

        Raises:
            ValueError: If the number of predictions does not equal the number
                of sites times the number of forecast years.
        """
        # De-duplicate on the site id so that two sites sharing coordinates
        # are still counted separately.
        biomass_forecast = (
            self.biomass_data.drop_duplicates(subset='Site_no')[['Latitude', 'Longitude']]
            .reset_index(drop=True)
        )
        site_count = len(biomass_forecast)
        year_count = len(self.FORECAST_YEARS)
        predictions = np.asarray(predictions)
        if predictions.size != site_count * year_count:
            raise ValueError(
                f"expected {site_count * year_count} predictions "
                f"({site_count} sites x {year_count} years), got {predictions.size}")

        year_columns = [str(year) for year in self.FORECAST_YEARS]
        biomass_forecast[year_columns] = np.reshape(predictions, (year_count, site_count)).T

        # The target must be a file path; the file name is not a ``to_csv``
        # positional argument.
        target = Path(output_dir) / output_file
        target.parent.mkdir(parents=True, exist_ok=True)
        biomass_forecast.to_csv(target)

    def run(self, output_file):
        """Execute the whole pipeline and save the forecast to ``output_file``."""
        self.load_data()
        self.compute_locality_values()
        self.preprocess_data()
        X_train, y_train, X_test = self.split_data()
        model = self.train_model(X_train, y_train)
        predictions = self.predict(model, X_test)
        self.save_forecast(predictions, output_file)
        print(f"Forecast saved to {output_file}")

def main():
    """Run the biomass forecast with the project's default file locations."""
    # Forward slashes resolve on both Windows and POSIX systems, whereas a
    # backslash-separated relative path only works on Windows.
    biomass_file = "./Dataset/Biomass_History.csv"
    distance_file = "./Dataset/Distance_Matrix.csv"
    output_file = 'Biomass_Forecast.csv'

    forecast = BiomassForecast(biomass_file, distance_file)
    forecast.run(output_file)

# Run the forecasting pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Forecast saved to Biomass_Forecast.csv
