In [1]:
import os, sys, time
from datetime import datetime
import pandas as pd
import numpy as np
import geojson, json
import xarray as xr
import rioxarray

from sklearn.cross_decomposition import CCA
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
#from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, RepeatedKFold, LeaveOneOut, LeavePOut, KFold, cross_val_predict
from sklearn.metrics import r2_score, mean_squared_error, roc_auc_score, mean_absolute_percentage_error, mean_squared_error, explained_variance_score
from sklearn.base import BaseEstimator, RegressorMixin
from rasterstats import zonal_stats
from matplotlib import pyplot as plt
import matplotlib.colors as colors
import cartopy.crs as ccrs
import importlib
from PyQt5 import QtWidgets, uic, QtCore
from PyQt5.QtWidgets import QFileDialog
import traceback


import gl
from dev_functions.dev_functions import *



def readGUI():
    """Copy the current state of the main window into ``gl.config``.

    Scalar settings are stored as values read from the widgets (text, integer
    parsed from text, selected combo text, or checkbox state).  The per-model
    entries (``predictorExtents``, ``predictorFiles``, ``crossval``,
    ``preproc``, ``regression``) store the widget objects themselves, not
    their contents, and only for the model slots 0..4 whose widgets exist on
    ``gl.window``.

    Raises:
        ValueError: if one of the year fields does not contain an integer.
    """
    win=gl.window
    cfg=gl.config

    # small readers, one per widget flavour
    asText=lambda widget: widget.text()
    asInt=lambda widget: int(widget.text())
    asChoice=lambda widget: widget.currentText()
    asFlag=lambda widget: widget.isChecked()

    # (config key, widget attribute on the window, reader) - written in this order
    scalarFields=(
        ('downloadDir', 'lineEditDirectory', asText),
        ('fcstTargetYear', 'lineEdit_tgtyear', asInt),
        ('predictorYear', 'lineEdit_srcyear', asInt),
        ('predictorMonth', 'comboBox_srcmon', asChoice),
        ('fcstTargetSeas', 'comboBox_tgtseas', asChoice),
        ('climStartYr', 'lineEdit_climstartyr', asInt),
        ('climEndYr', 'lineEdit_climendyr', asInt),
        ('timeAggregation', 'comboBox_timeaggregation', asChoice),
        ("predictandFileName", 'lineEdit_predictandfile', asText),
        ("predictandVar", 'comboBox_predictandvar', asChoice),
        ("zonesFile", 'lineEdit_zonesfile', asText),
        ("zonesAttribute", 'comboBox_zonesattribute', asChoice),
        ("zonesAggregate", 'checkBox_zonesaggregate', asFlag),
        ("overlayFile", 'lineEdit_overlayfile', asText),
    )
    for key, widgetName, read in scalarFields:
        cfg[key]=read(getattr(win, widgetName))

    slots=range(5)

    # domain extents: a slot is kept only when all four bound widgets exist;
    # the dict is keyed by widget name and holds the widget objects
    extents=[]
    for slot in slots:
        boundNames=["lineEdit_{}{}".format(bound.lower(), slot)
                    for bound in ("minLon","maxLon","minLat","maxLat")]
        boundWidgets={name: getattr(win, name) for name in boundNames if hasattr(win, name)}
        if len(boundWidgets)==4:
            extents.append(boundWidgets)
    cfg["predictorExtents"]=extents

    # predictor file/variable: a slot is kept only when both widgets exist
    filePairs=[]
    for slot in slots:
        pairNames=("lineEdit_predictorfile{}".format(slot), "lineEdit_predictorvar{}".format(slot))
        pair=[getattr(win, name) for name in pairNames if hasattr(win, name)]
        if len(pair)==2:
            filePairs.append(pair)
    cfg["predictorFiles"]=filePairs

    # single-widget per-slot settings; the config key doubles as the widget name stem
    for key in ("crossval", "preproc", "regression"):
        slotNames=["lineEdit_{}{}".format(key, slot) for slot in slots]
        cfg[key]=[getattr(win, name) for name in slotNames if hasattr(win, name)]



def computeModel(model=0):
    """Run the complete forecast workflow once and write maps and time series.

    Steps, in order: refresh ``gl.config`` from the GUI, read predictor and
    predictand data, optionally aggregate the predictand to zones, align
    hindcast data, create the output directories, compute observed terciles,
    run a cross-validated hindcast, fit the regressor and produce the
    deterministic forecast and its anomalies, derive the probabilistic
    forecast, compute skill scores, and plot everything.

    Args:
        model: identifier of the model slot; in this function it is only
            passed to ``showMessage``.

    Returns:
        None.

    Raises:
        StopEarly: when one of the helper steps returns ``None``.
        ValueError: when ``gl.config['method']`` is neither "PCR" nor "CCA".
    """
    # NOTE(review): this function is started through Worker (a QThread), so the
    # widget reads in readGUI() happen off the GUI thread - confirm this is safe.
    readGUI()

    showMessage(model, "INFO")
    leadTime=getLeadTime()
    if leadTime is None:
        raise StopEarly

    #reading predictors data
    predictors=readPredictors()
    if predictors is None:
        raise StopEarly

    #reading predictand data - this will calculate seasonal from monthly if needed.
    predictand, geoData=readPredictand()
    if predictand is None:
        raise StopEarly

    # NOTE(review): `gpd` is not imported by name in this file's import cell;
    # presumably it arrives through the star import from dev_functions - confirm.
    if gl.config["zonesMap"]!="":
        zonesVector=gpd.read_file(gl.config["zonesMap"])
    else:
        zonesVector=None

    if gl.config["targetType"]=="zones":
        showMessage("Aggregating data to zones read from {} ...".format(gl.config["zonesMap"]))
        predictand,geoData=aggregatePredictand(predictand, geoData, zonesVector)

    #defining target date for forecast. If seasonal - then this is the first month of the season.
    fcstTgtDate=pd.to_datetime("01 {} {}".format(gl.config['fcstTargetMonth'], gl.config['fcstTargetYear']))

    gl.config["fcstTgtCode"]=seasons[fcstTgtDate.month-1]
    #will have to implement iteration through predictors?? for the time being - just a single predictor

    #finding overlap of predictand and predictor
    showMessage("Aligning predictor and predictand data...")
    predictandHcst,predictorHcst=getHcstData(predictand,predictors[0])
    predictorFcst=getFcstData(predictors[0])
    if predictandHcst is None:
        raise StopEarly

    showMessage("Setting up directories to write to...")
    if gl.config['fcstBaseTime']=="seas":
        forecastID="{}-{}".format(gl.predictorDate.strftime("%Y%m"), seasons[fcstTgtDate.month-1])
    else:
        forecastID="{}-{}".format(gl.predictorDate.strftime("%Y%m"), fcstTgtDate.strftime("%b"))
    forecastDir="{}/{}/{}".format(gl.config['rootDir'], forecastID, gl.config['targetType'])

    mapsDir="{}/maps/".format(forecastDir)
    timeseriesDir="{}/timeseries/".format(forecastDir)
    outputDir="{}/output/".format(forecastDir)
    diagsDir="{}/diagnostics/".format(forecastDir)

    for adir in [mapsDir,outputDir, diagsDir,timeseriesDir]:
        if not os.path.exists(adir):
            print("\toutput directory {} does not exist. creating...".format(adir))
            # exist_ok avoids a failure if the directory appears between the
            # existence check and the creation (e.g. two tasks started together)
            os.makedirs(adir, exist_ok=True)
            print("\tdone")
    showMessage("done")

    #calculating observed terciles
    #is there a need to do a strict control of overlap???
    result=getObsTerciles(predictand, predictandHcst)
    if result is None:
        raise StopEarly
    obsTercile,tercThresh=result

    #setting up cross-validation
    cv=cvs[gl.config['crossVal']]

    #arguments for regressor
    kwargs=regressor_configs[gl.config['model']]

    #regression model - fail with a clear message instead of leaving `regressor` unbound
    method=gl.config['method']
    if method=="PCR":
        regressor = PCRegressor(regressor_name=gl.config['model'], **kwargs)
    elif method=="CCA":
        regressor = CCAregressor(regressor_name=gl.config['model'], **kwargs)
    else:
        raise ValueError("Unsupported method '{}': expected 'PCR' or 'CCA'".format(method))

    #cross-validated hindcast
    showMessage("Calculating cross-validated hindcast...")
    cvHcst = cross_val_predict(regressor,predictorHcst,  predictandHcst, cv=cv)
    cvHcst=pd.DataFrame(cvHcst, index=predictandHcst.index, columns=predictandHcst.columns)

    #actual prediction
    showMessage("Calculating deteriministic forecast...")
    regressor.fit(predictorHcst,  predictandHcst)
    detFcst=regressor.predict(predictorFcst)
    detFcst=pd.DataFrame(detFcst, index=[fcstTgtDate], columns=predictandHcst.columns)

    #calculate forecast anomalies relative to the configured climatological period
    refData=predictand[str(gl.config["climStartYr"]):str(gl.config["climEndYr"])]
    detFcst=getFcstAnomalies(detFcst,refData)

    #deriving probabilistic prediction
    showMessage("Calculating probabilistic hindcast and forecast using error variance...")
    result=probabilisticForecast(cvHcst, predictandHcst,detFcst["forecast"],tercThresh)
    if result is None:
        raise StopEarly
    probFcst,probHcst=result
    showMessage("Hindcast and forecast calculated.")

    #calculating skill
    showMessage("Calculating skill scores...")
    scores=getSkill(probHcst,cvHcst,predictandHcst,obsTercile)
    if scores is None:
        raise StopEarly

    #saving data

    showMessage("Plotting forecast maps...")
    #plotting forecast - gridded data is stacked over its lat/lon column levels
    if gl.config["targetType"]=="grid":
        detfcst=detFcst.stack(level=["lat","lon"],future_stack=True).droplevel(0).T
        probfcst=probFcst.stack(level=["lat","lon"],future_stack=True).droplevel(0).T
    else:
        detfcst=detFcst.stack(future_stack=True).droplevel(0).T
        probfcst=probFcst.stack(future_stack=True).droplevel(0).T

    plotMaps(detfcst, geoData, mapsDir, forecastID, zonesVector)
    plotMaps(probfcst, geoData, mapsDir, forecastID, zonesVector)

    showMessage("Plotting skill maps...")
    #plotting skill scores
    plotMaps(scores, geoData, mapsDir, forecastID, zonesVector)

    showMessage("Plotting time series...")
    plotTimeSeries(cvHcst,predictandHcst, detFcst, tercThresh, timeseriesDir, forecastID)

    showMessage("All done!")



class StopEarly(Exception):
    """Signal that the processing stream should be abandoned without a result.

    Raised by computeModel when one of its steps returns None.
    """

    
class Worker(QtCore.QThread):
    """Thread that runs one callable and reports progress through Qt signals.

    Args:
        task_name: label used in log messages and passed to ``finished``.
        task_function: callable executed in ``run``.
        *args, **kwargs: forwarded unchanged to ``task_function``.
    """
    # Emitted with one line (or block) of log text.
    log = QtCore.pyqtSignal(str)
    # Emitted with the task name from the ``finally`` clause of run(), i.e.
    # while run() is still executing.
    # NOTE(review): this attribute has the same name as QThread's built-in
    # finished() signal and therefore shadows it - confirm that is intended.
    finished = QtCore.pyqtSignal(str)

    def __init__(self, task_name, task_function, *args, **kwargs):
        super().__init__()
        self.task_name=task_name
        self.task_function = task_function
        self.args = args
        self.kwargs = kwargs

    def run(self):
        """Run the provided function in a thread and emit logs.

        StopEarly is reported as a controlled stop; any other exception is
        reported with its traceback.  ``finished`` is emitted in every case.
        """
        from html import escape

        try:
            self.log.emit(f"Task '{self.task_name}' started...")
            # Run the task
            self.task_function(*self.args, **self.kwargs)
            self.log.emit(f"Task '{self.task_name}' finished successfully.")
        except StopEarly:
            # StopEarly is the workflow's deliberate abort signal, not a crash,
            # so it is logged without a traceback.
            self.log.emit(f"Task '{self.task_name}' stopped early.")
        except Exception:
            tb = traceback.format_exc()
            # MainWindow renders log text with appendHtml, so traceback parts
            # such as "<module>" must be escaped or they are parsed as markup.
            self.log.emit(f"Error occurred in {self.task_name}:\n{escape(tb, quote=False)}")
        finally:
            self.finished.emit(self.task_name)
            

class MainWindow(QtWidgets.QMainWindow):
    """Main window built from ``forecast.ui``; starts model runs in Worker threads."""
    # Carries log text to append_log; worker log signals are forwarded into it.
    log_signal = QtCore.pyqtSignal(str)

    def __init__(self):
        """Load the UI file and connect the button and log signals."""
        super().__init__()
        uic.loadUi("forecast.ui", self)

        # started workers are referenced here; cleanup_worker prunes the list
        self.workers = []

        self.log_signal.connect(self.append_log)

        # buttons switched together by set_buttons_enabled
        self.buttons = [self.button_run0]

        def launch_model(_checked, idx=0):
            # first argument is whatever the clicked signal passes; it is ignored
            self.start_task(f"Model {idx}", computeModel, idx)

        self.button_run0.clicked.connect(launch_model)
        self.clearLogButton.clicked.connect(self.logWindow.clear)
        # directory browser
        self.browseButton.clicked.connect(self.browse_directory)

    # ---------- Thread Handling ----------
    def start_task(self, name, func, *args):
        """Create a Worker for ``func(*args)``, wire its signals and start it."""
        thread = Worker(name, func, *args)
        thread.log.connect(self.log_signal.emit)
        # the worker's finished signal triggers pruning of the workers list
        thread.finished.connect(self.cleanup_worker)
        # hold a reference for as long as the worker is listed
        self.workers.append(thread)
        thread.start()

    def append_log(self, message: str):
        """Append ``message`` to the log widget inside a ``<pre>`` element."""
        self.logWindow.appendHtml(f"<pre>{message}</pre>")
        self.logWindow.ensureCursorVisible()

    def cleanup_worker(self, task_name):
        """Keep only the workers that still report running, then log the cleanup."""
        still_running = []
        for thread in self.workers:
            if thread.isRunning():
                still_running.append(thread)
        self.workers = still_running
        self.logWindow.appendHtml(f"<i>Task '{task_name}' cleaned up.</i>")

    def set_buttons_enabled(self, enabled: bool):
        """Enable or disable every button listed in ``self.buttons``."""
        for button in self.buttons:
            button.setEnabled(enabled)

    def browse_directory(self):
        """Let the user pick a directory and put it into the directory line edit."""
        chosen = QFileDialog.getExistingDirectory(self, "Select Directory", "")
        if not chosen:
            # dialog returned an empty path: leave the field untouched
            return
        self.lineEditDirectory.setText(chosen)
            
    
# ---- option tables ---------------------------------------------------------

# target periods: single months followed by three-month seasons
tgtSeass=["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec","Jan-Mar","Feb-Apr","Mar-May","Apr-Jun","May-Jul","Jun-Aug","Jul-Sep","Aug-Oct","Sep-Nov","Oct-Dec","Nov-Jan","Dec-Feb"]

# predictor (source) months
srcMons=["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]

# NOTE(review): this is a set, so iteration order is not defined; if it is used
# to fill a combo box a list may be wanted - confirm against the consumer.
timeAggregations={"sum","mean"}

# code -> [display label, keyword arguments]
crossvalidators={
    "KF":["K-Fold",{"n_splits":5}],
    "LOO":["Leave One Out",{}],
}

#can be read from json - potentially editable by user
# code -> [display label, keyword arguments]
regressors = {
    "OLS":["Linear regression", {}],
    "Lasso":["Lasso regression", {'alpha': 0.01}],
    "Ridge":["Ridge regression", {'alpha': 1.0}],
    "RF":["Random Forest", {'n_estimators': 100, 'max_depth': 5}],
    "MLP":["Multi Layer Perceptron", {'hidden_layer_sizes': (50, 25), 'max_iter': 1000, 'random_state': 0}],
    "Trees":["Decision Trees", {'max_depth': 2}]
}

# code -> [display label, keyword arguments]
preprocessors={
    "PCR":["Principal Component Regression (PCR)", {}],
    "CCA":["Canonical Corelation Analysis (CCA)", {}]
}


# ---- application start-up --------------------------------------------------
# Everything that needs the QApplication lives under the guard: `app` only
# exists here, so the calls that use it must not run when this code is imported.
if __name__ == "__main__":

    #shows the main window
    app = QtWidgets.QApplication(sys.argv)
    gl.window = MainWindow()
    gl.window.show()

    makeConfig()
    populateGui()

    # blocks until the event loop ends; raises SystemExit with its return code
    sys.exit(app.exec_())

sst


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [1]:
#this is the main processing stream
try:
    # NOTE(review): the original try body was empty, which is a syntax error.
    # computeModel() is presumably the intended call, since it is the function
    # that raises StopEarly - confirm.
    computeModel()

except (TypeError, ValueError) as e:
    print(f"An error occurred: {e}")
    # Walk to the innermost traceback entry so the reported line is where the
    # error was raised, not the line of the call inside this try block.
    tb = e.__traceback__
    while tb is not None and tb.tb_next is not None:
        tb = tb.tb_next
    if tb is not None:
        print(f"Error occurred on line: {tb.tb_lineno}")

except StopEarly:
    print("Execution stopped")

NameError: name 'StopEarly' is not defined