In [10]:
from scripts.imports import *

# Load the pickled five-site table and keep only rows whose 'Confidence'
# column equals 1. (pd.read_pickle should only be used on trusted files.)
df = pd.read_pickle('five_sites_data_snow_cc.pkl').loc[
    lambda frame: frame['Confidence'] == 1
]

# This first one is rudimentary because I think I can build it quickly. I want to make a second attempt further down that uses the segments in each 500 m cell individually to estimate fractional snow cover (FSC), i.e. an algorithm that makes a binary snow/non-snow detection per segment, so that if 4 of 5 segments are detected as snow we get 80% FSC.

#### Classification Accuracy and RMSE for MxD10A1F

In [13]:
from scripts.imports import *

# Reload the five-site table so this cell can run on its own, keeping only
# rows whose 'Confidence' column equals 1.
df = pd.read_pickle('five_sites_data_snow_cc.pkl')
df = df.loc[df['Confidence'] == 1]

# Working frame for this cell. Rows with MxD10A1F above 100 are discarded
# (presumably non-percentage flag values -- TODO confirm), then any row with
# a missing value is dropped and the index is renumbered from 0.
X = df[['camera', 'meanEgstrong', 'meanEvstrong', 'msw', 'asr', 'night',
        'MxD10A1F', 'FSC', 'JointSnow']]
X = X.loc[X['MxD10A1F'] <= 100].dropna().reset_index(drop=True)

##############################

# RMSE between MxD10A1F divided by 100 and the FSC column.
squared_error = (X['MxD10A1F'] / 100 - X['FSC']) ** 2
rmse = np.sqrt(np.mean(squared_error))

print(f"RMSE: {rmse}")

##############################

from sklearn.metrics import f1_score

# Sweep 100 evenly spaced cut-offs over the observed MxD10A1F range. For each
# one, predict 1 where MxD10A1F exceeds the cut-off and score the prediction
# against X['FSC'] with F1. Only a strictly better score replaces the current
# best, so the lowest cut-off wins ties.
thresholds = np.linspace(X['MxD10A1F'].min(), X['MxD10A1F'].max(), 100)
best_f1, best_threshold = 0, 0

for threshold in thresholds:
    y_pred = np.where(X['MxD10A1F'] > threshold, 1, 0)
    current_f1 = f1_score(X['FSC'], y_pred)

    if current_f1 > best_f1:
        best_f1, best_threshold = current_f1, threshold

print()
print("Classification FSC")
print(f"Optimal Threshold: {best_threshold}")
print(f"Best F1-Score: {best_f1}")

##############################

# Reset the running best before the two-threshold (three-class) search.
best_f1 = 0
best_thresholds = (0, 0)

# Three-way binning of a single value by two cut points
def classify_mxd10a1f(value, threshold1, threshold2):
    """Map ``value`` to class 0, 1 or 2 using two thresholds.

    Returns 0 when ``value <= threshold1``, 1 when
    ``threshold1 < value <= threshold2``, and 2 in every other case
    (including when neither comparison holds, e.g. for NaN).
    """
    if value <= threshold1:
        return 0  # Class 0
    # Class 1 inside the (threshold1, threshold2] band, Class 2 otherwise
    return 1 if threshold1 < value <= threshold2 else 2

# Candidate cut points: 100 evenly spaced values over the observed MxD10A1F
# range, used for both the lower and the upper threshold.
thresholds1 = np.linspace(X['MxD10A1F'].min(), X['MxD10A1F'].max(), 100)
thresholds2 = np.linspace(X['MxD10A1F'].min(), X['MxD10A1F'].max(), 100)

# The values being classified do not change between iterations, so pull them
# out of the DataFrame once instead of on every threshold pair.
mxd_values = X['MxD10A1F'].to_numpy()

for threshold1 in thresholds1:
    for threshold2 in thresholds2:
        if threshold1 >= threshold2:
            continue  # Ensure that threshold1 < threshold2

        # Vectorised three-way binning. With right=True, np.digitize returns
        # 0 for value <= threshold1, 1 for threshold1 < value <= threshold2
        # and 2 for value > threshold2 -- the same rule as
        # classify_mxd10a1f, without a Python-level call per row.
        y_pred = np.digitize(mxd_values, [threshold1, threshold2], right=True)

        # Macro-averaged F1 between JointSnow and the predicted classes
        current_f1 = f1_score(X['JointSnow'], y_pred, average='macro')

        # Keep the pair only when it is strictly better than the best so far
        if current_f1 > best_f1:
            best_f1 = current_f1
            best_thresholds = (threshold1, threshold2)

print()
print("Classification JointSnow")
print(f"Optimal Thresholds: {best_thresholds}")
print(f"Best Macro-Averaged F1-Score: {best_f1}")

RMSE: 0.31574939008390074

Classification FSC
Optimal Threshold: 6.434343434343434
Best F1-Score: 0.9846378931967813

Classification JointSnow
Optimal Thresholds: (6.434343434343434, 55.151515151515156)
Best Macro-Averaged F1-Score: 0.6611897780614685


#### How many data points from each site?

In [14]:
from scripts.imports import *

# Reload the table and keep only rows whose 'Confidence' column equals 1.
df = pd.read_pickle('five_sites_data_snow_cc.pkl')
df = df.loc[df['Confidence'] == 1]

# Site label plus the five feature columns; rows with any missing value dropped.
X = df[['camera', 'meanEgstrong', 'meanEvstrong', 'msw', 'asr', 'night']].dropna()

# Number of remaining rows per site, printed on one line in this fixed order.
site_order = ['sodankyla_full', 'delta_junction', 'marcell_MN', 'lacclair', 'torgnon']
rows_per_site = [X[X['camera'] == site].shape[0] for site in site_order]
print(*rows_per_site)

556 446 515 241 82


#### Linear Regression

In [22]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.utils import resample
from sklearn.model_selection import LeaveOneGroupOut

# Step 1: split rows by site. Three sites are used for cross-validation in
# this cell; the other two are only set aside here and not used further.
train_sites = ['sodankyla_full', 'delta_junction', 'marcell_MN']
test_sites = ['lacclair', 'torgnon']

train_df = df.loc[df['camera'].isin(train_sites)]
test_df = df.loc[df['camera'].isin(test_sites)]

# Step 2: features, target and CV groups. Rows with a missing feature are
# dropped; the target and site labels are then looked up on the surviving
# index so all three stay row-aligned. 'night' is dummy-encoded with its
# first level dropped.
feature_columns = ['meanEgstrong', 'meanEvstrong', 'msw', 'asr', 'night']
X_train = train_df[feature_columns].dropna()
y_train = train_df['FSC'].loc[X_train.index]
groups = train_df['camera'].loc[X_train.index]
X_train = pd.get_dummies(X_train, columns=['night'], drop_first=True)

# Step 3: leave-one-site-out cross-validation. Inside every fold the linear
# regression is refit on n_bootstraps resamples (with replacement) of the
# training sites and scored on the untouched held-out site.
logo = LeaveOneGroupOut()
lr_model = LinearRegression()

n_bootstraps = 100
cv_results = []       # mean bootstrap RMSE, one entry per fold
left_out_groups = []  # held-out site, same order as cv_results

for train_idx, test_idx in logo.split(X_train, y_train, groups):
    X_fold_train, y_fold_train = X_train.iloc[train_idx], y_train.iloc[train_idx]
    X_fold_val, y_fold_val = X_train.iloc[test_idx], y_train.iloc[test_idx]

    # The validation fold holds a single site, so its first unique label names it.
    left_out_group = groups.iloc[test_idx].unique()[0]
    left_out_groups.append(left_out_group)

    fold_rmse_scores = []
    for seed in range(n_bootstraps):
        # The loop counter doubles as the resampling seed, so draws are reproducible.
        X_bootstrap, y_bootstrap = resample(X_fold_train, y_fold_train, random_state=seed)

        lr_model.fit(X_bootstrap, y_bootstrap)
        y_val_pred = lr_model.predict(X_fold_val)

        rmse = np.sqrt(mean_squared_error(y_fold_val, y_val_pred))
        fold_rmse_scores.append(rmse)

    cv_results.append(np.mean(fold_rmse_scores))

# Step 4: per-fold results followed by the mean over folds.
for i, group in enumerate(left_out_groups):
    print(f"Fold {i+1}: Left-out group: {group}, RMSE: {cv_results[i]}")
print("Mean Cross-Validation RMSE: ", np.mean(cv_results))


Fold 1: Left-out group: delta_junction, RMSE: 0.3379415492106153
Fold 2: Left-out group: marcell_MN, RMSE: 0.47158627934567215
Fold 3: Left-out group: sodankyla_full, RMSE: 0.35563478728273373
Mean Cross-Validation RMSE:  0.38838753861300707


#### Logistic Regression

In [23]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.utils import resample
from sklearn.model_selection import LeaveOneGroupOut

# Step 1: split rows by site. Three sites are used for cross-validation in
# this cell; the other two are only set aside here and not used further.
train_sites = ['sodankyla_full', 'delta_junction', 'marcell_MN']
test_sites = ['lacclair', 'torgnon']

train_df = df.loc[df['camera'].isin(train_sites)]
test_df = df.loc[df['camera'].isin(test_sites)]

# Step 2: features, target and CV groups. Rows with a missing feature are
# dropped; the target and site labels are then looked up on the surviving
# index so all three stay row-aligned. 'night' is dummy-encoded with its
# first level dropped.
feature_columns = ['meanEgstrong', 'meanEvstrong', 'msw', 'asr', 'night']
X_train = train_df[feature_columns].dropna()
y_train = train_df['FSC'].loc[X_train.index]
groups = train_df['camera'].loc[X_train.index]
X_train = pd.get_dummies(X_train, columns=['night'], drop_first=True)

# Step 3: leave-one-site-out cross-validation. Inside every fold the logistic
# regression is refit on n_bootstraps resamples (with replacement) of the
# training sites and scored with F1 on the untouched held-out site.
logo = LeaveOneGroupOut()
lr_model = LogisticRegression(max_iter=10000)

n_bootstraps = 100
cv_results = []       # mean bootstrap F1, one entry per fold
left_out_groups = []  # held-out site, same order as cv_results

for train_idx, test_idx in logo.split(X_train, y_train, groups):
    X_fold_train, y_fold_train = X_train.iloc[train_idx], y_train.iloc[train_idx]
    X_fold_val, y_fold_val = X_train.iloc[test_idx], y_train.iloc[test_idx]

    # The validation fold holds a single site, so its first unique label names it.
    left_out_group = groups.iloc[test_idx].unique()[0]
    left_out_groups.append(left_out_group)

    fold_f1_scores = []
    for seed in range(n_bootstraps):
        # The loop counter doubles as the resampling seed, so draws are reproducible.
        X_bootstrap, y_bootstrap = resample(X_fold_train, y_fold_train, random_state=seed)

        lr_model.fit(X_bootstrap, y_bootstrap)
        y_val_pred = lr_model.predict(X_fold_val)

        f1 = f1_score(y_fold_val, y_val_pred)
        fold_f1_scores.append(f1)

    cv_results.append(np.mean(fold_f1_scores))

# Step 4: per-fold results followed by the mean over folds.
for i, group in enumerate(left_out_groups):
    print(f"Fold {i+1}: Left-out group: {group}, F1 Score: {cv_results[i]}")
print("Mean Cross-Validation F1 Score: ", np.mean(cv_results))


Fold 1: Left-out group: delta_junction, F1 Score: 0.8875110846775404
Fold 2: Left-out group: marcell_MN, F1 Score: 0.768263651552506
Fold 3: Left-out group: sodankyla_full, F1 Score: 0.8677306672951767
Mean Cross-Validation F1 Score:  0.841168467841741


#### Decision Tree Classifier

In [19]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.utils import resample
from sklearn.model_selection import LeaveOneGroupOut

# Step 1: split rows by site. Three sites are used for cross-validation in
# this cell; the other two are only set aside here and not used further.
train_sites = ['sodankyla_full', 'delta_junction', 'marcell_MN']
test_sites = ['lacclair', 'torgnon']

train_df = df.loc[df['camera'].isin(train_sites)]
test_df = df.loc[df['camera'].isin(test_sites)]

# Step 2: features, target and CV groups. Rows with a missing feature are
# dropped; the target and site labels are then looked up on the surviving
# index so all three stay row-aligned. 'night' is dummy-encoded with its
# first level dropped.
feature_columns = ['meanEgstrong', 'meanEvstrong', 'msw', 'asr', 'night']
X_train = train_df[feature_columns].dropna()
y_train = train_df['FSC'].loc[X_train.index]
groups = train_df['camera'].loc[X_train.index]
X_train = pd.get_dummies(X_train, columns=['night'], drop_first=True)

# Step 3: leave-one-site-out cross-validation. Inside every fold the decision
# tree is refit on n_bootstraps resamples (with replacement) of the training
# sites and scored with F1 on the untouched held-out site.
logo = LeaveOneGroupOut()
dt_classifier = DecisionTreeClassifier(random_state=42)

n_bootstraps = 100
cv_results = []       # mean bootstrap F1, one entry per fold
left_out_groups = []  # held-out site, same order as cv_results

for train_idx, test_idx in logo.split(X_train, y_train, groups):
    X_fold_train, y_fold_train = X_train.iloc[train_idx], y_train.iloc[train_idx]
    X_fold_val, y_fold_val = X_train.iloc[test_idx], y_train.iloc[test_idx]

    # The validation fold holds a single site, so its first unique label names it.
    left_out_group = groups.iloc[test_idx].unique()[0]
    left_out_groups.append(left_out_group)

    fold_f1_scores = []
    for seed in range(n_bootstraps):
        # The loop counter doubles as the resampling seed, so draws are reproducible.
        X_bootstrap, y_bootstrap = resample(X_fold_train, y_fold_train, random_state=seed)

        dt_classifier.fit(X_bootstrap, y_bootstrap)
        y_val_pred = dt_classifier.predict(X_fold_val)

        f1 = f1_score(y_fold_val, y_val_pred)
        fold_f1_scores.append(f1)

    cv_results.append(np.mean(fold_f1_scores))

# Step 4: per-fold results followed by the mean over folds.
for i, group in enumerate(left_out_groups):
    print(f"Fold {i+1}: Left-out group: {group}, F1 Score: {cv_results[i]}")
print("Mean Cross-Validation F1 Score: ", np.mean(cv_results))

Fold 1: Left-out group: delta_junction, F1 Score: 0.792529702974205
Fold 2: Left-out group: marcell_MN, F1 Score: 0.6875191064407464
Fold 3: Left-out group: sodankyla_full, F1 Score: 0.863481287548975
Mean Cross-Validation F1 Score:  0.7811766989879755


#### Random Forest

Discrete

In [20]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.utils import resample
from sklearn.model_selection import LeaveOneGroupOut

# Step 1: split rows by site. Three sites are used for cross-validation in
# this cell; the other two are only set aside here and not used further.
train_sites = ['sodankyla_full', 'delta_junction', 'marcell_MN']
test_sites = ['lacclair', 'torgnon']

train_df = df.loc[df['camera'].isin(train_sites)]
test_df = df.loc[df['camera'].isin(test_sites)]

# Step 2: features, target and CV groups. Rows with a missing feature are
# dropped; the target and site labels are then looked up on the surviving
# index so all three stay row-aligned. 'night' is dummy-encoded with its
# first level dropped.
feature_columns = ['meanEgstrong', 'meanEvstrong', 'msw', 'asr', 'night']
X_train = train_df[feature_columns].dropna()
y_train = train_df['FSC'].loc[X_train.index]
groups = train_df['camera'].loc[X_train.index]
X_train = pd.get_dummies(X_train, columns=['night'], drop_first=True)

# Step 3: leave-one-site-out cross-validation. Inside every fold the random
# forest classifier is refit on n_bootstraps resamples (with replacement) of
# the training sites and scored with F1 on the untouched held-out site.
logo = LeaveOneGroupOut()
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

n_bootstraps = 100
cv_results = []       # mean bootstrap F1, one entry per fold
left_out_groups = []  # held-out site, same order as cv_results

for train_idx, test_idx in logo.split(X_train, y_train, groups):
    X_fold_train, y_fold_train = X_train.iloc[train_idx], y_train.iloc[train_idx]
    X_fold_val, y_fold_val = X_train.iloc[test_idx], y_train.iloc[test_idx]

    # The validation fold holds a single site, so its first unique label names it.
    left_out_group = groups.iloc[test_idx].unique()[0]
    left_out_groups.append(left_out_group)

    fold_f1_scores = []
    for seed in range(n_bootstraps):
        # The loop counter doubles as the resampling seed, so draws are reproducible.
        X_bootstrap, y_bootstrap = resample(X_fold_train, y_fold_train, random_state=seed)

        rf_classifier.fit(X_bootstrap, y_bootstrap)
        y_val_pred = rf_classifier.predict(X_fold_val)

        f1 = f1_score(y_fold_val, y_val_pred)
        fold_f1_scores.append(f1)

    cv_results.append(np.mean(fold_f1_scores))

# Step 4: per-fold results followed by the mean over folds.
for i, group in enumerate(left_out_groups):
    print(f"Fold {i+1}: Left-out group: {group}, F1 Score: {cv_results[i]}")
print("Mean Cross-Validation F1 Score: ", np.mean(cv_results))

Fold 1: Left-out group: delta_junction, F1 Score: 0.8456644970202808
Fold 2: Left-out group: marcell_MN, F1 Score: 0.7495452431687483
Fold 3: Left-out group: sodankyla_full, F1 Score: 0.9007255720762799
Mean Cross-Validation F1 Score:  0.8319784374217697


Continuous

In [21]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.utils import resample
from sklearn.model_selection import LeaveOneGroupOut

# Step 1: split rows by site. Three sites are used for cross-validation in
# this cell; the other two are only set aside here and not used further.
train_sites = ['sodankyla_full', 'delta_junction', 'marcell_MN']
test_sites = ['lacclair', 'torgnon']

train_df = df.loc[df['camera'].isin(train_sites)]
test_df = df.loc[df['camera'].isin(test_sites)]

# Step 2: features, target and CV groups. Rows with a missing feature are
# dropped; the target and site labels are then looked up on the surviving
# index so all three stay row-aligned. 'night' is dummy-encoded with its
# first level dropped.
feature_columns = ['meanEgstrong', 'meanEvstrong', 'msw', 'asr', 'night']
X_train = train_df[feature_columns].dropna()
y_train = train_df['FSC'].loc[X_train.index]
groups = train_df['camera'].loc[X_train.index]
X_train = pd.get_dummies(X_train, columns=['night'], drop_first=True)

# Step 3: leave-one-site-out cross-validation. Inside every fold the random
# forest regressor is refit on n_bootstraps resamples (with replacement) of
# the training sites and scored with RMSE on the untouched held-out site.
logo = LeaveOneGroupOut()
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)

n_bootstraps = 100
cv_results = []       # mean bootstrap RMSE, one entry per fold
left_out_groups = []  # held-out site, same order as cv_results

for train_idx, test_idx in logo.split(X_train, y_train, groups):
    X_fold_train, y_fold_train = X_train.iloc[train_idx], y_train.iloc[train_idx]
    X_fold_val, y_fold_val = X_train.iloc[test_idx], y_train.iloc[test_idx]

    # The validation fold holds a single site, so its first unique label names it.
    left_out_group = groups.iloc[test_idx].unique()[0]
    left_out_groups.append(left_out_group)

    fold_rmse_scores = []
    for seed in range(n_bootstraps):
        # The loop counter doubles as the resampling seed, so draws are reproducible.
        X_bootstrap, y_bootstrap = resample(X_fold_train, y_fold_train, random_state=seed)

        rf_regressor.fit(X_bootstrap, y_bootstrap)
        y_val_pred = rf_regressor.predict(X_fold_val)

        rmse = np.sqrt(mean_squared_error(y_fold_val, y_val_pred))
        fold_rmse_scores.append(rmse)

    cv_results.append(np.mean(fold_rmse_scores))

# Step 4: per-fold results followed by the mean over folds.
for i, group in enumerate(left_out_groups):
    print(f"Fold {i+1}: Left-out group: {group}, RMSE: {cv_results[i]}")
print("Mean Cross-Validation RMSE: ", np.mean(cv_results))


Fold 1: Left-out group: delta_junction, RMSE: 0.3930058156271325
Fold 2: Left-out group: marcell_MN, RMSE: 0.4253265189429128
Fold 3: Left-out group: sodankyla_full, RMSE: 0.3024553661194076
Mean Cross-Validation RMSE:  0.37359590022981765


#### Support Vector Machines

SVC

In [25]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from sklearn.utils import resample
from sklearn.model_selection import GridSearchCV, LeaveOneGroupOut

# Step 1: Splitting the data based on the 'camera' column
train_sites = ['sodankyla_full', 'delta_junction', 'marcell_MN']
test_sites = ['lacclair', 'torgnon']

# Training set: data from 'sodankyla_full', 'delta_junction', and 'marcell_MN'
train_df = df[df['camera'].isin(train_sites)]

# Test set: data from 'lacclair' and 'torgnon' (set aside; not used in this cell)
test_df = df[df['camera'].isin(test_sites)]

# Extract features and labels
X_train = train_df[['meanEgstrong', 'meanEvstrong', 'msw', 'asr', 'night']]
y_train = train_df['FSC']

# Drop rows with a missing feature, then look the target up on the surviving
# index so X_train and y_train stay row-aligned
X_train = X_train.dropna()
y_train = y_train[X_train.index]

# Dummy encode 'night' feature (first level dropped)
X_train = pd.get_dummies(X_train, columns=['night'], drop_first=True)

# Site label per remaining row, used as the group identifier for cross-validation
groups = train_df['camera'][X_train.index]

# Step 2: Define the parameter grid for GridSearchCV.
# One sub-grid per kernel: 'gamma' has no effect on the linear kernel, so
# crossing it with kernel='linear' only fits every linear candidate twice
# (16 candidates instead of 12). Exact score ties between a linear and an RBF
# candidate may resolve differently from the single crossed grid, because the
# candidate order changes.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10, 100]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto']},
]

# Step 3: Leave-One-Group-Out Cross-Validation
logo = LeaveOneGroupOut()

# Initialize SVC model
svc_model = SVC()

# Step 4: Perform bootstrapping and cross-validation
n_bootstraps = 100  # Number of bootstraps
cv_results = []  # Mean bootstrap F1, one entry per fold
left_out_groups = []  # To store the left-out group (site) for each fold

for train_idx, test_idx in logo.split(X_train, y_train, groups):
    # Get the training and validation data for this fold
    X_fold_train, X_fold_val = X_train.iloc[train_idx], X_train.iloc[test_idx]
    y_fold_train, y_fold_val = y_train.iloc[train_idx], y_train.iloc[test_idx]

    # Get the left-out group (site) for this fold
    left_out_group = groups.iloc[test_idx].unique()[0]  # Only one unique site is left out
    left_out_groups.append(left_out_group)

    # Perform GridSearchCV with bootstrapping
    fold_f1_scores = []

    for i in range(n_bootstraps):
        # Bootstrap sampling with replacement; the loop counter is the seed
        X_bootstrap, y_bootstrap = resample(X_fold_train, y_fold_train, random_state=i)

        # GridSearchCV for SVC.
        # NOTE(review): the inner 3-fold CV runs on the bootstrap sample, so
        # duplicated rows can land in both the inner training and validation
        # splits -- confirm this is acceptable for hyper-parameter selection.
        svc_search = GridSearchCV(svc_model, param_grid, cv=3, scoring='f1')
        svc_search.fit(X_bootstrap, y_bootstrap)
        best_svc = svc_search.best_estimator_

        # Predict and evaluate the selected SVC on the held-out site
        y_val_pred = best_svc.predict(X_fold_val)
        f1 = f1_score(y_fold_val, y_val_pred)
        fold_f1_scores.append(f1)

    # Store the average F1 score for this fold
    cv_results.append(np.mean(fold_f1_scores))

# Step 5: Print the cross-validation results along with the left-out groups
for i, group in enumerate(left_out_groups):
    print(f"Fold {i+1}: Left-out group: {group}, F1 Score: {cv_results[i]}")
print("Mean Cross-Validation F1 Score: ", np.mean(cv_results))


Fold 1: Left-out group: delta_junction, F1 Score: 0.763728133131482
Fold 2: Left-out group: marcell_MN, F1 Score: 0.7164310687684331
Fold 3: Left-out group: sodankyla_full, F1 Score: 0.8818990618535529
Mean Cross-Validation F1 Score:  0.7873527545844894


SVR

#### k-Nearest Neighbours

Classifier

Regressor

#### Neural Network