# Import

In [None]:
import numpy as np

from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.metrics import classification_report

from scipy.stats import mode

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.offline as pyo
pyo.init_notebook_mode()

# Local imports
from ipynb.fs.defs.task3_1 import DatasetManager
from ipynb.fs.defs.task3_2 import ModelManager, plot_bar_data, plot_collection

# Code

## ModelManager Class

In [None]:
# ModelManager class is modified to accommodate new clustering models
class ModelManager3(ModelManager):
    """
    ModelManager variant whose models are MiniBatchKMeans clusterers.

    Clusters are turned into predictions using the training targets:
    the mean target per cluster for regression and the majority class
    per cluster for classification.
    """

    def __init__(self, feature_set, targets):
        super().__init__(feature_set, targets)

    @staticmethod
    def _cluster_target_means(labels, targets, n_clusters):
        """
        Returns an array holding the mean target of each cluster.

        Clusters without any member fall back to the overall target mean,
        so that no NaN can leak into the predictions.
        """
        means = np.full(n_clusters, targets.mean(), dtype=float)
        for cluster in range(n_clusters):
            members = targets[labels == cluster]
            if members.size:
                means[cluster] = members.mean()
        return means

    @staticmethod
    def _cluster_majority_classes(labels, targets, n_clusters):
        """
        Returns an array holding the most frequent class of each cluster.

        Clusters without any member fall back to the most frequent class
        of the whole target array.
        """
        classes, counts = np.unique(targets, return_counts=True)
        majority = np.full(n_clusters, classes[counts.argmax()], dtype=classes.dtype)
        for cluster in range(n_clusters):
            members = targets[labels == cluster]
            if members.size:
                member_classes, member_counts = np.unique(members, return_counts=True)
                majority[cluster] = member_classes[member_counts.argmax()]
        return majority

    def train_model_reg(self, n_clusters=50, batch_size=50, random_state=None):
        """
        Trains a KMeans clustering model for
        regression tasks.

        The prediction for an instance is the mean training target of the
        cluster the instance is assigned to.

        Parameters:
            n_clusters: number of clusters to fit (default 50).
            batch_size: mini-batch size used while fitting (default 50).
            random_state: optional seed forwarded to MiniBatchKMeans.

        Stores the fitted model and the training/test predictions on the
        instance (_trained_model, _train_preds, _test_preds).
        """
        assert self._train_and_test_sets is not None, "You don't have your training and test sets."
        # Getting training and test data
        X_train = self._train_and_test_sets.get("X_train")
        y_train = np.asarray(self._train_and_test_sets.get("y_train"))
        X_test = self._train_and_test_sets.get("X_test")

        # Fit the KMeans model on the training data
        print("Fitting model...")
        km = MiniBatchKMeans(
            n_clusters=n_clusters,
            batch_size=batch_size,
            random_state=random_state,
        )
        km.fit(X_train)
        print("Model fitting complete...")

        # Assign instances in training set to closest cluster and get mean target value of each cluster
        print("Making predictions...")
        train_labels = km.predict(X_train)
        cluster_means = self._cluster_target_means(train_labels, y_train, km.n_clusters)
        train_preds = cluster_means[train_labels]

        # Predict clusters of the test set and assign mean target value of corresponding cluster to each instance
        test_labels = km.predict(X_test)
        test_preds = cluster_means[test_labels]

        self._train_preds = train_preds
        self._test_preds = test_preds
        self._trained_model = km

    def train_model_clf(self, n_classes):
        """
        Trains a KMeans clustering model for 
        classification tasks.

        One cluster is fitted per class. Because cluster ids are arbitrary,
        each cluster is mapped to the most frequent training class among its
        members, and that class is the prediction for every instance assigned
        to the cluster.

        Parameters:
            n_classes: number of classes; used as both the number of
                clusters and the mini-batch size.

        Stores the fitted model and the training/test predictions on the
        instance (_trained_model, _train_preds, _test_preds).
        """
        assert self._train_and_test_sets is not None, "You don't have your training and test sets."
        # Getting training and test data
        X_train = self._train_and_test_sets.get("X_train")
        y_train = np.asarray(self._train_and_test_sets.get("y_train"))
        X_test = self._train_and_test_sets.get("X_test")

        # Fit the KMeans model on the training data
        print("Fitting model...")
        km = MiniBatchKMeans(n_clusters=n_classes, batch_size=n_classes)
        km.fit(X_train)
        print("Model fitting complete...")

        # Predict clusters on the training set and learn the cluster -> class mapping
        print("Making predictions...")
        train_clusters = km.predict(X_train)
        cluster_classes = self._cluster_majority_classes(train_clusters, y_train, km.n_clusters)
        train_preds = cluster_classes[train_clusters]

        # Predict clusters on the test set and translate them into classes
        test_preds = cluster_classes[km.predict(X_test)]

        # Assign class variables
        self._trained_model = km
        self._train_preds = train_preds
        self._test_preds = test_preds

    def visualise_results_clf(self):
        """
        Creates a series of plots to visualise performance
        results for a classification model.

        Combines a key-metric plot (training and test sets) with a
        true-vs-predicted plot (test set) into a single figure and returns
        the object produced by plot_collection.
        """
        assert self._trained_model is not None, "You haven't trained a model yet."
        # Getting training, test and predictions data
        y_train = self._train_and_test_sets.get("y_train")
        y_test = self._train_and_test_sets.get("y_test")
        train_preds = self._train_preds
        test_preds = self._test_preds

        # Get key metric plot
        key_metric_plot = self._get_key_metric_plot_clf(y_train, y_test, train_preds, test_preds)

        # True values vs predictions
        true_pred_plot = self._get_true_pred_plot(y_test, test_preds)

        # Combining plots
        # NOTE(review): keys look like (row, col, x-axis title, y-axis title) — confirm against plot_collection
        plots = {
            (1,1,"Dataset",""): key_metric_plot,
            (2,1,"","Class"): true_pred_plot,
        }

        subplot_titles = [
            "Key Metrics", 
            "True vs Predicted Values",
        ]

        # Each plot spans both columns of its row
        specs = [
            [{"type": "bar", "colspan": 2}, None],
            [{"type": "xy", "colspan": 2}, None],
        ]

        combined_plot = plot_collection(
            plots, 
            rows=2, 
            cols=2, 
            subplot_titles=subplot_titles, 
            specs=specs, 
            title="Model Performance Results", 
        )

        return combined_plot

## Loading Datasets

In [None]:
# Productivity dataset; using optimal configuration as determined in Task3-1
# NOTE(review): the list and the string passed to load_and_preprocess are presumably
# column indices and an imputation strategy — confirm against DatasetManager in task3_1
gwp_dsm = DatasetManager("gwp_assessment")
gwp_dsm.load_and_preprocess([0,1,2,3], "iterative")
gwp_dsm.create_feature_set(7)
gwp_dsm.scale_feature_set()

# Star dataset; using optimal configuration as determined in Task3-1
star_dsm = DatasetManager("star_assessment")
star_dsm.load_and_preprocess([0,1,8,9,12,16,17], "knn")
star_dsm.create_feature_set(8)
star_dsm.scale_feature_set()

## Getting Targets and Features

In [None]:
# Productivity dataset: scaled features, and targets taken from the last column of the complete dataset
gwp_features = gwp_dsm.get_scaled_feat_ds()
gwp_targets = gwp_dsm.get_complete_ds()[:, -1]

# Star dataset: scaled features, and targets taken from the last column of the complete dataset
star_features = star_dsm.get_scaled_feat_ds()
star_targets = star_dsm.get_complete_ds()[:, -1]

## Initialising Model Managers

In [None]:
# GWP (productivity) dataset: manager used for the regression experiments below
gwp_mm = ModelManager3(gwp_features, gwp_targets)

# Star dataset: manager used for the classification experiments below
star_mm = ModelManager3(star_features, star_targets)

## Model Evaluation

**Methodology**
1. Datasets will be split into training and test sets.
2. Models will be trained on training sets; cross validation will be used to optimise hyperparameters.
3. Model performance will be evaluated using selected evaluation metrics; the results will then be visualised to paint a full picture of a model's performance.
4. Steps 1-3 will be repeated for several training-test splits (80-20, 75-25, 70-30, 60-40, 50-50) to assess the effect of split ratio on model performance.

**Evaluation metrics**
- Star dataset: accuracy, precision, recall, F1 score. These metrics are ideal metrics for evaluating classification models as they provide comprehensive insight into a model's performance. Accuracy helps understand the overall effectiveness of the model. However, it can be misleading in imbalanced datasets, which is where precision and recall come in. They provide a more nuanced view of the model's ability to correctly identify positive instances and avoid false positives. The F1 score harmonises precision and recall, offering a single metric that seeks a balance between these two characteristics, making it especially useful when the costs of false positives and false negatives are significantly different.

- Productivity dataset: mean squared error (MSE), mean absolute error (MAE), R2 score. These are robust metrics for evaluating regression models, with each illuminating different aspects of model performance. MSE emphasizes larger errors by squaring residuals, making it useful when larger errors are undesirable. MAE provides a more straightforward measure of average error magnitude, regardless of direction. The R2 score complements these by providing a relative measure of how much variance the model can explain, giving a broader picture of model performance beyond just raw error. These combined provide a comprehensive assessment of the model's effectiveness.

**Notes**
- Due to the size of the star dataset (as well as the limitations of the machine on which this program was developed) only a small subset of the dataset (approximately 2%) will be used to train and test models.

### 80-20 Split

#### Splitting Datasets into Train and Test Sets

In [None]:
# Splitting productivity dataset (80% train, 20% test)
gwp_mm.split_dataset(train_size=0.8, test_size=0.2)

# Splitting star dataset: only 2% of it is used in total (1.6% train, 0.4% test),
# which keeps the same 80-20 ratio
star_mm.split_dataset(train_size=0.016, test_size=0.004)

#### Model Training

In [None]:
# Productivity dataset: regression, predicting the mean training target of each instance's cluster
gwp_mm.train_model_reg()

In [None]:
# Star dataset: clustering-based classification with 3 clusters
star_mm.train_model_clf(3)

#### Results

In [None]:
# Productivity dataset
gwp_mm.visualise_results_reg()

In [None]:
# Star dataset: key metrics and true-vs-predicted plots
star_mm.visualise_results_clf()

#### Analysis (INCOMPLETE)
**Productivity dataset**
- The model achieves a satisfactorily low score across all error metrics (MSE and MAE) in both the training and test sets.
- However, the R2 score suggests that the model is unlikely to generalise well to novel data and may therefore require more fine-tuning. 
- The R2 score is particularly low on the test set, which could be an indication of underfitting.

**Star dataset**
- The model's accuracy, precision and recall scores are consistent across both the training and test sets, with the model scoring only slightly better on the training set across all metrics.
- The scores indicate that the model has neither over- nor under-fitted on the training data and can generalise well to new data.

### 75-25 Split

#### Splitting Datasets into Train and Test Sets

In [None]:
# Splitting productivity dataset (75% train, 25% test)
gwp_mm.split_dataset(train_size=0.75, test_size=0.25)

# Splitting star dataset: only 2% of it is used in total (1.5% train, 0.5% test),
# which keeps the same 75-25 ratio
star_mm.split_dataset(train_size=0.015, test_size=0.005)

#### Model Training

In [None]:
# Productivity dataset: regression, predicting the mean training target of each instance's cluster
gwp_mm.train_model_reg()

In [None]:
# Star dataset: clustering-based classification with 3 clusters
star_mm.train_model_clf(3)

#### Results

In [None]:
# Productivity dataset
gwp_mm.visualise_results_reg()

In [None]:
# Star dataset: key metrics and true-vs-predicted plots
star_mm.visualise_results_clf()

#### Analysis (INCOMPLETE)
**Productivity dataset**
- The model achieves a satisfactorily low score across all error metrics (MSE and MAE) in both the training and test sets.
- However, the R2 score suggests that the model is unlikely to generalise well to novel data and may therefore require more fine-tuning. 
- The R2 score is particularly low on the test set, which could be an indication of underfitting.

**Star dataset**
- The model's accuracy, precision and recall scores are consistent across both the training and test sets, with the model scoring only slightly better on the training set across all metrics.
- The scores indicate that the model has neither over- nor under-fitted on the training data and can generalise well to new data.

### 70-30 Split

#### Splitting Datasets into Train and Test Sets

In [None]:
# Splitting productivity dataset (70% train, 30% test)
gwp_mm.split_dataset(train_size=0.7, test_size=0.3)

# Splitting star dataset: only 2% of it is used in total (1.4% train, 0.6% test),
# which keeps the same 70-30 ratio
star_mm.split_dataset(train_size=0.014, test_size=0.006)

#### Model Training

In [None]:
# Productivity dataset: regression, predicting the mean training target of each instance's cluster
gwp_mm.train_model_reg()

In [None]:
# Star dataset: clustering-based classification with 3 clusters
star_mm.train_model_clf(3)

#### Results

In [None]:
# Productivity dataset
gwp_mm.visualise_results_reg()

In [None]:
# Star dataset: key metrics and true-vs-predicted plots
star_mm.visualise_results_clf()

#### Analysis (INCOMPLETE)
**Productivity dataset**
- The model achieves a satisfactorily low score across all error metrics (MSE and MAE) in both the training and test sets.
- However, the R2 score suggests that the model is unlikely to generalise well to novel data and may therefore require more fine-tuning. 
- The R2 score is particularly low on the test set, which could be an indication of underfitting.

**Star dataset**
- The model's accuracy, precision and recall scores are consistent across both the training and test sets, with the model scoring only slightly better on the training set across all metrics.
- The scores indicate that the model has neither over- nor under-fitted on the training data and can generalise well to new data.

### 60-40 Split

#### Splitting Datasets into Train and Test Sets

In [None]:
# Splitting productivity dataset (60% train, 40% test)
gwp_mm.split_dataset(train_size=0.6, test_size=0.4)

# Splitting star dataset: only 2% of it is used in total (1.2% train, 0.8% test),
# which keeps the same 60-40 ratio
star_mm.split_dataset(train_size=0.012, test_size=0.008)

#### Model Training

In [None]:
# Productivity dataset: regression, predicting the mean training target of each instance's cluster
gwp_mm.train_model_reg()

In [None]:
# Star dataset: clustering-based classification with 3 clusters
star_mm.train_model_clf(3)

#### Results

In [None]:
# Productivity dataset
gwp_mm.visualise_results_reg()

In [None]:
# Star dataset: key metrics and true-vs-predicted plots
star_mm.visualise_results_clf()

#### Analysis (INCOMPLETE)
**Productivity dataset**
- The model achieves a satisfactorily low score across all error metrics (MSE and MAE) in both the training and test sets.
- However, the R2 score suggests that the model is unlikely to generalise well to novel data and may therefore require more fine-tuning. 
- The R2 score is particularly low on the test set, which could be an indication of underfitting.

**Star dataset**
- The model's accuracy, precision and recall scores are consistent across both the training and test sets, with the model scoring only slightly better on the training set across all metrics.
- The scores indicate that the model has neither over- nor under-fitted on the training data and can generalise well to new data.

### 50-50 Split

#### Splitting Datasets into Train and Test Sets

In [None]:
# Splitting productivity dataset (50% train, 50% test)
gwp_mm.split_dataset(train_size=0.5, test_size=0.5)

# Splitting star dataset: only 2% of it is used in total (1% train, 1% test),
# which keeps the same 50-50 ratio
star_mm.split_dataset(train_size=0.01, test_size=0.01)

#### Model Training

In [None]:
# Productivity dataset: regression, predicting the mean training target of each instance's cluster
gwp_mm.train_model_reg()

In [None]:
# Star dataset: clustering-based classification with 3 clusters
star_mm.train_model_clf(3)

#### Results

In [None]:
# Productivity dataset
gwp_mm.visualise_results_reg()

In [None]:
# Star dataset: key metrics and true-vs-predicted plots
star_mm.visualise_results_clf()

#### Analysis (INCOMPLETE)
**Productivity dataset**
- The model achieves a satisfactorily low score across all error metrics (MSE and MAE) in both the training and test sets.
- However, the R2 score suggests that the model is unlikely to generalise well to novel data and may therefore require more fine-tuning. 
- The R2 score is particularly low on the test set, which could be an indication of underfitting.

**Star dataset**
- The model's accuracy, precision and recall scores are consistent across both the training and test sets, with the model scoring only slightly better on the training set across all metrics.
- The scores indicate that the model has neither over- nor under-fitted on the training data and can generalise well to new data.

### Analysis of split ratios (INCOMPLETE)
**Productivity dataset**
- The overall accuracy of the model is relatively unaffected by the changing of split ratio. The MSE and MAE scores are fairly consistent across all split ratios with only a slight uptick as split ratio goes from 80-20 to 50-50.
- The R2 scores are very low for all split ratios but they vary arbitrarily. A robust relationship/correlation cannot be determined; this may require further investigation.
- 80-20 appears to be the optimal split ratio.

**Star dataset**
- Whilst model performance (across all metrics) is adequate for all split ratios, the gap in performance between the training set and test set begins to narrow slightly as the split ratio approaches 50-50, with overall performance decreasing as well; this suggests that as the model is fed less and less training data, it starts to underfit.
- The true-positive rate for all classes remains fairly consistent as the split ratio approaches 50-50, with the 80-20 split having the slight edge.
- Conversely, the average precision for classes 1 and 2 picks up slightly as the ratio approaches 50-50; class 0 remains fairly consistent. This would suggest that split ratios closer to 50-50 have a better distribution of all the classes in the dataset.
- The optimal split ratio appears to be 80-20.

# Markdown Answer

Something...