<div
  style="
    display: flex;
    justify-content: center;
    flex-direction: column;
    align-items: center;
  "
>
<div style="width: 400px; padding: 20px; background: #111; border: 1px solid #333; color: #eee">
  <p style="opacity: 0.5; margin-bottom: -10px"><i>TDT4173 Machine Learning 2023</i></p>
  <h1 style="margin-bottom: -5px"><b>Long Notebook</b></h1>
  <h3>Kaggle Name: Neural Net Ninjas</h3>
  <br />
  <h3>Team Members:</h3>
  <table style="margin: 0 auto; width: 100%; text-align: left">
    <tr style="background: #222">
      <th style="border-width: 0.5px; border-color: #555">Name</th>
      <th style="border-width: 0.5px; border-color: #555">Student ID</th>
    </tr>
    <tr>
      <td style="border-width: 0.5px; border-color: #555">Antonsen Eggen, Sivert</td>
      <td style="border-width: 0.5px; border-color: #555">123 45 678</td>
    </tr>
    <tr>
      <td style="border-width: 0.5px; border-color: #555">Broch Grude, Kristoffer VI Nicolay</td>
      <td style="border-width: 0.5px; border-color: #555">123 45 678</td>
    </tr>
    <tr>
      <td style="border-width: 0.5px; border-color: #555">Raa, Mathias</td>
      <td style="border-width: 0.5px; border-color: #555">123 45 678</td>
    </tr>
  </table>
  </div>
</div>

---

### Outline
* [1 – Setup & Utils](#1)
* [2 – Exploratory Data Analysis](#2)
    * [2.1 – Domain Knowledge](#2.1)
    * [2.2 – Data Quality](#2.2)
    * [2.3 – Feature Exploration](#2.3)
    * [2.4 – Aggregations](#2.4)
* [3 – Preprocessing](#3)
    * [3.1 – Feature Selection](#3.1)
* [4 – Modeling](#4)
    * [4.1 – Model Selection](#4.1)
    * [4.2 – Model Evaluation](#4.2)
* [5 – Postprocessing](#5)
* [6 – Conclusion](#6)

# 1 – Setup & Utils

In [9]:
# Utils
from IPython.utils import io
import os

# Data
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Scikit-learn
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from sklearn.model_selection import cross_val_predict, KFold, train_test_split

# Models
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.common import space

import tensorflow as tf

import lightgbm as lgb
import xgboost as xgb
import catboost as cb

import scipy.optimize as opt

In [11]:
# Load a saved AutoGluon TabularPredictor from the given directory.
# NOTE(review): the path is relative and points at a timestamped run folder —
# confirm it exists relative to the working directory before a Restart & Run All.
model = TabularPredictor.load("../notebooks/AutogluonModels/ag-20231109_121143")

In [15]:
# Display the loaded predictor's leaderboard: one row per model with its
# validation score, prediction/fit timings, stack level and fit order.
model.leaderboard()

                     model   score_val  pred_time_val     fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      WeightedEnsemble_L2  -76.269572      20.138919  1731.731595                0.000258           0.112333            2       True         12
1        LightGBMXT_BAG_L1  -79.130943       5.825268   401.225449                5.825268         401.225449            1       True          3
2    NeuralNetTorch_BAG_L1  -80.999773       0.158062   286.837707                0.158062         286.837707            1       True         10
3     LightGBMLarge_BAG_L1  -82.199357      14.155331  1043.556106               14.155331        1043.556106            1       True         11
4          LightGBM_BAG_L1  -84.301498       5.900289   330.119560                5.900289         330.119560            1       True          4
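5           XGBoost_BAG_L1  -87.991734       4.499097  2448.605472                4.499097        2448.605472            1       True          9
6          CatBoost_BAG_L1  -89.615045       0.137059   777.549562                0.137059         777.549562            1       True          6
7   NeuralNetFastAI_BAG_L1  -96.824800       0.289305   693.155043                0.289305         693.155043            1       True          8
8     ExtraTreesMSE_BAG_L1  -97.479302       0.849640     6.579317                0.849640           6.579317            1       True          7
9   RandomForestMSE_BAG_L1  -98.704215       0.812380    29.071130                0.812380          29.071130            1       True          5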

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,-76.269572,20.138919,1731.731595,0.000258,0.112333,2,True,12
1,LightGBMXT_BAG_L1,-79.130943,5.825268,401.225449,5.825268,401.225449,1,True,3
2,NeuralNetTorch_BAG_L1,-80.999773,0.158062,286.837707,0.158062,286.837707,1,True,10
3,LightGBMLarge_BAG_L1,-82.199357,14.155331,1043.556106,14.155331,1043.556106,1,True,11
4,LightGBM_BAG_L1,-84.301498,5.900289,330.11956,5.900289,330.11956,1,True,4
5,XGBoost_BAG_L1,-87.991734,4.499097,2448.605472,4.499097,2448.605472,1,True,9
6,CatBoost_BAG_L1,-89.615045,0.137059,777.549562,0.137059,777.549562,1,True,6
7,NeuralNetFastAI_BAG_L1,-96.8248,0.289305,693.155043,0.289305,693.155043,1,True,8
8,ExtraTreesMSE_BAG_L1,-97.479302,0.84964,6.579317,0.84964,6.579317,1,True,7
9,RandomForestMSE_BAG_L1,-98.704215,0.81238,29.07113,0.81238,29.07113,1,True,5


In [8]:
def scale_data(data, cols=None):
    """
    Min-max scale the feature columns and the target of a model-data frame.

    Both scalers are fitted only on the rows whose ``data_type`` is
    "observed" or "estimated", and are then applied to every row of a copy
    of ``data``. The frame passed in is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding a ``data_type`` column, a ``pv_measurement`` column and
        the feature columns to scale.
    cols : list, optional
        Feature columns to scale. When omitted, every column of ``data`` is
        used except the ones named in the exclusion tuple below.

    Returns
    -------
    tuple
        ``(scaled_copy, X_scaler, y_scaler)``: the scaled copy of ``data``,
        the fitted feature scaler and the fitted target scaler.
    """
    if cols is None:
        # Columns that are never scaled by default.
        excluded = (
            "location",
            "dew_or_rime:idx",
            "is_day:idx",
            "is_in_shadow:idx",
            "pv_measurement",
            "data_type",
        )
        cols = [name for name in data.columns if name not in excluded]

    # Only observed/estimated rows contribute to the fitted min/max.
    fit_rows = data[data["data_type"].isin(["observed", "estimated"])]

    # ``fit`` hands back the scaler it was called on, so it can be chained.
    X_scaler = MinMaxScaler().fit(fit_rows[cols])
    y_scaler = MinMaxScaler().fit(fit_rows[["pv_measurement"]])

    scaled = data.copy()
    scaled[cols] = X_scaler.transform(scaled[cols])
    # The target is transformed from the untouched input frame.
    scaled[["pv_measurement"]] = y_scaler.transform(data[["pv_measurement"]])

    return scaled, X_scaler, y_scaler

# 2 – Exploratory Data Analysis

## 2.1 – Domain Knowledge

## 2.2 – Data Quality

## 2.3 – Feature Exploration

## 2.4 – Aggregations

# 3 – Preprocessing

# 4 – Modeling

# 5 – Postprocessing