In [None]:
import sys
import os
from pathlib import Path

# Locate the project root: start from the kernel's working directory and step
# up one level when that directory's name contains "analisis" (presumably the
# folder this notebook lives in — confirm against the repository layout).
root = Path().resolve()

# Only the current directory's own name is inspected. Matching against the full
# working-directory string would also fire when an ancestor folder happens to
# contain "analisis", which would move `root` above the real project root.
if "analisis" in root.name:
    root = root.parent
    # Relative paths used later in the notebook are resolved from the root.
    os.chdir(root)

src_path = str(root / "src")

# Make the packages under src/ importable; skip the append when the cell is
# re-run in the same kernel so sys.path does not accumulate duplicates.
if src_path not in sys.path:
    sys.path.append(src_path)
from datetime import datetime
import pytz
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dashboard.train import train
from utils.utils import ALERTS_END_TIMESTAMP, TZ

In [None]:

# Timezone-aware datetime handed to train(). ALERTS_END_TIMESTAMP is divided by
# 1000 before conversion, i.e. it is treated as epoch milliseconds.
alerts_end = datetime.fromtimestamp(ALERTS_END_TIMESTAMP / 1000, tz=pytz.timezone(TZ))

# NOTE(review): the meaning of the first positional flag is not visible from
# this notebook — confirm against dashboard.train.train.
ml = train(False, alerts_end)

# The following cells read ml.x, ml.y and ml.model, so stop here with an
# explicit message instead of failing later with an unrelated error.
if not ml:
    raise ValueError("train() returned a falsy result; there is no model to evaluate")

In [None]:

from sklearn.model_selection import LearningCurveDisplay, ShuffleSplit

# Output location, relative to the current working directory.
learning_curve_path = Path("thesis/images/learning_curve.png")

fig, ax = plt.subplots(figsize=(10, 6))

common_params = {
    "X": ml.x,
    "y": ml.y,
    # Five training-set sizes, from 10% to 100% of the available training split.
    "train_sizes": np.linspace(0.1, 1.0, 5),
    # 20 random 80/20 train/validation splits; fixed seed for reproducibility.
    "cv": ShuffleSplit(n_splits=20, test_size=0.2, random_state=0),
    # Plot both the training and the validation curves.
    "score_type": "both",
    "n_jobs": 4,
    "line_kw": {"marker": "o"},
    # Shade +/- one standard deviation around each mean curve.
    "std_display_style": "fill_between",
    # NOTE(review): score_name only labels the axis; no `scoring` is passed, so
    # the estimator's default scorer is used — confirm that it is accuracy.
    "score_name": "Accuracy",
}

LearningCurveDisplay.from_estimator(ml.model, **common_params, ax=ax)

# Only the handles are needed: the default labels are replaced below.
handles, _ = ax.get_legend_handles_labels()

ax.set_xlim(left=0)
ax.set_ylim(0, 1)
ax.set_xlabel("Número de muestras en el conjunto de entrenamiento")
ax.set_ylabel("Exactitud (Accuracy)")
ax.tick_params(axis="both", which="major", labelsize=12)
# The first two handles are the training and validation lines, in that order.
ax.legend(
    handles[:2],
    ["Puntaje de entrenamiento", "Puntaje de validación"],
    loc="lower right",
    fontsize=12,
)
fig.tight_layout()

# Create the target directory on a fresh checkout so saving does not fail with
# FileNotFoundError, and save through the figure object rather than pyplot's
# implicit "current figure".
learning_curve_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(learning_curve_path)

# plt.show() works with notebook inline backends, whereas Figure.show() can
# emit a "non-GUI backend" warning there. Saving happens first because the
# inline backend closes the figure once it has been shown.
plt.show()