<a href="https://colab.research.google.com/github/sanntana21/TFG/blob/first_model_implementation/entrenamiento_individual.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
# Mount Google Drive at /content/drive so the notebook can reach files stored
# there. The `google.colab` module is imported here, right before its only use.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch.nn as nn
from fastai.data.core import DataLoaders
from fastai.learner import Learner
from fastai.metrics import mse
from fastai.losses import MSELossFlat
from fastai.callback.all import *
from fastai.data.transforms import *
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import random as random
import plotly.express as px

# **FUNCIONES AUXILIARES**

In [22]:
def make_preds(model, input_data):
  """
  Run `model` on `input_data` and return its predictions with every
  size-1 axis removed.

  Parameters
  ----------
  model: trained model exposing a ``predict(input_data)`` method
  input_data: windowed input data (same kind of data model was trained on)

  Returns
  -------
  numpy.ndarray holding the output of ``model.predict(input_data)`` squeezed
  with ``np.squeeze`` (e.g. an (n, 1) output becomes a length-n vector).
  """
  forecast = model.predict(input_data)
  # NumPy does the squeezing: the notebook's import cell brings in `np` but
  # never `tf`, so the former `tf.squeeze` call failed with a NameError.
  return np.squeeze(forecast)

def plot_predictions_vs_real(predictions, reals):
    """
    Print and plot predicted values against the real ones as two lines.

    Parameters
    ----------
    predictions: 1-D sequence of predicted values; its length sets the
        number of points drawn for each line.
    reals: object indexable with 0 .. len(predictions) - 1; only those
        entries are read.

    A long-format frame with the columns "hour" (position of the point),
    "participant" ("prediction" or "real") and "value" is built, printed,
    and shown as an interactive Plotly line chart. Returns None.
    """
    number_of_points = len(predictions)
    hours = list(range(number_of_points))
    real_values = [reals[i] for i in range(number_of_points)]

    # Build the whole frame in a single call: the prediction rows first, then
    # the real rows. Appending the real rows one at a time through `df.loc`
    # re-allocates the frame on every insertion.
    df = pd.DataFrame({
        "hour": hours + hours,
        "participant": ["prediction"] * number_of_points + ["real"] * number_of_points,
        "value": np.concatenate([np.asarray(predictions), np.asarray(real_values)]),
    })

    print(df)

    # No matplotlib figure is opened here: the chart is rendered by Plotly, so
    # a `plt.figure` call would only leave an empty figure behind.
    fig = px.line(df, x = "hour", y = "value" , title = "predicitons vs reals" , color = "participant")
    # NOTE(review): the selector buttons step by "year" while the "hour" axis
    # holds plain integer positions - confirm they are useful on this axis.
    fig.update_xaxes(
        rangeslider_visible = True,
        rangeselector = dict(
            buttons = [
                dict(count=1,label="1y",step="year",stepmode="backward"),
                dict(count=2,label="2y",step="year",stepmode="backward"),
                dict(count=3,label="3y",step="year",stepmode="backward"),
                dict(step="all"),
            ]
        ),
    )
    fig.show()

def set_output_precision(decimals):
  """
  Make NumPy and pandas display floating point numbers with a fixed
  number of decimals.

  decimals: digits to show after the decimal point. It is used as the
  NumPy print precision and to build the fixed-point template
  (for example '{:.6f}') that pandas applies to floats.
  """
  np.set_printoptions(precision=decimals)
  # Doubled braces are literal braces, so decimals=6 yields '{:.6f}'.
  float_template = f"{{:.{decimals}f}}"
  pd.options.display.float_format = float_template.format

# Apply a six-decimal display format to NumPy and pandas output from here on.
set_output_precision(6)

# **Procesamiento de los datos**

In [23]:
# Load the per-minute METS records from Drive into a pandas DataFrame.
# NOTE(review): the dtype mapping names "METS_by_hour_for_all_population",
# while the printed head shows the columns participant / timestamp / minute /
# METS - confirm whether "METS" was the intended key.
df = pd.read_csv(
    "/content/drive/MyDrive/TFG/Resources/METS_in_minutes.csv",
    sep=",",
    dtype={"METS_by_hour_for_all_population":"float32"},
)

# Quick sanity report: spread of the METS column and the first rows.
print("Desviación de METS:" , df["METS"].std())
print('\033[1m' + "SET OF VALUES\n" + '\033[0m')
print(df.head())

# Count the missing METS entries directly on that column.
total_nan_values = df["METS"].isna().sum()

print('\033[1m' + "\nValores NULOS: "  + '\033[0m' + str(total_nan_values) )

Desviación de METS: 0.4516855486903255
[1mSET OF VALUES
[0m
  participant            timestamp  minute     METS
0       A3FNz  2021-11-16 00:00:00       0 0.000000
1       A3FNz  2021-11-16 00:01:00       1 0.000000
2       A3FNz  2021-11-16 00:02:00       2 0.000000
3       A3FNz  2021-11-16 00:03:00       3 0.000000
4       A3FNz  2021-11-16 00:04:00       4 0.000000
[1m
Valores NULOS: [0m0


In [24]:
#Generate trainable sets for the LSTM

def create_minutes_to_minutes_forecasting_sets(values, window_size=1440, horizon=1440):
    """
    Slide a window over a per-minute series and pair every input window with
    the block of minutes that immediately follows it.

    Parameters
    ----------
    values: DataFrame with a "minute" column (integer minute index) and a
        "METS" column.
    window_size: number of minutes covered by each input window
        (default 1440).
    horizon: number of minutes covered by each target block (default 1440).

    Returns
    -------
    (X, y): two lists with one entry per window start i = 0, 1, 2, ...
        X[i] lists the METS of the rows whose minute lies in
        [i, i + window_size); y[i] lists those whose minute lies in
        [i + window_size, i + window_size + horizon). The last start is the
        one whose target block ends at the largest minute present. Both
        lists are empty when `values` has no rows or when no start fits.
    """
    X = []
    y = []
    if len(values) == 0:
        # Nothing to window; the maximum minute would be NaN.
        return X, y

    # Extract both columns once so each window costs two array comparisons
    # instead of a fresh DataFrame selection.
    minutes = values["minute"].to_numpy()
    mets = values["METS"].to_numpy()

    # With the defaults this equals max_minute - 1439 * 2.
    number_of_windows = int(values["minute"].max()) - (window_size + horizon) + 2
    for first_minute_in_window in range(number_of_windows):
        last_minute_in_window = first_minute_in_window + window_size
        last_minute_in_prediction = last_minute_in_window + horizon
        in_window = (minutes >= first_minute_in_window) & (minutes < last_minute_in_window)
        in_prediction = (minutes >= last_minute_in_window) & (minutes < last_minute_in_prediction)
        X.append(mets[in_window].tolist())
        y.append(mets[in_prediction].tolist())
    return X, y


def create_minutes_to_hours_forecasting_sets(y_in_minutes):
    """
    Collapse each per-minute target window into 24 hourly totals.

    y_in_minutes: iterable of sliceable per-minute windows.

    Returns a list with one entry per window; each entry holds 24 sums, one
    for every consecutive 60-value slice starting at offsets 0, 60, ..., 1380.
    A slice that falls beyond the end of a shorter window sums to 0.
    """
    hour_starts = range(0, 24 * 60, 60)
    hourly_targets = []
    for minute_window in y_in_minutes:
        hourly_sums = []
        for start in hour_starts:
            hourly_sums.append(sum(minute_window[start:start + 60]))
        hourly_targets.append(hourly_sums)
    return hourly_targets


def create_minutes_to_day_forecasting_sets(y_in_minutes):
    """
    Collapse each per-minute target window into a single total.

    y_in_minutes: iterable of per-minute windows.

    Returns a list with one single-element list per window, holding the sum
    of all the values in that window.
    """
    return [[sum(minute_window)] for minute_window in y_in_minutes]


# **SETS GENERATION**

In [None]:
# One entry per participant in each accumulator: the input windows and the
# minute-, hour- and day-level targets derived from them.
dataX, dataY_minute, dataY_hour, dataY_day = [], [], [], []

participants = df['participant'].unique()
for participant in participants:
    participant_rows = df.loc[df["participant"] == participant]
    pX, pY = create_minutes_to_minutes_forecasting_sets(participant_rows)
    dataX.append(pX)
    dataY_minute.append(pY)
    # Hourly and daily targets are aggregated from the per-minute targets.
    dataY_hour.append(create_minutes_to_hours_forecasting_sets(pY))
    dataY_day.append(create_minutes_to_day_forecasting_sets(pY))

dataX = np.array(dataX)
# Only temporary array copies of the targets are inspected here; the three
# target accumulators themselves stay plain Python lists.
target_arrays = [np.array(targets) for targets in (dataY_minute, dataY_hour, dataY_day)]
for dataY in target_arrays:
    # Shapes of the inputs and of the current target granularity.
    print('\033[1m' + "\t SHAPES" + '\033[0m')
    print(dataX.shape)
    print(dataY.shape)
    # First participant's inputs and targets.
    print('\033[1m'+"\t First Element"+'\033[0m')
    print(dataX[0])
    print(dataY[0])

    print(".-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.")

In [None]:
import pickle
file_path = '/content/drive/MyDrive/TFG/Resources/'
documents = ['minuteX','minuteY','hourX','hourY']
data_to_save = [dataX,dataY_minute,dataY_hour,dataY_day]
# Dump every object to <file_path><name>.pkl, pairing names and objects by
# their position in the two lists above.
# NOTE(review): by position, 'hourX' receives dataY_hour and 'hourY' receives
# dataY_day - confirm these labels are intended before anything is renamed,
# since whatever loads these files depends on the current names.
for document, payload in zip(documents, data_to_save):
    with open(file_path + document + ".pkl", 'wb') as file:
        pickle.dump(payload, file)