In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import dask
import multiprocessing
import hvplot.pandas
import holoviews as hv
import holoviews.operation.datashader as hd
import hvplot.dask
import panel as pn
import datashader as ds
#import datashader.transfer_functions as tf
from pymongo import MongoClient
from holoviews.operation.datashader import datashade, rasterize
from matplotlib.colors import LinearSegmentedColormap
#import colorcet as cc
from bokeh.models import HoverTool
from bokeh.palettes import Category20

In [None]:
# Select Bokeh as the rendering backend for HoloViews.
hv.extension("bokeh")
# Initialize Panel with a dotted green loading spinner; components stretch to the available width by default.
pn.extension(loading_spinner='dots', loading_color='#00aa41', sizing_mode="stretch_width")
# Make DataFrame.plot(...) use the HoloViews plotting backend.
pd.options.plotting.backend = 'holoviews'

In [None]:
# Record-count limits for the database queries
HOURS_COUNT = 720 # 30 days of hourly records (only referenced by commented-out queries in this notebook)
#HOURS_COUNT = 24 # 1 day 
MINS_COUNT =  500 # Maximum number of 1-minute records fetched; note that one full day would be 1440 records

In [None]:
# MongoDB connection (no host is given, so MongoClient uses its default host and port)
print("Connecting to database...")

# The client is built outside the ``try`` so that ``client`` is always bound inside the handler.
client = MongoClient(serverSelectionTimeoutMS = 2000)

try:
    # server_info() needs an answer from the server, so an unreachable database is
    # reported here (bounded by the 2000 ms server-selection timeout configured above).
    client.server_info()
except Exception:
    print('Connection to database (' + str(client.HOST) + ':' + str(client.PORT) + ') failed.\nCheck if the database is running.')
    # Re-raise so execution stops at this cell with the underlying error visible,
    # instead of calling exit() from inside a notebook.
    raise

print('Database connection successful.')
caco_db = client['CACO']
clusco_min_collection = caco_db['CLUSCO_min']
clusco_hour_collection = caco_db['CLUSCO_hour']

    
# Filter matching every document whose name is scb_pixel_temperature
SCB_pixel_temperature_all_query = dict(name='scb_pixel_temperature')

# Same filter restricted to the current year: January 1st (inclusive) up to now (exclusive)
SCB_pixel_temperature_current_year_query = {
    'name': 'scb_pixel_temperature',
    'date': {
        '$gte': dt.datetime(dt.date.today().year, 1, 1),
        '$lt': dt.datetime.today(),
    },
}

# Containers filled from the database: per resolution, one list of
# per-channel value lists and one list with the matching timestamps
data_1min, dates_1min = [], []
data_1hour, dates_1hour = [], []


In [None]:
print('Getting data from DB...')

# Query with a limit
# for document in collection.find(SCB_pixel_temperature_all_query).limit(HOURS_COUNT):
#     data.append(document['avg'])
#     dates.append(document['date'])

# Query with every result for SCB_pixel_temperature_all_query
# for document in collection.find(SCB_pixel_temperature_all_query):
#     data.append(document['avg'])
#     dates.append(document['date'])

# Start from empty containers so that re-running this cell does not append
# the same documents a second time.
data_1hour, dates_1hour = [], []
data_1min, dates_1min = [], []

# Only the two fields read below are requested from the server.
scb_fields_projection = {'avg': 1, 'date': 1, '_id': 0}

# All hourly documents of the current year: cache the date and the per-channel average temperatures.
for document in clusco_hour_collection.find(SCB_pixel_temperature_current_year_query, scb_fields_projection):
    data_1hour.append(document['avg'])
    dates_1hour.append(document['date'])

# 1-minute documents of the current year, capped at MINS_COUNT documents.
# NOTE(review): no sort is applied, so which MINS_COUNT documents are returned depends on the
# server's return order - confirm whether the most recent ones were intended.
for document in clusco_min_collection.find(SCB_pixel_temperature_current_year_query, scb_fields_projection).limit(MINS_COUNT):
    data_1min.append(document['avg'])
    dates_1min.append(document['date'])

In [None]:
print('Building pandas dataframe...')

# The column names are derived from the first record, so fail with a clear message when nothing was loaded
# (otherwise ``data_1min[0]`` / ``data_1hour[0]`` raises a bare IndexError).
if not data_1min or not data_1hour:
    raise ValueError('No scb_pixel_temperature documents were loaded; run the data retrieval cell and check the query.')

# Wide pandas frames: one row per document, one "channel_<n>" column per entry of its 'avg' list
scb_p_temp_df_1min = pd.DataFrame(data_1min, columns=[f"channel_{i+1}" for i in range(len(data_1min[0]))])
scb_p_temp_df_1hour = pd.DataFrame(data_1hour, columns=[f"channel_{i+1}" for i in range(len(data_1hour[0]))])

# Add dates to dataframe
scb_p_temp_df_1min['date'] = pd.to_datetime(dates_1min)
scb_p_temp_df_1hour['date'] = pd.to_datetime(dates_1hour)
#scb_p_temp_df = scb_p_temp_df.set_index('date')

# Melt the frames from wide to long format:
# the channel becomes a variable column and the temperature the value column.
print('Transforms pandas dataframe from wide to long...')
scb_p_temp_df_long_1min = scb_p_temp_df_1min.melt(id_vars=['date'], var_name='channel', value_name='temperature')
scb_p_temp_df_long_1hour = scb_p_temp_df_1hour.melt(id_vars=['date'], var_name='channel', value_name='temperature')

# Drop 0 ºC readings, which probably come from an error while storing or reading the sensor value.
# .copy() gives an independent frame, so the column assignment below does not write into a
# slice of the melted frame (avoids pandas' SettingWithCopyWarning).
scb_p_temp_df_long_1min = scb_p_temp_df_long_1min[scb_p_temp_df_long_1min.temperature != 0].copy()
scb_p_temp_df_long_1hour = scb_p_temp_df_long_1hour[scb_p_temp_df_long_1hour.temperature != 0].copy()

# Strip the 'channel_' prefix so the column holds only the channel number (as a string).
# regex=False: the prefix is a literal, not a pattern.
scb_p_temp_df_long_1min['channel'] = scb_p_temp_df_long_1min['channel'].str.replace('channel_', '', regex=False)
scb_p_temp_df_long_1hour['channel'] = scb_p_temp_df_long_1hour['channel'].str.replace('channel_', '', regex=False)

# Convert the pandas frames to dask frames, one partition per CPU core, kept in memory with persist()

#npartitions=multiprocessing.cpu_count()

#scb_p_temp_dask_df_1min = dask.dataframe.from_pandas(scb_p_temp_df_long_1min, npartitions=2)
#scb_p_temp_dask_df_1hour = dask.dataframe.from_pandas(scb_p_temp_df_long_1hour, npartitions=2)
scb_p_temp_dask_df_1min = dask.dataframe.from_pandas(scb_p_temp_df_long_1min, npartitions=multiprocessing.cpu_count()).persist()
scb_p_temp_dask_df_1hour = dask.dataframe.from_pandas(scb_p_temp_df_long_1hour, npartitions=multiprocessing.cpu_count()).persist()

# Set index to date column to be able to use resample method in dask dataframe
#scb_p_temp_dask_df_1hour = scb_p_temp_dask_df_1hour.set_index('date')

In [None]:
# Preview of the wide 1-hour frame (one column per channel plus 'date')
scb_p_temp_df_1hour.head()

In [None]:
# Preview of the long 1-hour frame (columns: date, channel, temperature)
scb_p_temp_df_long_1hour.head()


In [None]:
# Preview of the dask version of the long 1-hour frame
scb_p_temp_dask_df_1hour.head()

In [None]:
# Bare expression: it is not the last statement of the cell, so the notebook does not display it.
scb_p_temp_dask_df_1hour

# Per-column memory usage of the long 1-hour pandas frame (deep=True requests deep introspection of object columns)
memory_usage = scb_p_temp_df_long_1hour.memory_usage(deep=True)

print(memory_usage)

In [None]:
# Rows of the long 1-hour frame whose temperature is exactly 0.
# NOTE(review): the frame-building cell already drops rows with temperature == 0 from
# scb_p_temp_df_long_1hour, so this selection is presumably always empty once that cell has run -
# confirm whether this is meant as a sanity check or should inspect the unfiltered data.
#dates_with_zero_temp = scb_p_temp_df_long_1hour.loc[scb_p_temp_df_long_1hour['temperature'] == 0, 'date']
dates_with_zero_temp = scb_p_temp_df_long_1hour.loc[scb_p_temp_df_long_1hour['temperature'] == 0]

print(dates_with_zero_temp)

In [None]:
# Custom color map in order to plot temperature colors from 0 to 30ºC being 30ºC color red

#cmap_custom = LinearSegmentedColormap.from_list('mycmap', [(0, (0, 0, 1)), (10/30, (0, 1, 0)), (18/30, (0, 0.85, 0)), (25/30, (1, 0.65, 0)), (30/30, (1, 0, 0)), (1, (1, 0, 0))])

In [None]:
# Test of a line graph built with HoloViews and rendered as a raster image with datashader
# Problems:
# 1) All lines have a shared color gradient.
# 2) The plot was reported to take at least 5 minutes to generate when each of the 1855 channels was
#    filtered and computed separately from the dask frame; one curve per channel is still created here.
# 3) Missing information about which channel/pixel a specific data point belongs to when hovering over the plotted lines.

# Materialize the dask frame once and split it by channel in pandas, instead of running a
# filter + compute over the whole dask frame for every single channel.
# sort=False keeps the channels in order of first appearance.
line_plot_source = scb_p_temp_dask_df_1hour.compute()
line_plot = hv.NdOverlay({channel: channel_frame.hvplot.line(x='date', y='temperature', tools=['hover'])
                           for channel, channel_frame in line_plot_source.groupby('channel', sort=False)})

#line_plot = hv.NdOverlay({channel: scb_p_temp_dask_df_1hour.loc[scb_p_temp_dask_df_1hour['channel'] == channel].compute().hvplot.line(x='date', y='temperature', tools=['hover'], color=channel)
#                          for i, channel in enumerate(scb_p_temp_dask_df_1hour['channel'].unique())})

# Rasterize the plot
rasterized = rasterize(line_plot)

# Set plot options (last expression of the cell, so the plot is displayed)
rasterized.opts(width=600, height=400, padding=0.1, xlabel='Date', ylabel='Temperature (°C)', show_grid=True)

In [None]:
# Test of a line graph built with HoloViews, rasterized with datashader and colored with a custom temperature color map
# Problems:
# 1) The rasterized lines are not accurate since the data is not grouped by channel.
# 2) Missing information about which channel/pixel a specific data point belongs to when hovering over the plotted lines.
# 3) The temperature colors of the mean-aggregated data are not accurate: a strange pattern of lines shows incorrect information.

# Blue -> green -> orange -> red color map; the stop positions are fractions of 30, and the plot options
# below use clim=(0, 30), so they correspond to 0, 10, 18, 25 and 30 ºC.
# Note that the position 1 (= 30/30) appears twice in the list.
cmap_custom = LinearSegmentedColormap.from_list('mycmap', [(
    0, (0, 0, 1)), (10/30, (0, 1, 0)), (18/30, (0, 0.85, 0)), (25/30, (1, 0.65, 0)), (30/30, (1, 0, 0)), (1, (1, 0, 0))])

#sample_data = scb_p_temp_dask_df_1hour.loc[:, ['channel', 'date', 'temperature']].compute()
# Materialize the whole 1-hour dask frame as a pandas frame
sample_data = scb_p_temp_dask_df_1hour.compute()
# sample data from date 2023-04-10 to today
#sample_data = scb_p_temp_dask_df_1hour.loc[scb_p_temp_dask_df_1hour['date'] >= '2023-04-10', ['channel', 'date', 'temperature']].compute()

#sample_data = sample_data.set_index('channel')

# Channel labels are strings at this point; convert them to numbers
sample_data['channel'] = pd.to_numeric(sample_data['channel'])

#sample_data['channel'] = sample_data['channel'].astype('category')

print(sample_data)

tooltips = [
    ('Date', '$x'),
    ('Temperature', '$y'),
    ('Channel', '@channel') # This is the channel/pixel information that is missing in the plot
]

hover = HoverTool(tooltips=tooltips)


# Create a curve object from the sample data and define the dimensions of the data
# NOTE(review): 'by' is passed to hv.Curve here; confirm that the installed HoloViews version accepts it.
curve = hv.Curve(sample_data, kdims=['date'], by='channel', vdims=['temperature', 'channel'], label='SCB Pixel Temperature - All Channels (1 hour resolution)').redim(
    date=hv.Dimension("date", label='Date'),
    temperature=hv.Dimension("temperature", label='Temperature'),
    channel=hv.Dimension("channel", label='Channel'))
    
# print(curve.dimension_values('channel'))

# Rasterize the plot using datashader
# rasterized = rasterize(curve, aggregator=ds.by('channel', ds.mean('temperature'))) # A lot of channels to group by, so it takes a long time to generate the plot
#rasterized = hd.datashade(curve, aggregator=ds.mean('temperature'), cmap=cmap_custom).redim.nodata()

# Mean temperature per pixel of the raster.
# NOTE(review): cmap is passed to rasterize() as well as to .opts() below - confirm rasterize() accepts it.
rasterized = rasterize(curve, aggregator=ds.mean(
    'temperature'), cmap=cmap_custom).redim(
    date=hv.Dimension("date", label='Date'),
    temperature=hv.Dimension("temperature", label='Temperature'),
    channel=hv.Dimension("channel", label='Channel'))


# dimension_values is not called here, so this prints the bound method object rather than any values
print(rasterized.dimension_values)

# Show the rasterized plot
# rasterized.opts(width=800, height=400, padding=0.1, xlabel='Date', ylabel='Temperature (°C)', show_grid=True, tools=[hover]) # For rasterized and datashade by channel
rasterized.opts(width=800, height=400, padding=0.1, xlabel='Date', ylabel='Temperature (°C)',
                show_grid=True, clim=(0, 30), cmap=cmap_custom, tools=[hover], colorbar=True)  # For rasterize


# rasterized.opts(width=600, height=400, padding=0.1, xlabel='Date', ylabel='Temperature (°C)', show_grid=True, tools=[]) # For datashade


In [None]:
# CELL TO TRY TO FIX THE PROBLEMS OF THE PREVIOUS CELL WHEN USING DS.BY

# Test of a line graph built with HoloViews, rasterized with datashader and colored with a custom temperature color map
# Problems:
# 1) RAM usage is excessive and does not allow the plot generation to finish when using ds.by('channel') to group by channel. Need to investigate how to use ds.by properly with dask

cmap_custom = LinearSegmentedColormap.from_list('mycmap', [(0, (0, 0, 1)), (10/30, (0, 1, 0)), (18/30, (0, 0.85, 0)), (25/30, (1, 0.65, 0)), (30/30, (1, 0, 0)), (1, (1, 0, 0))])

#del(sample_data)

# Filter the data and select the columns needed
#sample_data = scb_p_temp_dask_df_1hour[scb_p_temp_dask_df_1hour['date'] >= '2023-04-09'][['channel', 'date', 'temperature']]
# Only channel '1' is kept (channel labels are strings here); the result is still a dask frame
sample_data = scb_p_temp_dask_df_1hour[scb_p_temp_dask_df_1hour['channel'] == '1'][['channel', 'date', 'temperature']]

# Set channel to numeric type
#sample_data['channel'] = sample_data['channel'].astype('int64')

# Select just channel 1 and 2
#sample_data = sample_data[(sample_data['channel'] == 1) | (sample_data['channel'] == 2)]

print(sample_data)

#sample_data = sample_data[sample_data['channel'].astype('category')][['channel', 'date', 'temperature']]

print('DF Head')
test = sample_data.compute()
print(test.head())

# Function to compute a HoloViews curve from a partition
def compute_partition(partition):
    """Build a date -> (temperature, channel) hv.Curve from one delayed partition.

    ``partition`` must provide ``.compute()``; its computed result is passed to
    ``hv.Curve`` as the data.
    """
    return hv.Curve(partition.compute(), kdims=['date'], vdims=['temperature', 'channel'], label='SCB Pixel Temperature - All Channels (1 hour resolution)')


tooltips = [
    ('Date', '$x'),
    ('Temperature', '$y'),
    ('Channel', '@channel') # This is the channel/pixel information that is missing in the plot
]

# NOTE: this hover tool is only referenced by the commented-out .opts() call at the end of the cell
hover = HoverTool(tooltips=tooltips)

# Loop through the partitions of the Dask DataFrame and compute the curves (one curve per partition)
curves = [compute_partition(partition) for partition in sample_data.to_delayed()]

print('Curves')
print(type(curves))

# Combine the curves
combined_curves = hv.Overlay(curves)

# Rasterize and display the combined curves
rasterized = hd.rasterize(combined_curves, aggregator=ds.mean('temperature'), cmap=cmap_custom)
rasterized.opts(width=800, height=400, padding=0.1, xlabel='Fecha', ylabel='Temperature (°C)')


# Create a curve object from the sample data and define the dimensions of the data
#curve = hv.Curve(sample_data, kdims=['date'], vdims=['temperature', 'channel'] ,label='SCB Pixel Temperature - All Channels (1 hour resolution)')

# Rasterize the plot using datashader
#rasterized = hd.rasterize(curve, aggregator=ds.by('channel', ds.mean('temperature')), cmap=cmap_custom).redim.nodata()

# Show the rasterized plot
#rasterized.opts(width=800, height=400, padding=0.1, xlabel='Date', ylabel='Temperature (°C)', show_grid=True, tools=[hover]) # For rasterized and datashade by channel

In [None]:
# NOTE: this cell relies on get_data_by_date, clusco_min_collection and cmap_custom
# having been defined by other cells before it is run.
date_filter = dt.date.today()
df = get_data_by_date(collection=clusco_min_collection, property_name='scb_pixel_temperature', date_time=date_filter, value_field='avg', id_var='date', var_name='channel', value_name='temperature')

# Materialize the dask frame once and reuse it for both the start date and the plot.
df_computed = df.compute()

# First row by position: zero-temperature rows are dropped without resetting the index,
# so the label 0 is not guaranteed to exist and ``[0]`` could raise KeyError.
start_date = df_computed['date'].dt.date.iloc[0]

date_picker = pn.widgets.DatePicker(name='Date Selection', value=start_date)

test_rasterized_hvplot_lines = df_computed.hvplot.line(
    x='date', y='temperature', tools=['hover'], by='channel', hover_cols=['channel'],
    rasterize=True, width=800, height=400, padding=0.1, aggregator=ds.mean('temperature'),
    xlabel='Date', c='temperature', cmap=cmap_custom, clim=(0, 30),
    ylabel='Average Temperature (°C)', grid=True)
test_rasterized_hvplot_lines

In [None]:
# Multi-line plot tests with HoloViews and datashader.
# Problem: the lines are grouped by channel, but a channel has to be selected
# in the widget before its line is shown.

# Wrap the 1-hour dask frame in a HoloViews Dataset
hvd_dataset = hv.Dataset(scb_p_temp_dask_df_1hour)

# Derive date -> temperature curves from the dataset
curve = hvd_dataset.to(hv.Curve, 'date', 'temperature')

# Shade the curves with datashader
rasterized_curve = hd.datashade(curve)


# # Style options for the plot
# opts.defaults(
#     opts.RGB(width=600, height=400, show_grid=True, tools=['hover']),
#     opts.Curve(alpha=0.5, line_width=1, tools=['hover'])
# )

# Configure and display the shaded plot
rasterized_curve.opts(
    width=600,
    height=400,
    padding=0.1,
    tools=['hover'],
    xlabel='Date',
    ylabel='Average Temperature (°C)',
    show_grid=True,
)


In [None]:
# Test of a plot with many lines shown at the same time using a HoloViews NdOverlay

# Materialize the dask frame once and split it by channel in pandas, instead of running a
# filter + compute over the whole dask frame for every single channel.
# sort=False keeps the channels in order of first appearance.
overlay_source = scb_p_temp_dask_df_1hour.compute()

# NdOverlay holding one line per channel
line_plot = hv.NdOverlay({channel: channel_frame.hvplot.line(x='date', y='temperature', label='SCB Pixel Temperature (All channels)')
                          for channel, channel_frame in overlay_source.groupby('channel', sort=False)})


# Configure and display the plot
line_plot.opts(padding=0.1, tools=['hover'], xlabel='Date', ylabel='Average Temperature (°C)', show_grid=True, default_tools=['hover'], width=600, height=400)


In [None]:
# Shaded plot built with the datashade operation.
# Problem 1: Some information is lost, since datashading discretizes the data in order to color it.
# Problem 2: line_plot is an NdOverlay with a lot of overlaid lines, so datashading it can take more than 1 minute to render.
# Problem 3: There is still no way to show the channel information in the hover tool.
# Problem 4: For some reason an aggregator cannot be added to the plot...

# No aggregator is given: adding one raised an error (cause unknown), so the default count
# aggregation is used, which produces poor color information.
datashaded_plot = hd.datashade(line_plot)

# Customize the plot (last expression of the cell, so it is displayed)
datashaded_plot.opts(
    width=800, height=400, padding=0.1,
    tools=['hover'], show_grid=True,
    xlabel='Date', ylabel='Temperature (°C)',
    title='PACTA Temperature (1 hour resolution)',
)

In [None]:
# Overlay the per-channel line plot with its datashaded version
line_plot * datashaded_plot

In [None]:
# Shaded plot built with the rasterize operation.
# Problem 2: line_plot is an NdOverlay with a lot of overlaid lines, so rasterizing it can take more than 1 minute to render.
# Problem 3: There is still no way to show the channel information in the hover tool.
# Problem 4: For some reason an aggregator cannot be added to the plot...

# Apply datashader to the plot. No aggregator is given: adding one raised an error (cause unknown),
# so the default count aggregation is used, which produces poor color information.
print('Aplicando datashader')
rasterized_plot = rasterize(line_plot)

print('Preparando plot')
# Customize the plot (last expression of the cell, so it is displayed)
rasterized_plot.opts(
    width=800, height=400, padding=0.1,
    tools=['hover'], show_grid=True,
    xlabel='Date', ylabel='Temperature (°C)',
    cmap=cmap_custom,
    title='PACTA Temperature (1 hour resolution)',
)

In [None]:
# Proof of concept: check whether building the NdOverlay from a dictionary of hv.Curve objects
# is more efficient than building it from per-channel hvplot line plots.

cmap_custom = LinearSegmentedColormap.from_list('mycmap', [(0, (0, 0, 1)), (10/30, (0, 1, 0)), (18/30, (0, 0.85, 0)), (25/30, (1, 0.65, 0)), (30/30, (1, 0, 0)), (1, (1, 0, 0))])

# Materialize the dask frame once and split it by channel in pandas, instead of running a
# filter + compute over the whole dask frame for every single channel.
# sort=False keeps the channels in order of first appearance.
curve_source = scb_p_temp_dask_df_1hour.compute()
curve_dict = {channel: hv.Curve(channel_frame, 'date', 'temperature', label=channel)
              for channel, channel_frame in curve_source.groupby('channel', sort=False)}
overlay = hv.NdOverlay(curve_dict)
# No aggregator is given: adding one raised an error (cause unknown), so the default count
# aggregation is used, which produces poor color information.
agg = rasterize(overlay)

agg.opts(
    width=800,
    height=400,
    padding=0.1,
    tools=['hover'],
    xlabel='Date',
    ylabel='Temperature (°C)',
    show_grid=True,
    cmap=cmap_custom,
    title='PACTA Temperature (1 hour resolution)'
)


In [None]:
# Grey line plot of the 1-hour data, with one selectable plot per channel (groupby='channel')
temperature_channels_line_plot_1hour = scb_p_temp_dask_df_1hour.compute().hvplot.line(
    x='date',
    y='temperature',
    groupby='channel',
    width=800,
    height=400,
    color='grey',
    label='test',
    title='Average PACTA Temperature (1 hour resolution)',
)
temperature_channels_line_plot_1hour.opts(
    padding=0.1,
    tools=['hover'],
    xlabel='Date',
    ylabel='Average Temperature (°C)',
    show_grid=True,
)
# scatter_plot = dask_df.compute().hvplot.scatter(x='date', y='temperature', alpha=0.1, width=800, height=400, datashade=True, cmap='reds')
# pacta_plot = line_plot * scatter_plot

In [None]:
# Scatter plot of the 1-hour data whose points are colored by temperature

# Custom color scale; the stop positions are fractions of 30 and the color limits below are (0, 30)
cmap_custom = LinearSegmentedColormap.from_list(
    'mycmap',
    [
        (0, (0, 0, 1)),
        (18/30, (0, 1, 0)),
        (25/30, (1, 0.65, 0)),
        (26/30, (1, 0, 0)),
        (1, (1, 0, 0)),
    ],
)

temperature_channels_scatter_plot_1hour = scb_p_temp_dask_df_1hour.compute().hvplot.scatter(
    x='date',
    y='temperature',
    groupby='channel',
    width=800,
    height=400,
    title='Average PACTA Temperature (1 hour resolution)',
    color='temperature',
    cmap=cmap_custom,
    alpha=0.5,
    size=20,
    marker='o',
)
temperature_channels_scatter_plot_1hour.opts(
    padding=0.1,
    tools=['hover'],
    xlabel='Date',
    ylabel='Average Temperature (°C)',
    clim=(0, 30),
)

In [None]:
# Overlay the 1-hour scatter plot with the 1-hour line plot
temperature_channels_scatter_plot_1hour * temperature_channels_line_plot_1hour

In [None]:
# Rasterized line plot of the 1-hour data, colored by temperature

# Sample data: every row dated 2023-04-10 or later, for all channels
# (no upper date bound and no channel filter is applied by the active code)
sample_data = scb_p_temp_dask_df_1hour.loc[scb_p_temp_dask_df_1hour['date'] >= '2023-04-10', ['channel', 'date', 'temperature']]

# Use the channel as the index of the dask frame
sample_data = sample_data.set_index('channel')
# get data only for the channel 1 and 2
#sample_data = sample_data.loc[sample_data['channel'] == '1', ['channel', 'date', 'temperature']]

print(sample_data.head())
#sample_data = scb_p_temp_dask_df_1hour.loc[scb_p_temp_dask_df_1hour['date'] >= '2023-04-10', ['channel', 'date', 'temperature']]
#sample_data['channel'] = pd.to_numeric(sample_data['channel'])
#sample_data['channel'] = sample_data['channel'].astype('category')

# Custom color scale
cmap_custom = LinearSegmentedColormap.from_list('mycmap', [(0, (0, 0, 1)), (18/30, (0, 1, 0)), (25/30, (1, 0.65, 0)), (26/30, (1, 0, 0)), (1, (1, 0, 0))])


# NOTE(review): this is a line plot, yet it is stored in temperature_channels_scatter_plot_1hour and therefore
# replaces the scatter plot assigned to that name by an earlier cell - confirm this is intended.
temperature_channels_scatter_plot_1hour = sample_data.hvplot.line(x='date', y='temperature', rasterize=True, by='channel', color='temperature', cmap=cmap_custom, width=800, height=400, title='Average PACTA Temperature (1 hour resolution)', alpha=0.5, )
temperature_channels_scatter_plot_1hour.opts(padding=0.1, tools=['hover'], xlabel='Date', ylabel='Average Temperature (°C)')

In [None]:
# Line plot of the 1-minute data, with one selectable plot per channel (groupby='channel')
temperature_channels_line_plot_1min = scb_p_temp_dask_df_1min.compute().hvplot.line(
    x='date',
    y='temperature',
    hover_cols=['date', 'temperature', 'channel'],
    groupby='channel',
    width=800,
    height=400,
    title='Average PACTA Temperature (minute resolution)',
)
temperature_channels_line_plot_1min.opts(
    padding=0.1,
    tools=['hover'],
    xlabel='Date',
    ylabel='Average Temperature (°C)',
)

In [None]:
# Scatter plot of the 1-minute data whose points are colored by temperature
# (uses the cmap_custom defined by a previous cell)

temperature_channels_scatter_plot_1minute = scb_p_temp_dask_df_1min.compute().hvplot.scatter(
    x='date',
    y='temperature',
    groupby='channel',
    width=800,
    height=400,
    title='Average PACTA Temperature (minute resolution)',
    color='temperature',
    cmap=cmap_custom,
    alpha=0.5,
    size=30,
    marker='o',
)
temperature_channels_scatter_plot_1minute.opts(
    padding=0.1,
    tools=['hover'],
    xlabel='Date',
    ylabel='Average Temperature (°C)',
    clim=(0, 30),
)


In [None]:
# Overlay the 1-minute scatter plot with the 1-minute line plot
temperature_channels_scatter_plot_1minute * temperature_channels_line_plot_1min

In [None]:

# Scatter plot over all channels of the 1-minute data; rasterized (datashade disabled) for better performance

temperature_channels_scatter_plot_all_channels_1minute = scb_p_temp_dask_df_1min.compute().hvplot.scatter(
    x='date',
    y='temperature',
    datashade=False,
    rasterize=True,
    width=800,
    height=400,
    title='Average PACTA Temperature (minute resolution)',
    color='temperature',
    cmap=cmap_custom,
)
temperature_channels_scatter_plot_all_channels_1minute.opts(
    padding=0.1,
    tools=['hover'],
    xlabel='Date',
    ylabel='Average Temperature (°C)',
    alpha=1,
    clim=(0, 30),
)

In [None]:
# Wrap the overlaid plots in Panel panes.
# NOTE(review): temperature_channels_scatter_plot_1hour is assigned by more than one cell of this notebook,
# so which plot ends up in the 1-hour pane depends on the cells executed before this one - confirm.
pacta_plot_1hour = pn.panel(temperature_channels_scatter_plot_1hour * temperature_channels_line_plot_1hour)
pacta_plot_1min = pn.panel(temperature_channels_line_plot_1min * temperature_channels_scatter_plot_1minute * temperature_channels_scatter_plot_all_channels_1minute)
# pane2 = pn.panel(pacta_plot)

# Mark both panes as servable
pacta_plot_1hour.servable()
pacta_plot_1min.servable() 
# pane2.servable()

In [None]:
# HEAT MAP
# Problems:
# 1) Fast to generate but very slow to interact with; still to be tried with clipped data
#del(heatmap)
cmap_custom = LinearSegmentedColormap.from_list(
    'mycmap',
    [
        (0, (0, 0, 1)),
        (10/30, (0, 1, 0)),
        (18/30, (0, 0.85, 0)),
        (25/30, (1, 0.65, 0)),
        (30/30, (1, 0, 0)),
        (1, (1, 0, 0)),
    ],
)

# Keep the rows dated 2023-01-24 or later
heatmap_data = scb_p_temp_dask_df_1hour.loc[
    (scb_p_temp_dask_df_1hour['date'] >= '2023-01-24'),
    ['channel', 'date', 'temperature'],
]

# Select the channel 1201
#heatmap_data = heatmap_data.loc[heatmap_data['channel'] == '1201', ['channel', 'date', 'temperature']]

# Create the heatmap using holoviews
#heatmap = hv.HeatMap(data=heatmap_data, kdims=['date', 'channel'], vdims='temperature', groupby='channel')

# Create the heatmap using hvplot (date on x, channel on y, temperature as the cell value)
heatmap = heatmap_data.hvplot.heatmap(
    x='date',
    y='channel',
    C='temperature',
    rasterize=True,
    width=800,
    height=10000,
    title='SCB Pixel Temperature',
    alpha=0.5,
)

# Configure the heatmap style options and show the graph
# (width and height are given again here with a different height than in the call above)
heatmap.opts(
    padding=0.1,
    width=800,
    height=400,
    cmap=cmap_custom,
    clim=(0, 30),
    tools=['hover'],
    colorbar=True,
    colorbar_opts={'title': 'Temperature (ºC)'},
)



In [None]:
def db_connection(dbname):
    """Connect to MongoDB with the default host/port and return the database ``dbname``.

    Parameters
    ----------
    dbname : str
        Name of the database to select on the client.

    Returns
    -------
    The database object ``client[dbname]`` when the server answers, or ``False``
    when the connection attempt fails (a message is printed in that case).
    """
    print("Connecting to database...")

    try:
        client = MongoClient(serverSelectionTimeoutMS = 2000)
        # server_info() needs an answer from the server, so an unreachable database is detected here.
        client.server_info()
    except Exception:
        # HOST/PORT are read from the class: ``client`` is unbound if the constructor itself raised.
        print('Connection to database (' + str(MongoClient.HOST) + ':' + str(MongoClient.PORT) + ') failed.\nCheck if the database is running.')
        return False

    print('Database connection successful.')
    db = client[dbname]

    return db

In [None]:
def get_data_by_date(collection, property_name, date_time, value_field, id_var, var_name, value_name, retry=True):
    """Load one day of a per-channel property from MongoDB as a long dask DataFrame.

    Documents are matched on ``name == property_name`` and on a ``date`` within the
    requested day (midnight inclusive to next midnight exclusive). Each document's
    ``value_field`` is expected to be a list with one value per channel.

    Parameters
    ----------
    collection : object exposing ``find(filter, projection)`` (a MongoDB collection).
    property_name : str
        Value of the ``name`` field to match.
    date_time : datetime.date or datetime.datetime
        Day to load; only year, month and day are used.
    value_field : str
        Document field holding the list of per-channel values.
    id_var : str
        Name of the timestamp column in the result.
    var_name : str
        Name of the channel column in the result ('1', '2', ... as strings).
    value_name : str
        Name of the value column in the result.
    retry : bool, default True
        When True and the day has no documents, previous days are tried, up to
        30 days in total (the requested day included).

    Returns
    -------
    A persisted dask DataFrame with the columns ``id_var``, ``var_name`` and
    ``value_name`` (rows whose value is 0 are dropped), or ``False`` when no
    document was found.
    """
    data_values = []
    datetime_values = []

    # The requested day is always tried; with retry, up to 29 earlier days are tried as well.
    days_to_try = 30 if retry else 1

    for days_back in range(days_to_try):
        date = date_time - dt.timedelta(days=days_back)
        day_start = dt.datetime(date.year, date.month, date.day)
        query = {'name': property_name, 'date': {'$gte': day_start, '$lt': day_start + dt.timedelta(days=1)}}

        if retry:
            print('Retrieving ' + property_name + ' data from date: ' + str(date))

        # Only the timestamp and the value list are requested from the server.
        for document in collection.find(query, {"date": 1, value_field: 1, "_id": 0}):
            data_values.append(document[value_field])
            datetime_values.append(document['date'])

        if data_values:
            break

        if retry:
            print('No data found. Testing previous day...')

    # Nothing found (also after exhausting the retries): report it to the caller
    # instead of failing later on ``data_values[0]``.
    if not data_values:
        print("saliendo porque no hay valores")
        return False

    print('Building pandas dataframe...')

    # Wide frame: one row per document, one "channel_<n>" column per entry of its value list
    channel_prefix = 'channel_'
    pandas_df = pd.DataFrame(data_values, columns=[f"{channel_prefix}{i+1}" for i in range(len(data_values[0]))])

    # Add the timestamps under the requested id column name
    pandas_df[id_var] = pd.to_datetime(datetime_values)

    # Melt from wide to long: the channel becomes a variable column and the reading the value column
    print('Transforms pandas dataframe from wide to long...')
    pandas_df_long = pandas_df.melt(id_vars=[id_var], var_name=var_name, value_name=value_name)

    # Drop readings equal to 0; .copy() so the assignment below does not write into a slice
    pandas_df_long = pandas_df_long[pandas_df_long[value_name] != 0].copy()

    # Strip the 'channel_' prefix so only the channel number remains (literal replacement, not a regex)
    pandas_df_long[var_name] = pandas_df_long[var_name].str.replace(channel_prefix, '', regex=False)

    # Convert to a dask frame with one partition per CPU core and keep it in memory
    dask_df_long = dask.dataframe.from_pandas(pandas_df_long, npartitions=multiprocessing.cpu_count()).persist()

    return dask_df_long

In [None]:
# Connect to the CACO database and select its 1-minute CLUSCO collection
caco_db_client = db_connection('CACO')
clusco_min_collection = caco_db_client['CLUSCO_min']

# Today's date is assigned first and then immediately replaced by a fixed date
date_filter = dt.date.today()
date_filter = dt.datetime(2023, 4, 19)

# Long dask frame with the scb_pixel_temperature averages of that day
scb_pixel_temperature_data = get_data_by_date(
    collection=clusco_min_collection,
    property_name='scb_pixel_temperature',
    date_time=date_filter,
    value_field='avg',
    id_var='date',
    var_name='channel',
    value_name='temperature',
)

# Print the frame itself, then its first and last rows
print(scb_pixel_temperature_data)
print(scb_pixel_temperature_data.head())
print(scb_pixel_temperature_data.tail())

In [None]:
# Show a loading indicator while Panel re-evaluates bound functions
pn.param.ParamMethod.loading_indicator = True

cmap_custom = LinearSegmentedColormap.from_list('mycmap', [(0, (0, 0, 1)), (10/30, (0, 1, 0)), (18/30, (0, 0.85, 0)), (25/30, (1, 0.65, 0)), (30/30, (1, 0, 0)), (1, (1, 0, 0))])

# Filter data by date 2023-04-17
#heatmap_data = scb_p_temp_dask_df_1min.loc[(scb_p_temp_dask_df_1min['date'] >= '2023-04-17'), ['channel', 'date', 'temperature']].persist()
#heatmap_data = scb_pixel_temperature_data.persist()

date_filter = dt.date.today()
heatmap_data = get_data_by_date(collection=clusco_min_collection, property_name='scb_pixel_temperature', date_time=date_filter, value_field='avg', id_var='date', var_name='channel', value_name='temperature')

# Initial value of the date picker: the date of the first loaded row.
# Guard against a False "no data" result by falling back to the requested date.
if heatmap_data is False:
    start_date = date_filter
else:
    # First row by position: zero-temperature rows are dropped without resetting the index,
    # so the label 0 is not guaranteed to exist and ``[0]`` could raise KeyError.
    start_date = heatmap_data.compute()['date'].dt.date.iloc[0]

temp_range_slider = pn.widgets.RangeSlider(name='Temperature Range (ºC)', start=0, end=100, value=(0, 40), step=0.1)

date_picker = pn.widgets.DatePicker(name='Date Selection', value=start_date)

# Function that rebuilds the heatmap for the selected temperature range and date
@pn.depends(temp_range_slider.param.value, date_picker.param.value)
def update_heatmap(temp_range, date_picker):
    """Return the heatmap of the selected day restricted to the selected temperature range.

    ``temp_range`` is the (low, high) value of the range slider and ``date_picker``
    the value of the date widget (this parameter shadows the widget of the same name
    inside the function). When the day has no data, a short message pane is returned
    instead of a plot.
    """
    heatmap_data = get_data_by_date(collection=clusco_min_collection, property_name='scb_pixel_temperature', date_time=date_picker, value_field='avg', id_var='date', var_name='channel', value_name='temperature', retry=False)

    # With retry=False, get_data_by_date returns False when the day has no documents.
    if heatmap_data is False:
        return pn.pane.Markdown('No data available for ' + str(date_picker) + '.')

    filtered_data = heatmap_data.loc[(heatmap_data['temperature'] >= temp_range[0]) & (heatmap_data['temperature'] <= temp_range[1])].persist()
    heatmap = hv.HeatMap(data=filtered_data, kdims=['date', 'channel'], vdims='temperature')
    heatmap = heatmap.opts(padding=0.1, width=600, height=400, cmap=cmap_custom, clim=(0, 30), tools=['hover'], colorbar=True, colorbar_opts={'title': 'Temperature (ºC)'})
    return heatmap

# Panel holding the widgets and the heatmap that follows them
dashboard = pn.Column(temp_range_slider, date_picker, update_heatmap)

# Show the panel
dashboard.servable()

In [None]:
import panel as pn
import holoviews as hv
from holoviews import opts
from bokeh.models import HoverTool
from holoviews.operation.datashader import datashade, dynspread

cmap_custom = LinearSegmentedColormap.from_list(
    'mycmap',
    [
        (0, (0, 0, 1)),
        (10/30, (0, 1, 0)),
        (18/30, (0, 0.85, 0)),
        (25/30, (1, 0.65, 0)),
        (30/30, (1, 0, 0)),
        (1, (1, 0, 0)),
    ],
)
# 1-hour rows dated 2023-01-24 or later
heatmap_data = scb_p_temp_dask_df_1hour.loc[
    (scb_p_temp_dask_df_1hour['date'] >= '2023-01-24'),
    ['channel', 'date', 'temperature'],
]


# Widgets: temperature range selector and channel range selector
# (the channel slider is created but not used by the code below)
temp_range_slider = pn.widgets.RangeSlider(
    name='Temperature Range (ºC)', start=0, end=30, value=(0, 30), step=0.1)
channel_range_slider = pn.widgets.RangeSlider(
    name='Channel Range Selection', start=1, end=1855, value=(1, 100), step=1)

# Function that rebuilds the heatmap for the selected temperature range
@pn.depends(temp_range_slider.param.value)
def update_heatmap(temp_range):
    """Return the heatmap of ``heatmap_data`` restricted to the (low, high) range ``temp_range``."""
    lower, upper = temp_range[0], temp_range[1]
    in_range = (heatmap_data['temperature'] >= lower) & (heatmap_data['temperature'] <= upper)
    filtered_data = heatmap_data.loc[in_range]
    heatmap = hv.HeatMap(data=filtered_data, kdims=['date', 'channel'], vdims='temperature')
    return heatmap.opts(
        padding=0.1,
        width=800,
        height=400,
        cmap=cmap_custom,
        clim=(0, 30),
        tools=['hover'],
        colorbar=True,
        colorbar_opts={'title': 'Temperature (ºC)'},
    )

# Panel holding the widget and the heatmap that follows it
dashboard = pn.Column(temp_range_slider, update_heatmap)

# Show the panel
dashboard.servable()