# Data Exploration

The aim of this notebook is to explore the incoming satellite image data: to look at the different channels of the satellite data and to understand the structure of the data produced by the 'data generator'.

*Note: it is unclear whether this work has been done before, but it is still useful to explore the data for myself.*

In [52]:
import os

import pandas as pd
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from predict_pv_yield.netcdf_dataset import NetCDFDataset, SAT_VARIABLE_NAMES

# Base location of the prepared training data; the 'train' sub-folder is
# appended to it when the dataset is constructed.
DATA_PATH = 'gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/'
# Second base path handed to NetCDFDataset (also joined with 'train').
# Presumably a local scratch directory for downloaded files - TODO confirm.
TEMP_PATH = '.'

In [44]:
# Build the training data generator and an iterator over it.

# Both base paths point at their respective 'train' sub-folder.
train_data_path = os.path.join(DATA_PATH, 'train')
train_temp_path = os.path.join(TEMP_PATH, 'train')

train_dataset = NetCDFDataset(24_900, train_data_path, train_temp_path)

# The dataset is initialised for a worker before it is iterated.
train_dataset.per_worker_init(1)
train_dataset_iterator = iter(train_dataset)

In [45]:
# get batch of data, this may take a few seconds to run
# Pulls the next item from the training iterator. Later cells look values up
# by key ('sat_data', 'sat_datetime_index'), so it is used as a mapping.
data = next(train_dataset_iterator)

gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/train/6cb117_0.nc
(32, 19, 64, 64, 12)
(12,)


In [46]:
# Timestamp of the image: take entry [0, 0] of the satellite datetime index
# and convert it, interpreting the stored number as seconds (unit='s').
first_image_seconds = data['sat_datetime_index'][0, 0]
sat_datetime = pd.to_datetime(first_image_seconds, unit='s')

In [47]:
# Contour plot of a single image: entry [0][0] of the satellite data,
# restricted to the first channel (the 'HRV' channel).
z = data['sat_data'][0][0][:, :, 0]

# Build the contour trace first, then wrap it in a figure and render it.
hrv_contour = go.Contour(z=z)
fig = go.Figure(data=hrv_contour)
fig.show()

In [48]:
# plot all channels
#
# Draws one heatmap per satellite channel for a single image and time index,
# laid out on an N_rows x N_cols grid titled with SAT_VARIABLE_NAMES.
image_index = 6      # index into the first axis of sat_data
time_index = 2       # index into the second axis of sat_data
total_channels = 12

N_cols = 4
N_rows = 3
fig = make_subplots(rows=N_rows, cols=N_cols, subplot_titles=SAT_VARIABLE_NAMES)

# loop over the channels and plot them
for channel in range(total_channels):

    # make_subplots hands out subplot_titles in row-major order (left to
    # right, then top to bottom). The channels must be placed in the same
    # order, otherwise a heatmap is drawn under another channel's name.
    row = channel // N_cols + 1
    col = channel % N_cols + 1

    z = data['sat_data'][image_index][time_index][:, :, channel]

    fig.add_trace(go.Heatmap(z=z, showscale=False), row=row, col=col)

fig.update_layout(height=600, width=800, title_text="Satelite Images Channel plots")
fig.show()

In [49]:
# Small animation: step through the time frames of one image for one channel.
image_index = 6
channel_index = 10

# One animation frame (a heatmap) per time index, 19 in total.
frames = [
    go.Frame(
        data=[
            go.Heatmap(
                z=data['sat_data'][image_index][step][:, :, channel_index],
                showscale=False,
            )
        ]
    )
    for step in range(19)
]

# The figure itself starts out showing the first time frame.
z0 = data['sat_data'][image_index][0][:, :, channel_index]

# A single "Play" button that triggers the animation.
play_button = dict(label="Play", method="animate", args=[None])
layout = go.Layout(
    title="Timeframe video",
    updatemenus=[dict(type="buttons", buttons=[play_button])],
)

# Assemble the animated plot and render it.
fig = go.Figure(
    data=[go.Heatmap(z=z0, showscale=False)],
    layout=layout,
    frames=frames,
)
fig.show()