# 1. Data exploration

In [None]:
#imports
import pandas as pd
import os
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime

#folders
# Directory that the CSV inputs are read from (joined with file names via os.path.join)
data_folder = "data"

In [None]:
# Colour palettes shared by the figures in this notebook
plt_style_c = px.colors.sequential.haline  # complex
plt_style_s = px.colors.diverging.Portland  # simple

# Default plot size
size = dict(width=1500, height=750)

#function for plotting
def scale_show(fig, width=1500, height=750):
    """Apply the notebook's common font sizes and figure size, then display.

    Args:
        fig: Figure-like object exposing ``update_layout(**kwargs)`` and
            ``show()``. It is modified in place.
        width: Layout width to apply. Defaults to 1500, the value that was
            previously hard-coded.
        height: Layout height to apply. Defaults to 750, the value that was
            previously hard-coded.

    Returns:
        None.
    """
    # One layout update covers both the fonts and the size.
    fig.update_layout(
        font=dict(size=16),
        title_font=dict(size=20),
        xaxis_title_font=dict(size=18),
        yaxis_title_font=dict(size=18),
        width=width,
        height=height,
    )

    fig.show()

In [None]:
import warnings
# Ignore every warning from here on.
# NOTE(review): this also hides deprecation warnings from the libraries used below - consider narrowing the filter.
warnings.filterwarnings('ignore')

## 1.0 General

In [None]:
# Load the main data set from <data_folder>/df.csv
df = pd.read_csv(os.path.join(data_folder, "df.csv"))
#df_pv = pd.read_csv(os.path.join(data_folder, "df_pv.csv"))

In [None]:
# Convert the "date" column to pandas datetimes (it is used as the x axis of the time-series plots below)
df["date"] = pd.to_datetime(df["date"])

In [None]:
# (rows, columns) of the loaded frame
df.shape

In [None]:
# Preview the first five rows
df.head(5)

In [None]:
#df_pv.head()

In [None]:
# Summary statistics per column
df.describe()

In [None]:
#df_pv.describe()

In [None]:
# Number of missing values per column
df.isna().sum()

In [None]:
#df_pv.isna().sum()

In [None]:
# Correlation between numeric columns, rounded to one decimal.
# Non-numeric columns (e.g. the parsed "date") are excluded explicitly:
# pandas >= 2.0 no longer drops them silently inside DataFrame.corr.
df_corr = df.select_dtypes(include=["number", "bool"]).corr().round(1)

# Boolean mask covering the upper triangle including the diagonal
mask = np.zeros_like(df_corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Keep the lower triangle only, then drop the rows/columns that became empty.
# `axis` is passed by keyword: positional arguments to dropna were removed in
# pandas 2.0.
df_corr_viz = (
    df_corr.mask(mask)
    .dropna(how="all")
    .dropna(axis="columns", how="all")
)

fig = px.imshow(
    df_corr_viz,
    text_auto=True,
    color_continuous_scale=plt_style_c,
    title="Correlation matrix",
    width=700,
    height=700,
)

fig.show()

## 1.1 Temperature, pressure, wind speeds

In [None]:
# Box plot of u10 grouped by month
fig = px.box(
    df,
    x="month",
    y="u10",
    color_discrete_sequence=plt_style_s,
    labels={"u10": "u10 [m/s]"},
    title="Wind speed: u10",
)
scale_show(fig)

In [None]:
# Box plot of v10 grouped by month
fig = px.box(
    df,
    x="month",
    y="v10",
    color_discrete_sequence=plt_style_s,
    labels={"v10": "v10 [m/s]"},
    title="Wind speed: v10",
)
scale_show(fig)

In [None]:
# Overlaid probability-density histograms of both wind columns.
# The title names both series; it previously mentioned v10 only.
fig = px.histogram(
    data_frame=df,
    x=["v10", "u10"],
    histnorm="probability density",
    title="Distribution: v10, u10",
    barmode="overlay",
    opacity=0.9,
    color_discrete_sequence=plt_style_s,
)

scale_show(fig)

In [None]:
# Box plot of t2m grouped by month
fig = px.box(
    df,
    x="month",
    y="t2m",
    color_discrete_sequence=plt_style_s,
    labels={"t2m": "t2m [°k]"},
    title="Temperature",
)
scale_show(fig)

In [None]:
# Box plot of t2m grouped by year
fig = px.box(
    df,
    x="year",
    y="t2m",
    color_discrete_sequence=plt_style_s,
    labels={"t2m": "t2m [°k]"},
    title="Temperature",
)
scale_show(fig)

In [None]:
# t2m over time; every 10th row is plotted
fig = px.line(
    df.iloc[::10],
    x="date",
    y="t2m",
    color_discrete_sequence=plt_style_s,
    labels={"t2m": "t2m [k]"},
    title="Temperature",
)
scale_show(fig)

In [None]:
# t2m over time (every 2nd row) with an OLS trend line drawn in red
fig = px.scatter(
    df.iloc[::2],
    x="date",
    y="t2m",
    trendline="ols",
    trendline_color_override="red",
    color_discrete_sequence=plt_style_c,
    labels={"t2m": "t2m [k]"},
    title="Temperature",
)
scale_show(fig)

In [None]:
# Probability-density histogram of t2m, stacked and coloured by month
fig = px.histogram(
    df,
    x="t2m",
    color="month",
    nbins=200,
    histnorm="probability density",
    barmode="stack",
    opacity=1,
    color_discrete_sequence=plt_style_c,
    labels={"t2m": "t2m [°k]"},
    title="Distribution: t2m",
)
scale_show(fig)

In [None]:
# cdir over time, rows from position 15000 onwards
fig = px.line(
    df.iloc[15000:],
    x="date",
    y="cdir",
    color_discrete_sequence=plt_style_s,
    labels={"cdir": "cdir [j/m^2]"},
    title="Clear sky solar iradiation",
)
scale_show(fig)

In [None]:
# Box plot of cdir grouped by month
fig = px.box(
    df,
    x="month",
    y="cdir",
    color_discrete_sequence=plt_style_s,
    labels={"cdir": "cdir [j/m^2]"},
    title="Clear sky solar iradiation",
)
scale_show(fig)

In [None]:
# sp over time, rows from position 15000 onwards
fig = px.line(
    df.iloc[15000:],
    x="date",
    y="sp",
    color_discrete_sequence=plt_style_s,
    labels={"sp": "sp [hpa]"},
    title="Surface pressure",
)
scale_show(fig)

In [None]:
# Box plot of sp grouped by month
fig = px.box(
    df,
    x="month",
    y="sp",
    color_discrete_sequence=plt_style_s,
    labels={"sp": "sp [hpa]"},
    title="Surface pressure",
)
scale_show(fig)

## 1.2 ENSO / SOI

In [None]:
# SOI over time (index labels 15000 and up) with a red reference line at zero
fig = px.line(
    df.loc[15000:],
    x="date",
    y="soi",
    color_discrete_sequence=plt_style_s,
    title="SOI",
)
fig.add_hline(y=0, line_color="red")
scale_show(fig)

## 1.3 MJO

In [None]:
# MJO amplitude over the full period, every 2nd row
# Related columns: mjo_rmm1, mjo_rmm2, mjo_phase, mjo_amplitude
fig = px.line(
    df.loc[::2],
    x="date",
    y="mjo_amplitude",
    color_discrete_sequence=plt_style_s,
    title="MJO amplitude",
)
scale_show(fig)

In [None]:
# MJO amplitude for index labels 15000 and up
# Related columns: mjo_rmm1, mjo_rmm2, mjo_phase, mjo_amplitude
fig = px.line(
    df.loc[15000:],
    x="date",
    y="mjo_amplitude",
    color_discrete_sequence=plt_style_s,
    title="MJO amplitude",
)
scale_show(fig)

In [None]:
# mjo_rmm1 against mjo_rmm2 for rows 300-4999, coloured by mjo_phase,
# on a square canvas with both axes fixed to [-3, 3]
fig = px.scatter(
    df.iloc[300:5000],
    x="mjo_rmm1",
    y="mjo_rmm2",
    color="mjo_phase",
    range_x=[-3, 3],
    range_y=[-3, 3],
    width=700,
    height=700,
    color_discrete_sequence=plt_style_s,
    title="MJO with pahse",
)
fig.show()

In [None]:
# Path of (mjo_rmm1, mjo_rmm2) across rows 1000-1099,
# on a square canvas with both axes fixed to [-3, 3]
fig = px.line(
    df.iloc[1000:1100],
    x="mjo_rmm1",
    y="mjo_rmm2",
    range_x=[-3, 3],
    range_y=[-3, 3],
    width=700,
    height=700,
    color_discrete_sequence=plt_style_s,
    title="MJO with pahse",
)
fig.show()

In [None]:
# Histogram of mjo_amplitude, normalised to probabilities
fig = px.histogram(
    df,
    x="mjo_amplitude",
    histnorm="probability",
    histfunc="count",
    barmode="stack",
    color_discrete_sequence=plt_style_c,
    title="MJO amplitude distribution",
)
scale_show(fig)

In [None]:
# Box plot of mjo_amplitude grouped by month
fig = px.box(
    df,
    y="mjo_amplitude",
    x="month",
    color_discrete_sequence=plt_style_s,
    title="MJO amplitude distribution",
)
scale_show(fig)

## 1.4 AO

In [None]:
# AO over time, rows from position 14000 onwards
fig = px.line(
    df.iloc[14000:],
    x="date",
    y="ao",
    color_discrete_sequence=plt_style_s,
    title="AO",
)
scale_show(fig)

In [None]:
# Histogram of ao, normalised to probabilities
fig = px.histogram(
    df,
    x="ao",
    histnorm="probability",
    histfunc="count",
    barmode="stack",
    color_discrete_sequence=plt_style_c,
    title="AO distribution",
)
scale_show(fig)

In [None]:
# Box plot of ao grouped by month
fig = px.box(
    df,
    y="ao",
    x="month",
    color_discrete_sequence=plt_style_s,
    title="AO distribution",
)
scale_show(fig)

## 1.5 NAO

In [None]:
# NAO over time, rows from position 14000 onwards
fig = px.line(
    df.iloc[14000:],
    x="date",
    y="nao",
    color_discrete_sequence=plt_style_s,
    title="NAO",
)
scale_show(fig)

In [None]:
# Histogram of nao, normalised to probabilities
fig = px.histogram(
    df,
    x="nao",
    histnorm="probability",
    histfunc="count",
    barmode="stack",
    color_discrete_sequence=plt_style_c,
    title="NAO distribution",
)
scale_show(fig)

In [None]:
# Box plot of nao grouped by month
fig = px.box(
    df,
    y="nao",
    x="month",
    color_discrete_sequence=plt_style_s,
    title="NAO  distribution",
)
scale_show(fig)

## 1.6 Various

In [None]:
# AO, MJO amplitude and SOI on one time axis, rows from position 14000 onwards.
# The y series are given as column names of the sliced frame, so the slice is
# taken once and each trace is labelled with its column name.
fig = px.line(
    data_frame=df.iloc[14000:],
    x="date",
    y=["ao", "mjo_amplitude", "soi"],
    title="Climate oscilation indexes",
    color_discrete_sequence=plt_style_s,
)

scale_show(fig)

## 1.7 PV

In [None]:
#some code
# Rebinds `df` to the data in <data_folder>/df_pv.csv; the frame loaded earlier is no longer reachable through this name
df = pd.read_csv(os.path.join(data_folder, "df_pv.csv"))
# Constant column passed as the `size` argument of the scatter plots below, so every marker has the same size
df["size"] = 1

In [None]:
# Preview the first ten rows
df.head(10)

In [None]:
# Animated geo scatter of speed at level 100, one frame per date,
# for dates from 1979-01-01 to 1979-02-01 inclusive.
fig = px.scatter_geo(
    df.loc[df["date"].between("1979-01-01", "1979-02-01") & (df["level"] == 100)],
    lat="latitude",
    lon="longitude",
    color="speed",
    size="size",
    size_max=7,
    opacity=0.75,
    animation_frame="date",
    color_continuous_scale=plt_style_s,
    labels={"speed": "wind speed [m/s]"},
    title="Polar vortex",
)

# Adjust view.
# NOTE(review): the `mapbox` keys target mapbox subplots, while this figure is
# built with scatter_geo - they presumably have no visible effect here, and the
# zero top margin may clip the title. Confirm and switch to fig.update_geos if so.
fig.update_layout(
    margin=dict(l=0, t=0, b=0, r=0),
    mapbox=dict(
        center=dict(lon=8.4, lat=60),
        style="carto-positron",
    ),
)

scale_show(fig)

In [None]:
# Animated mapbox scatter of speed, one frame per date,
# for dates from 1979-01-01 to 1979-02-01 inclusive (all levels).
fig = px.scatter_mapbox(
    df.loc[df["date"].between("1979-01-01", "1979-02-01")],
    lat="latitude",
    lon="longitude",
    color="speed",
    size="size",
    size_max=10,
    opacity=0.5,
    animation_frame="date",
    color_continuous_scale=plt_style_s,
    labels={"speed": "wind speed [m/s]"},
)

# Adjust view: no margins, centred on lon 8.4 / lat 60 at zoom 3
fig.update_layout(
    margin=dict(l=0, t=0, b=0, r=0),
    mapbox=dict(
        center=dict(lon=8.4, lat=60),
        style="carto-positron",
        zoom=3,
    ),
)

scale_show(fig)

In [None]:
# Longitude/latitude scatter of speed, one facet column per date
# (1979-01-01 to 1979-01-07 inclusive); sizes are multiples of plot_scaler.
plot_scaler = 15
fig = px.scatter(
    df.loc[df["date"].between("1979-01-01", "1979-01-07")],
    x="longitude",
    y="latitude",
    color="speed",
    size="size",
    size_max=plot_scaler - 3,
    opacity=1,
    facet_col="date",
    width=15 * 6 * plot_scaler,
    height=45 * plot_scaler,
    color_continuous_scale=plt_style_s,
    labels={"speed": "speed [m/s]"},
    title="Polar vortex wind speed",
)

# Draw the markers as squares
fig.update_traces(marker_symbol="square", selector={"mode": "markers"})

fig.show()

In [None]:
# Longitude/latitude scatter of t, one facet column per date
# (1979-01-01 to 1979-01-07 inclusive); sizes are multiples of plot_scaler.
plot_scaler = 15
fig = px.scatter(
    df.loc[df["date"].between("1979-01-01", "1979-01-07")],
    x="longitude",
    y="latitude",
    color="t",
    size="size",
    size_max=plot_scaler - 3,
    opacity=1,
    facet_col="date",
    width=15 * 6 * plot_scaler,
    height=45 * plot_scaler,
    color_continuous_scale=plt_style_s,
    labels={"t": "t [°k]"},
    title="Polar vortex temperatures",
)

# Draw the markers as squares
fig.update_traces(marker_symbol="square", selector={"mode": "markers"})

fig.show()

In [None]:
# Latitude/level scatter of speed, one facet row per date
# (2019-03-01 to 2019-03-04 inclusive); sizes are multiples of plot_scaler.
plot_scaler = 15
fig = px.scatter(
    data_frame=df.loc[(df["date"] >= "2019-03-01") & (df["date"] <= "2019-03-04")],
    x="latitude",
    y="level",
    color="speed",
    size="size",
    size_max=plot_scaler - 3,
    opacity=1,
    facet_row="date",
    height=60 * plot_scaler,
    width=60 * plot_scaler,
    color_continuous_scale=plt_style_s,
    title="Polar vortex wind speed",
    labels={"speed": "speed [m/s]"},
)

# Invert the level axis in every facet row. Assigning through
# fig['layout']['yaxis'] addresses only the first y-axis of the faceted figure.
fig.update_yaxes(autorange="reversed")

# Draw the markers as squares
fig.update_traces(
    marker=dict(symbol="square"),
    selector=dict(mode="markers"),
)

fig.show()

In [None]:
# Latitude/level scatter of t, one facet row per date
# (2019-03-01 to 2019-03-04 inclusive); sizes are multiples of plot_scaler.
plot_scaler = 15
fig = px.scatter(
    data_frame=df.loc[(df["date"] >= "2019-03-01") & (df["date"] <= "2019-03-04")],
    x="latitude",
    y="level",
    color="t",
    size="size",
    size_max=plot_scaler - 3,
    opacity=1,
    facet_row="date",
    height=60 * plot_scaler,
    width=60 * plot_scaler,
    color_continuous_scale=plt_style_s,
    title="Polar vortex temperature",
    labels={"t": "t [°k]"},
)

# Invert the level axis in every facet row. Assigning through
# fig['layout']['yaxis'] addresses only the first y-axis of the faceted figure.
fig.update_yaxes(autorange="reversed")

# Draw the markers as squares
fig.update_traces(
    marker=dict(symbol="square"),
    selector=dict(mode="markers"),
)

fig.show()

In [None]:
# Remove the name `df` from the namespace (presumably to let the large frame be garbage-collected)
del df