# Playground notebook

## Imports and settings

In [None]:
import src

In [None]:
import os
import numpy as np
import sys
import re
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from  matplotlib.animation import FuncAnimation
from matplotlib import colors
from netCDF4 import Dataset
from IPython.display import clear_output
# Data folder: it is prepended to sys.path so that later cells can read it back
# as sys.path[0] when building file paths. This is an absolute path on the
# author's machine and has to be adapted on any other computer.
sys.path.insert(0, 'C:/Users/pkicsiny/Desktop/TUM/3/ADL4CV/data/rx')

In [None]:
# Choose which CUDA devices are visible. The environment variables are set
# before TensorFlow is imported below.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # exposes device 0; use "" to force CPU usage
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.client import device_lib
# Print the devices TensorFlow can actually see, to verify the setting above.
print(device_lib.list_local_devices())

---
## Data preprocessing

### Rain measurements
Measurements are downloaded from the DWD (German weather service) open data server: __ftp://ftp-cdc.dwd.de/pub/CDC/grids_germany/hourly/__<br>
I'm working with the data of August 2010 (based on [this](https://tradingeconomics.com/germany/precipitation)), so I have downloaded this: __ftp://ftp-cdc.dwd.de/pub/CDC/grids_germany/hourly/radolan/historical/asc/2010/RW-201008.tar__<br>
_DWD manual:_<br>
__ftp://ftp-cdc.dwd.de/pub/CDC/grids_germany/hourly/radolan/historical/asc/BESCHREIBUNG_gridsgermany_hourly_radolan_historical_asc_de.pdf__<br><br>
This contains radar maps recorded every hour. Each map has a resolution of $900\times900$ pixels and each pixel corresponds to a $1\,km\times1\,km$ area in reality. Pixel values are the precipitation height in $0.1\,mm$.
Below I import this data as a series of NumPy arrays and plot them to see the actual radar maps. _sys.path[0]_ is the data path on my computer and may be different for you.

In [None]:
# List the radar files in a deterministic order. os.listdir() returns the
# entries in arbitrary order, while the cells below use the array index as a
# position in the time series, so the names are sorted.
# NOTE(review): assumes that sorting by file name gives chronological order — confirm.
files = sorted(os.listdir("../data/rx"))
total_length = len(files)
# One 900 x 900 map per file; the slots are filled by the loading loop.
inputs = np.zeros((total_length,900,900))

In [None]:
# Build display titles from a file name by slicing fixed character positions.
# The index is set explicitly so that the cell does not rely on a loop variable
# left over from another cell (which fails on a freshly started kernel).
sample_idx = 0
sample_name = files[sample_idx]
# Presumably "day.month.year hour:minute" for the 5-minute file naming scheme — confirm.
title_5min = f"{sample_name[19:21]}.{sample_name[17:19]}.20{sample_name[15:17]} {sample_name[21:23]}:{sample_name[23:25]}"
# Same idea with the character offsets of the hourly naming scheme.
# NOTE(review): both titles slice the same file name although the two schemes
# use different offsets — only one of them can be meaningful for a given file.
title_h = f"{sample_name[9:11]}.{sample_name[7:9]}.{sample_name[3:7]} {sample_name[12:14]}:{sample_name[14:16]}"

In [None]:
# Display the formatted title string to check the slicing offsets by eye.
title_5min

In [None]:
# Read every binary radar file into its slot of the inputs array.
for i, file in enumerate(files):
    if i%10 == 0:
        print(i, end=", ")  # lightweight progress indicator
    with open(sys.path[0] + '/' + file, "rb") as f:
        byte = f.read()
    # Position of the first '>' byte (value 62); offset 0 is used when the file
    # contains none.
    start = max(byte.find(b'>'), 0)
    # The pixel payload starts 3 bytes after that position, one unsigned byte
    # per pixel. frombuffer views the bytes directly instead of building a
    # Python list of integers.
    pixels = np.frombuffer(byte, dtype=np.uint8, offset=start+3)
    # Reshape to the 900 x 900 grid and reverse the row order.
    inputs[i] = np.flip(pixels.reshape(900,900), axis=0)
    # Value 250 is recoded to -1; the plotting cell masks negative pixels.
    inputs[i][inputs[i] == 250] = -1

Hourly data

In [None]:
# Allocate the container for the hourly maps: one 900 x 900 grid per file.
# np.empty leaves the memory uninitialised; every slot is written by the
# loading loop that follows.
totalLength = len(files)
grid_shape = (totalLength, 900, 900)
inputs = np.empty(grid_shape)
print(inputs.shape)

In [None]:
# Load each ASCII grid file into its slice of the inputs array.
# NOTE(review): dataDir is not defined in any cell shown above — confirm that
# it is set before this cell runs.
for i, file in enumerate(files):
    # Replace the previous progress line instead of appending a new one.
    clear_output(wait=True)
    print(f"[{i+1}/{totalLength}]")
    # The first 6 rows of each file are skipped before the grid values are parsed.
    ascii_grid = np.loadtxt(f"{dataDir}/{file}", skiprows=6)
    inputs[i] = ascii_grid

In [None]:
# Pass the loaded maps through preproc.mask_data with 100 as second argument.
# NOTE(review): preproc is not imported in the cells shown above, and this cell
# overwrites inputs with its own result, so running it twice applies the
# function twice — confirm both points.
inputs = preproc.mask_data(inputs,100)

In [None]:
# look over the data
%matplotlib notebook
from  matplotlib.animation import FuncAnimation

fig = plt.figure()
plot = plt.imshow(np.ma.masked_where(inputs[0] < 0, inputs[0])) #inputs 100 has a good mask
plt.colorbar()
plt.xlabel("Longitude pixels")
plt.ylabel("Latitude pixels")
def update(i):
    plot.set_data(np.ma.masked_where(inputs[i] < 0, inputs[i]))
    plt.title(f"Index in array: {i}\nDate: "+f"{files[i][19:21]}.{files[i][17:19]}.20{files[i][15:17]} {files[i][21:23]}:{files[i][23:25]}")
    plt.savefig(f"{i}")
    return plot

anim = FuncAnimation(fig, update, frames=100, interval = 200, blit=True, repeat=False)

plt.show()

### Wind measurements
Here I'm loading wind measurements. This is important because later we might experiment not only with rain (the density field) but also with the wind (the velocity field). The tempoGAN paper also uses the velocity, and without it, it might be very difficult to build the GAN (unless you have some other ideas). So for now I'll try it this way. I have downloaded two datasets for the wind:<br><br>
_Wind direction:_ __ftp://ftp-cdc.dwd.de/pub/CDC/grids_germany/hourly/Project_TRY/wind_direction/DD_201208.nc.gz__<br>
_Wind speed:_ __ftp://ftp-cdc.dwd.de/pub/CDC/grids_germany/hourly/Project_TRY/wind_speed/FF_201208.nc.gz__<br>
_DWD manuals:_<br>
__ftp://ftp-cdc.dwd.de/pub/CDC/grids_germany/hourly/Project_TRY/wind_direction/DESCRIPTION_gridsgermany_hourly_Project_TRY_wind_direction_en.pdf__
<br>
__ftp://ftp-cdc.dwd.de/pub/CDC/grids_germany/hourly/Project_TRY/wind_speed/DESCRIPTION_gridsgermany_hourly_Project_TRY_wind_speed_en.pdf__<br><br>
I create two datasets out of these. They contain the $x$ and $y$ velocity components.
The measurements are in $\displaystyle0.1\,\frac{m}{s}$.
<br><br>
_<font color='red'>__Issue__:</font> [Typically](https://www.nationalgeographic.com/science/earth/earths-atmosphere/clouds/), an average rain cloud is about $2000\,m$ high, and normal clouds can be up to $6000\,m$ high, but we only have wind measurements recorded at $10\,m$.
We can use e.g. [this](https://websites.pmc.ucsc.edu/~jnoble/wind/extrap/) to calculate the wind speed at higher altitudes. We can't correct for the change in wind direction, so this remains a weak point in the simulations._

In [None]:
# Open the NetCDF files below the data folder stored in sys.path[0]
# (the paths are probably different for you).
w_dir = Dataset(sys.path[0]+'/wind_direction/DD_201008_CF.nc')  # direction
w_vel = Dataset(sys.path[0]+'/wind_speed/FF_201008_CF.nc')  # velocity
# NOTE(review): cloud is bound to the 'CF' variable rather than to the Dataset
# itself, yet later cells index cloud['datum'] — confirm which object is intended.
cloud = Dataset(sys.path[0]+'/cloud/N_201008_CF.nc')['CF']  # cloud cover

In [None]:
# List the names of the variables stored in the wind direction file.
w_dir.variables.keys()

In [None]:
# Build the component arrays from speed |v| ('FF') and direction phi ('DD',
# converted from degrees to radians), one time step at a time:
#   vx = |v| * sin(phi),  vy = |v| * cos(phi)
vx = np.empty_like(w_dir['DD'])
vy = np.empty_like(w_vel['FF'])
n_steps = np.shape(vx)[0]
for t in range(n_steps):
    # Read each slice once and reuse it for both components.
    speed = w_vel['FF'][t]
    phi = np.deg2rad(w_dir['DD'][t])
    vx[t] = speed*np.sin(phi)
    vy[t] = speed*np.cos(phi)

In [None]:
# Inspect the first entry of cloud['datum'].
cloud['datum'][0]

In [None]:
# Shape of the entry at index 1 of rho.
# NOTE(review): rho is not defined in any cell shown in this notebook — confirm
# where it is supposed to come from.
np.shape(rho[1])

In [None]:
# look over the data
%matplotlib notebook
from  matplotlib.animation import FuncAnimation

fig = plt.figure()
plot = plt.imshow(np.flip(vx[0][300:332,300:332], axis=0)) #inputs 100 has a good mask
plt.colorbar()
plt.xlabel("Longitude pixels")
plt.ylabel("Latitude pixels")
def update(i):
    plot.set_data(np.flip(vx[i][300:332,300:332], axis=0))
    return plot

anim = FuncAnimation(fig, update, frames=744, interval = 200, blit=True, repeat=False)

plt.show()

In [None]:
# Show rho with its first axis reversed, titled with entry 740 of cloud['datum'].
# NOTE(review): rho is not defined in any cell shown in this notebook — confirm
# where it is supposed to come from.
plt.imshow(np.flip(rho, axis=0))
plt.title(cloud['datum'][740])

In [None]:
#sanity checks
%matplotlib inline

# NE, N, NW
plt.subplot(2,2,1)
plt.imshow(np.flip(np.ma.masked_where(vy >0, vy), axis=0))
plt.colorbar(shrink=0.7)
plt.title("All northern wind")

plt.subplot(2,2,2)
plt.imshow(np.flip(np.ma.masked_where((w_dir['DD'][0]<90)| (w_dir['DD'][0]>270),w_dir['DD'][0] ), axis=0))
plt.colorbar(shrink=0.7)
plt.title("All northern wind truth")

# SW
plt.subplot(2,2,3)
plt.imshow(np.flip(np.ma.masked_where((vx<0)|(vy<0), vy), axis=0))
plt.colorbar(shrink=0.7)
plt.title("Southwestern wind")

plt.subplot(2,2,4)
plt.imshow(np.flip(np.ma.masked_where((w_dir['DD'][0]>90)| (w_dir['DD'][0]>360),w_dir['DD'][0] ), axis=0))
plt.colorbar(shrink=0.7)
plt.title("Southwestern wind truth")

plt.subplots_adjust(hspace=0.3, wspace=0.3)

### Germany
GPS coordinates of Germany.

In [None]:
# Coordinates of every grid point that is not masked in the first wind-speed
# time step, floored to two decimals.
valid = ~w_vel['FF'][0].mask
lon_valid = np.floor(w_vel['lon'][:][valid]*100)/100
lat_valid = np.floor(w_vel['lat'][:][valid]*100)/100
germany = pd.DataFrame(data={'LON':lon_valid, 'LAT':lat_valid})[['LON','LAT']]

In [None]:
# Put the longitudes of the two grids side by side (aligned on their index)
# and show the rows whose radar-grid longitude is at most 10.1844.
# NOTE(review): coords is created in a later cell of this notebook — confirm
# the intended execution order.
test = pd.DataFrame(data={'gLON':germany['LON'], 'cLON':coords['LON']})
lon_in_range = test['cLON'] <= 10.1844
test.loc[lon_in_range]

In [None]:
# Floor the radar coordinates to two decimals and turn the row index into an
# explicit CELL_ID column.
trunc = (
    coords[['LAT','LON']]
    .mul(100)
    .apply(np.floor)
    .div(100)
    .reset_index()
    .rename(columns={'index':'CELL_ID'})
)
trunc

In [None]:
# Stack both coordinate tables and flag every row whose (LAT, LON) pair occurs
# again further down. With keep='last' the final occurrence is left unflagged.
everything = pd.concat([trunc, germany])
repeated = everything.duplicated(subset=('LAT','LON'), keep='last')
# dropna() removes flagged rows that have a missing value in any column.
res = everything[repeated].dropna()
len(res)

In [None]:
# Largest value in vx. The method has to be called; the bare attribute only
# displays the bound method object.
vx.max()

In [None]:
# Rows of the combined table whose LAT equals 46.96 (exact float comparison).
everything[everything['LAT'] == 46.96]

In [None]:
# Scatter plot of the matched coordinates: LON on the x axis, LAT on the y axis.
%matplotlib inline
plt.scatter(res['LON'],res['LAT'])

In [None]:
# Split each CELL_ID into (row, column) indices of a grid with 900 columns:
# quotient and remainder of the division by 900, cast to integers.
cell_ids = res['CELL_ID'].values
cell_rows, cell_cols = np.divmod(cell_ids, 900)
inside = (cell_rows.astype(int), cell_cols.astype(int))
inside

In [None]:
# Boolean grid with the shape of one radar map: True at the (row, column)
# positions stored in inside, False everywhere else.
c = np.zeros_like(inputs[0], dtype=bool)
c[inside] = True

In [None]:
# Number of True entries in c.
len(c[c])

In [None]:
# Show the radar map at index 1, hiding every pixel whose entry in c (with the
# row order reversed) is False.
%matplotlib notebook
plt.imshow(np.ma.masked_where((~np.flip(c, axis=0)),inputs[1]))

### Radar coordinates
The GPS coordinates of the radar grid for the rain measurements.

In [None]:
def _read_coord_table(name):
    """Parse ``<name>_center.txt`` from the rain_grid_coordinates folder.

    Every line of the file is split into 8-character tokens made of two
    characters, a literal dot and five more characters. Returns a DataFrame of
    strings with one row per line of the file.
    """
    table = pd.read_table(sys.path[0]+f"/rain_grid_coordinates/{name}_center.txt",
                          header=None)
    # Raw string: the backslash belongs to the regex and is not a Python string escape.
    return pd.DataFrame([re.findall(r'..\......', row[0]) for idx, row in table.iterrows()])

# 'lambda' holds the longitudes and 'phi' the latitudes.
lon, lat = [_read_coord_table(file) for file in ['lambda', 'phi']]

In [None]:
def _flatten_as_float(table):
    """Return every cell of ``table`` as one float Series, read row by row."""
    return pd.Series(table.values.ravel()).astype(float)

# The columns are given as an ordered mapping; a set has no defined order, so
# the column order would not be reproducible.
coords = pd.DataFrame({"LAT": _flatten_as_float(lat),
                       "LON": _flatten_as_float(lon)})
# The cell id is the position of the point in the flattened grid.
coords["CELL_ID"] = coords.index.values

In [None]:
# Radar map: scatter plot of every radar grid point, LON on the x axis and LAT
# on the y axis.
%matplotlib inline
plt.scatter(coords['LON'],coords['LAT'])