In [1]:
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd
import xarray as xr
from IPython.display import clear_output

### Loading a NetCDF (.nc) file into a pandas DataFrame

In [2]:
# Base name of the raw NetCDF file; it is reused later to name the saved pickle.
file_name = '1.1.0'

# Open the dataset inside a context manager so the underlying NetCDF file
# handle is closed as soon as the data has been materialised as a DataFrame.
# reset_index() turns the index produced by to_dataframe() into ordinary columns.
with xr.open_dataset('../raw_data/' + file_name + '.nc') as weather_ds:
    data_df = weather_ds.to_dataframe().reset_index()

print(data_df.shape)
data_df.head(2)

(36189648, 10)


Unnamed: 0,latitude,longitude,time,u10,v10,t2m,lai_hv,lai_lv,sp,tp
0,36.0,68.0,2019-01-01 00:00:00,0.664481,2.946031,271.367676,1.022075,0.571653,80007.523438,0.0
1,36.0,68.0,2019-01-01 01:00:00,1.381402,2.377072,272.530304,1.022075,0.571653,80046.03125,1.2e-05


### Calculate and add the wind speed (magnitude of the u/v wind components)

In [3]:
# The wind magnitude is the Euclidean norm of the u10 and v10 columns.
# np.hypot evaluates sqrt(u**2 + v**2) element-wise in a single call and avoids
# building the squared intermediates, which can overflow or underflow.
data_df['wind_magnitude_10m'] = np.hypot(
    data_df['u10'].to_numpy(),
    data_df['v10'].to_numpy(),
)
print(data_df.shape)
data_df.head(2)

(36189648, 11)


Unnamed: 0,latitude,longitude,time,u10,v10,t2m,lai_hv,lai_lv,sp,tp,wind_magnitude_10m
0,36.0,68.0,2019-01-01 00:00:00,0.664481,2.946031,271.367676,1.022075,0.571653,80007.523438,0.0,3.020039
1,36.0,68.0,2019-01-01 01:00:00,1.381402,2.377072,272.530304,1.022075,0.571653,80046.03125,1.2e-05,2.749317


### Convert time to pandas datetime

In [4]:
# Run the 'time' column through pd.to_datetime and store the result back in
# the same column, leaving every other column untouched.
data_df['time'] = data_df['time'].pipe(pd.to_datetime)
print(data_df.shape)
data_df.head(2)

(36189648, 11)


Unnamed: 0,latitude,longitude,time,u10,v10,t2m,lai_hv,lai_lv,sp,tp,wind_magnitude_10m
0,36.0,68.0,2019-01-01 00:00:00,0.664481,2.946031,271.367676,1.022075,0.571653,80007.523438,0.0,3.020039
1,36.0,68.0,2019-01-01 01:00:00,1.381402,2.377072,272.530304,1.022075,0.571653,80046.03125,1.2e-05,2.749317


### Rename column headers to sensible names

In [5]:
# Mapping from the short variable codes in the loaded data to descriptive
# column names. Columns that are not listed here keep their current names.
readable_names = {
    'u10': 'wind_u_10m',
    'v10': 'wind_v_10m',
    'sp': 'surface_pressure',
    'tp': 'total_precipitation',
    't2m': 'temp_2m',
}
data_df = data_df.rename(columns=readable_names)
print(data_df.shape)
data_df.head(2)

(36189648, 11)


Unnamed: 0,latitude,longitude,time,wind_u_10m,wind_v_10m,temp_2m,lai_hv,lai_lv,surface_pressure,total_precipitation,wind_magnitude_10m
0,36.0,68.0,2019-01-01 00:00:00,0.664481,2.946031,271.367676,1.022075,0.571653,80007.523438,0.0,3.020039
1,36.0,68.0,2019-01-01 01:00:00,1.381402,2.377072,272.530304,1.022075,0.571653,80046.03125,1.2e-05,2.749317


### Saving processed file as pickle

In [6]:
# Create the output directory if it is missing so the save does not fail
# (e.g. on a fresh checkout) after the expensive load above has already run.
processed_dir = Path('../processed_data')
processed_dir.mkdir(parents=True, exist_ok=True)

# The output file name embeds file_name, e.g. weather_v1.1.0.pickle.
data_df.to_pickle(processed_dir / ('weather_v' + file_name + '.pickle'))