In general correlation increases as the number of days increases (although this is not as true for the temperature variable)

In [1]:
import xarray as xr

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
DATADIR = '/rds/general/user/mc4117/home/WeatherBench/data/'  

In [17]:
temp = xr.open_mfdataset(f'{DATADIR}temperature/*.nc', combine='by_coords')
temp850 = xr.open_mfdataset(f'{DATADIR}temperature_850/*.nc', combine='by_coords')
spec_humid = xr.open_mfdataset(f'{DATADIR}specific_humidity/*.nc', combine='by_coords')
geo = xr.open_mfdataset(f'{DATADIR}geopotential/*.nc', combine='by_coords')
geo500 = xr.open_mfdataset(f'{DATADIR}geopotential_500/*.nc', combine='by_coords')
pot_vort = xr.open_mfdataset(f'{DATADIR}potential_vorticity/*.nc', combine='by_coords')
u_wind = xr.open_mfdataset(f'{DATADIR}u_component_of_wind/*.nc', combine='by_coords')
v_wind = xr.open_mfdataset(f'{DATADIR}v_component_of_wind/*.nc', combine='by_coords')
rel_hum = xr.open_mfdataset(f'{DATADIR}relative_humidity/*.nc', combine='by_coords')
vort = xr.open_mfdataset(f'{DATADIR}vorticity/*.nc', combine='by_coords')

In [4]:
def covariance(x, y, dim=None):
    valid_values = x.notnull() & y.notnull()
    valid_count = valid_values.sum(dim)

    demeaned_x = (x - x.mean(dim)).fillna(0)
    demeaned_y = (y - y.mean(dim)).fillna(0)
    
    return xr.dot(demeaned_x, demeaned_y, dims=dim) / valid_count

def correlation(x, y, dim=None):
    # dim should default to the intersection of x.dims and y.dims
    return covariance(x, y, dim) / (x.std(dim) * y.std(dim))

In [5]:
def correlation_table(data1, data2_level, lead_time):

    data1_correct = data1.sel(time = slice(data1.time[lead_time], None))
    data2_known = data2_level.sel(time = slice(None, data2_level.time[-(lead_time +1)]))
    
    level_values = data2_known.level.values
    
    level_sets = [data2_known.sel(level = i) for i in level_values]
    
    corr = [correlation(i, data1_correct).values for i in level_sets]

    df = pd.concat([pd.DataFrame(level_values, columns = ['Levels']), pd.DataFrame(corr, columns = ['Correlation_' + str(lead_time)])], axis = 1)
    
    return level_values, corr, df

## Correlation to tmp850

In [56]:
level_values_temp_0, corr_temp_0, df_temp_0 = correlation_table(temp850.t, temp.t, 0)
level_values_temp_72, corr_temp_72, df_temp_72 = correlation_table(temp850.t, temp.t, 72)
level_values_temp_120, corr_temp_120, df_temp_120 = correlation_table(temp850.t, temp.t, 120)

pd.concat([df_temp_0, df_temp_72.drop(['Levels'], axis =1), df_temp_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.034967,-0.034809,-0.034708
1,100,-0.458015,-0.457921,-0.457855
2,150,-0.284129,-0.284021,-0.283943
3,200,0.269933,0.27002,0.270088
4,250,0.762717,0.762748,0.762777
5,300,0.908472,0.908493,0.908513
6,400,0.94327,0.943287,0.943301
7,500,0.95395,0.953968,0.95398
8,600,0.963933,0.963956,0.96397
9,700,0.979782,0.979813,0.979832


In [6]:
level_values_pv_0, corr_pv_0, df_pv_0 = correlation_table(temp850.t, pot_vort.pv, 0)
level_values_pv_72, corr_pv_72, df_pv_72 = correlation_table(temp850.t, pot_vort.pv, 72)
level_values_pv_120, corr_pv_120, df_pv_120 = correlation_table(temp850.t, pot_vort.pv, 120)

pd.concat([df_pv_0, df_pv_72.drop(['Levels'], axis =1), df_pv_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.181532,0.181702,0.181811
1,100,0.155364,0.155497,0.155585
2,150,0.140742,0.140869,0.140952
3,200,0.111629,0.11174,0.111817
4,250,0.071633,0.071718,0.071777
5,300,0.053479,0.053574,0.053638
6,400,0.085946,0.086086,0.086184
7,500,0.149889,0.150038,0.15014
8,600,0.15961,0.159679,0.159724
9,700,0.247689,0.24778,0.247838


In [51]:
level_values_hum_0, corr_hum_0, df_hum_0 = correlation_table(temp850.t, spec_humid.q, 0)
level_values_hum_72, corr_hum_72, df_hum_72 = correlation_table(temp850.t, spec_humid.q, 72)
level_values_hum_120, corr_hum_120, df_hum_120 = correlation_table(temp850.t, spec_humid.q, 120)

pd.concat([df_hum_0, df_hum_72.drop(['Levels'], axis =1), df_hum_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.080888,0.080938,0.080969
1,100,0.210104,0.2103,0.210436
2,150,0.648467,0.648493,0.64851
3,200,0.619775,0.619786,0.619792
4,250,0.577071,0.57708,0.577082
5,300,0.547593,0.547605,0.547607
6,400,0.515883,0.515896,0.515898
7,500,0.535203,0.535213,0.535216
8,600,0.602929,0.602939,0.602941
9,700,0.671112,0.671127,0.671132


In [52]:
level_values_geo_0, corr_geo_0, df_geo_0 = correlation_table(temp850.t, geo.z, 0)
level_values_geo_72, corr_geo_72, df_geo_72 = correlation_table(temp850.t, geo.z, 72)
level_values_geo_120, corr_geo_120, df_geo_120 = correlation_table(temp850.t, geo.z, 120)

pd.concat([df_geo_0, df_geo_72.drop(['Levels'], axis =1), df_geo_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.727277,0.72734,0.727388
1,100,0.902568,0.902603,0.90263
2,150,0.944963,0.944992,0.945013
3,200,0.951948,0.951973,0.95199
4,250,0.952404,0.952429,0.952444
5,300,0.950823,0.950847,0.950862
6,400,0.945276,0.945301,0.945316
7,500,0.93468,0.934707,0.934722
8,600,0.915054,0.915083,0.915099
9,700,0.87698,0.877011,0.877028


In [9]:
level_values_uwind_0, corr_uwind_0, df_uwind_0 = correlation_table(temp850.t, u_wind.u, 0)
level_values_uwind_72, corr_uwind_72, df_uwind_72 = correlation_table(temp850.t, u_wind.u, 72)
level_values_uwind_120, corr_uwind_120, df_uwind_120 = correlation_table(temp850.t, u_wind.u, 120)

pd.concat([df_uwind_0, df_uwind_72.drop(['Levels'], axis =1), df_uwind_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.447923,-0.447946,-0.447961
1,100,-0.182675,-0.18274,-0.182783
2,150,0.02884,0.028778,0.028738
3,200,0.062356,0.062315,0.062288
4,250,0.037009,0.036977,0.036957
5,300,0.010063,0.010035,0.010018
6,400,-0.030779,-0.0308,-0.030811
7,500,-0.059573,-0.059584,-0.059588
8,600,-0.07134,-0.071337,-0.071333
9,700,-0.068216,-0.068199,-0.068185


In [10]:
level_values_vwind_0, corr_vwind_0, df_vwind_0 = correlation_table(temp850.t, v_wind.v, 0)
level_values_vwind_72, corr_vwind_72, df_vwind_72 = correlation_table(temp850.t, v_wind.v, 72)
level_values_vwind_120, corr_vwind_120, df_vwind_120 = correlation_table(temp850.t, v_wind.v, 120)

pd.concat([df_vwind_0, df_vwind_72.drop(['Levels'], axis =1), df_vwind_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.008289,-0.008285,-0.008277
1,100,-0.006562,-0.006567,-0.006567
2,150,-0.01076,-0.010781,-0.010789
3,200,-0.01204,-0.012061,-0.012072
4,250,-0.011531,-0.011548,-0.011559
5,300,-0.011553,-0.011568,-0.011576
6,400,-0.013114,-0.01313,-0.013139
7,500,-0.015544,-0.015567,-0.015579
8,600,-0.018919,-0.018945,-0.018958
9,700,-0.032393,-0.03242,-0.032437


In [14]:
level_values_rh_0, corr_rh_0, df_rh_0 = correlation_table(temp850.t, rel_hum.r, 0)
level_values_rh_72, corr_rh_72, df_rh_72 = correlation_table(temp850.t, rel_hum.r, 72)
level_values_rh_120, corr_rh_120, df_rh_120 = correlation_table(temp850.t, rel_hum.r, 120)

pd.concat([df_rh_0, df_rh_72.drop(['Levels'], axis =1), df_rh_120.drop(['Levels'], axis =1)], axis = 1)


Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.371858,-0.371966,-0.372045
1,100,0.447297,0.447228,0.447178
2,150,0.427615,0.427555,0.427514
3,200,0.325084,0.325016,0.324968
4,250,0.067658,0.067601,0.067554
5,300,-0.245612,-0.245626,-0.24566
6,400,-0.429238,-0.429233,-0.429242
7,500,-0.412299,-0.412297,-0.412304
8,600,-0.344237,-0.344237,-0.344243
9,700,-0.360056,-0.36006,-0.360072


In [18]:
level_values_rv_0, corr_rv_0, df_rv_0 = correlation_table(temp850.t, vort.vo, 0)
level_values_rv_72, corr_rv_72, df_rv_72 = correlation_table(temp850.t, vort.vo, 72)
level_values_rv_120, corr_rv_120, df_rv_120 = correlation_table(temp850.t, vort.vo, 120)

pd.concat([df_rv_0, df_rv_72.drop(['Levels'], axis =1), df_rv_120.drop(['Levels'], axis =1)], axis = 1)


Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.142286,0.142445,0.142555
1,100,0.082952,0.083057,0.083131
2,150,0.042654,0.042718,0.042763
3,200,0.021706,0.021748,0.021778
4,250,0.009101,0.009129,0.00915
5,300,0.003251,0.003276,0.003291
6,400,-0.00114,-0.001117,-0.001103
7,500,-0.004858,-0.004834,-0.00482
8,600,-0.012682,-0.012665,-0.012652
9,700,-0.034512,-0.034505,-0.034502


## Correlation to z500

In [53]:
level_values_temp_0, corr_temp_0, df_temp_0 = correlation_table(geo500.z, temp.t, 0)
level_values_temp_72, corr_temp_72, df_temp_72 = correlation_table(geo500.z, temp.t, 72)
level_values_temp_120, corr_temp_120, df_temp_120 = correlation_table(geo500.z, temp.t, 120)

pd.concat([df_temp_0, df_temp_72.drop(['Levels'], axis =1), df_temp_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.079411,-0.079202,-0.079072
1,100,-0.486386,-0.486253,-0.486165
2,150,-0.330765,-0.330615,-0.330512
3,200,0.201631,0.201758,0.201852
4,250,0.720894,0.720935,0.720975
5,300,0.894456,0.894469,0.89449
6,400,0.943129,0.943133,0.943146
7,500,0.953557,0.95356,0.953571
8,600,0.955405,0.95541,0.955421
9,700,0.946261,0.94627,0.946282


In [54]:
level_values_hum2_0, corr_hum2_0, df_hum2_0 = correlation_table(geo500.z, spec_humid.q, 0)
level_values_hum2_72, corr_hum2_72, df_hum2_72 = correlation_table(geo500.z, spec_humid.q, 72)
level_values_hum2_120, corr_hum2_120, df_hum2_120 = correlation_table(geo500.z, spec_humid.q, 120)

pd.concat([df_hum2_0, df_hum2_72.drop(['Levels'], axis =1), df_hum2_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.077275,0.077314,0.077337
1,100,0.233139,0.233331,0.233462
2,150,0.645022,0.645038,0.645052
3,200,0.612807,0.612808,0.612811
4,250,0.568087,0.568091,0.568092
5,300,0.536333,0.536341,0.536342
6,400,0.498808,0.498816,0.498817
7,500,0.504809,0.504813,0.504815
8,600,0.556295,0.556296,0.556296
9,700,0.614486,0.614491,0.614492


In [7]:
level_values_pv2_0, corr_pv2_0, df_pv2_0 = correlation_table(geo500.z, pot_vort.pv, 0)
level_values_pv2_72, corr_pv2_72, df_pv2_72 = correlation_table(geo500.z, pot_vort.pv, 72)
level_values_pv2_120, corr_pv2_120, df_pv2_120 = correlation_table(geo500.z, pot_vort.pv, 120)

pd.concat([df_pv2_0, df_pv2_72.drop(['Levels'], axis =1), df_pv2_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.2522,0.252337,0.252416
1,100,0.237994,0.238097,0.238158
2,150,0.224988,0.225086,0.225142
3,200,0.196488,0.19657,0.196619
4,250,0.154033,0.154091,0.154124
5,300,0.130113,0.130187,0.13023
6,400,0.14587,0.145991,0.146068
7,500,0.20243,0.202548,0.202622
8,600,0.154837,0.154887,0.154918
9,700,0.219506,0.219571,0.219613


In [55]:
level_values_geo2_0, corr_geo2_0, df_geo2_0 = correlation_table(geo500.z, geo.z, 0)
level_values_geo2_72, corr_geo2_72, df_geo2_72 = correlation_table(geo500.z, geo.z, 72)
level_values_geo2_120, corr_geo2_120, df_geo2_120 = correlation_table(geo500.z, geo.z, 120)

pd.concat([df_geo2_0, df_geo2_72.drop(['Levels'], axis =1), df_geo2_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.729091,0.729174,0.729239
1,100,0.919129,0.919166,0.9192
2,150,0.968071,0.968093,0.968115
3,200,0.981644,0.981658,0.981675
4,250,0.98873,0.988741,0.988755
5,300,0.992908,0.992918,0.99293
6,400,0.998017,0.998027,0.998038
7,500,0.999999,1.000011,1.000022
8,600,0.997165,0.997179,0.99719
9,700,0.984928,0.984944,0.984956


In [11]:
level_values_uwind2_0, corr_uwind2_0, df_uwind2_0 = correlation_table(geo500.z, u_wind.u, 0)
level_values_uwind2_72, corr_uwind2_72, df_uwind2_72 = correlation_table(geo500.z, u_wind.u, 72)
level_values_uwind2_120, corr_uwind2_120, df_uwind2_120 = correlation_table(geo500.z, u_wind.u, 120)

pd.concat([df_uwind2_0, df_uwind2_72.drop(['Levels'], axis =1), df_uwind2_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.504378,-0.504442,-0.504492
1,100,-0.208922,-0.209025,-0.209098
2,150,0.025223,0.025135,0.025074
3,200,0.064873,0.064809,0.064765
4,250,0.038595,0.038543,0.038507
5,300,0.010569,0.010522,0.010489
6,400,-0.032903,-0.032944,-0.032971
7,500,-0.067433,-0.067465,-0.067485
8,600,-0.088101,-0.088118,-0.08813
9,700,-0.09817,-0.098172,-0.098174


In [12]:
level_values_vwind2_0, corr_vwind2_0, df_vwind2_0 = correlation_table(geo500.z, v_wind.v, 0)
level_values_vwind2_72, corr_vwind2_72, df_vwind2_72 = correlation_table(geo500.z, v_wind.v, 72)
level_values_vwind2_120, corr_vwind2_120, df_vwind2_120 = correlation_table(geo500.z, v_wind.v, 120)

pd.concat([df_vwind2_0, df_vwind2_72.drop(['Levels'], axis =1), df_vwind2_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.000892,-0.000862,-0.000855
1,100,0.000958,0.000978,0.000984
2,150,-0.003983,-0.003986,-0.003989
3,200,-0.004112,-0.004115,-0.00412
4,250,-0.001972,-0.001972,-0.001975
5,300,-0.000265,-0.000262,-0.000262
6,400,-0.000126,-0.000123,-0.000122
7,500,-0.0019,-0.001903,-0.001903
8,600,-0.004745,-0.004751,-0.004752
9,700,-0.019109,-0.019117,-0.019124


In [15]:
level_values_rh2_0, corr_rh2_0, df_rh2_0 = correlation_table(geo500.z, rel_hum.r, 0)
level_values_rh2_72, corr_rh2_72, df_rh2_72 = correlation_table(geo500.z, rel_hum.r, 72)
level_values_rh2_120, corr_rh2_120, df_rh2_120 = correlation_table(geo500.z, rel_hum.r, 120)

pd.concat([df_rh2_0, df_rh2_72.drop(['Levels'], axis =1), df_rh2_120.drop(['Levels'], axis =1)], axis = 1)


Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.329358,-0.32947,-0.329547
1,100,0.447418,0.447344,0.447294
2,150,0.440599,0.440531,0.440488
3,200,0.368928,0.368839,0.368783
4,250,0.119109,0.119036,0.118981
5,300,-0.218789,-0.218797,-0.218828
6,400,-0.449688,-0.449667,-0.44967
7,500,-0.459123,-0.459107,-0.459109
8,600,-0.411524,-0.41151,-0.411512
9,700,-0.425892,-0.425879,-0.425883


In [19]:
level_values_rv2_0, corr_rv2_0, df_rv2_0 = correlation_table(geo500.z, vort.vo, 0)
level_values_rv2_72, corr_rv2_72, df_rv2_72 = correlation_table(geo500.z, vort.vo, 72)
level_values_rv2_120, corr_rv2_120, df_rv2_120 = correlation_table(geo500.z, vort.vo, 120)

pd.concat([df_rv2_0, df_rv2_72.drop(['Levels'], axis =1), df_rv2_120.drop(['Levels'], axis =1)], axis = 1)


Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.185843,0.186012,0.186127
1,100,0.136776,0.136896,0.136975
2,150,0.08726,0.087335,0.087383
3,200,0.058316,0.058368,0.058399
4,250,0.03914,0.039176,0.039197
5,300,0.029448,0.029478,0.029495
6,400,0.024364,0.024395,0.02441
7,500,0.024222,0.024255,0.024272
8,600,0.019895,0.019922,0.019939
9,700,-0.002002,-0.001985,-0.001977
