In general correlation increases as the number of days increases (although this is not as true for the temperature variable)

In [1]:
import xarray as xr

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
DATADIR = '/rds/general/user/mc4117/home/WeatherBench/data/'  

In [3]:
temp = xr.open_mfdataset(f'{DATADIR}temperature/*.nc', combine='by_coords')
temp850 = xr.open_mfdataset(f'{DATADIR}temperature_850/*.nc', combine='by_coords')
spec_humid = xr.open_mfdataset(f'{DATADIR}specific_humidity/*.nc', combine='by_coords')
geo = xr.open_mfdataset(f'{DATADIR}geopotential/*.nc', combine='by_coords')
geo500 = xr.open_mfdataset(f'{DATADIR}geopotential_500/*.nc', combine='by_coords')

In [4]:
def covariance(x, y, dim=None):
    valid_values = x.notnull() & y.notnull()
    valid_count = valid_values.sum(dim)

    demeaned_x = (x - x.mean(dim)).fillna(0)
    demeaned_y = (y - y.mean(dim)).fillna(0)
    
    return xr.dot(demeaned_x, demeaned_y, dims=dim) / valid_count

def correlation(x, y, dim=None):
    # dim should default to the intersection of x.dims and y.dims
    return covariance(x, y, dim) / (x.std(dim) * y.std(dim))

In [45]:
def correlation_table(data1, data2_level, lead_time):

    data1_correct = data1.sel(time = slice(data1.time[lead_time], None))
    data2_known = data2_level.sel(time = slice(None, data2_level.time[-(lead_time +1)]))
    
    level_values = data2_known.level.values
    
    level_sets = [data2_known.sel(level = i) for i in level_values]
    
    corr = [correlation(i, data1_correct).values for i in level_sets]

    df = pd.concat([pd.DataFrame(level_values, columns = ['Levels']), pd.DataFrame(corr, columns = ['Correlation_' + str(lead_time)])], axis = 1)
    
    return level_values, corr, df

## Correlation to tmp850

In [56]:
level_values_temp_0, corr_temp_0, df_temp_0 = correlation_table(temp850.t, temp.t, 0)
level_values_temp_72, corr_temp_72, df_temp_72 = correlation_table(temp850.t, temp.t, 72)
level_values_temp_120, corr_temp_120, df_temp_120 = correlation_table(temp850.t, temp.t, 120)

pd.concat([df_temp_0, df_temp_72.drop(['Levels'], axis =1), df_temp_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.034967,-0.034809,-0.034708
1,100,-0.458015,-0.457921,-0.457855
2,150,-0.284129,-0.284021,-0.283943
3,200,0.269933,0.27002,0.270088
4,250,0.762717,0.762748,0.762777
5,300,0.908472,0.908493,0.908513
6,400,0.94327,0.943287,0.943301
7,500,0.95395,0.953968,0.95398
8,600,0.963933,0.963956,0.96397
9,700,0.979782,0.979813,0.979832


In [51]:
level_values_hum_0, corr_hum_0, df_hum_0 = correlation_table(temp850.t, spec_humid.q, 0)
level_values_hum_72, corr_hum_72, df_hum_72 = correlation_table(temp850.t, spec_humid.q, 72)
level_values_hum_120, corr_hum_120, df_hum_120 = correlation_table(temp850.t, spec_humid.q, 120)

pd.concat([df_hum_0, df_hum_72.drop(['Levels'], axis =1), df_hum_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.080888,0.080938,0.080969
1,100,0.210104,0.2103,0.210436
2,150,0.648467,0.648493,0.64851
3,200,0.619775,0.619786,0.619792
4,250,0.577071,0.57708,0.577082
5,300,0.547593,0.547605,0.547607
6,400,0.515883,0.515896,0.515898
7,500,0.535203,0.535213,0.535216
8,600,0.602929,0.602939,0.602941
9,700,0.671112,0.671127,0.671132


In [52]:
level_values_geo_0, corr_geo_0, df_geo_0 = correlation_table(temp850.t, geo.z, 0)
level_values_geo_72, corr_geo_72, df_geo_72 = correlation_table(temp850.t, geo.z, 72)
level_values_geo_120, corr_geo_120, df_geo_120 = correlation_table(temp850.t, geo.z, 120)

pd.concat([df_geo_0, df_geo_72.drop(['Levels'], axis =1), df_geo_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.727277,0.72734,0.727388
1,100,0.902568,0.902603,0.90263
2,150,0.944963,0.944992,0.945013
3,200,0.951948,0.951973,0.95199
4,250,0.952404,0.952429,0.952444
5,300,0.950823,0.950847,0.950862
6,400,0.945276,0.945301,0.945316
7,500,0.93468,0.934707,0.934722
8,600,0.915054,0.915083,0.915099
9,700,0.87698,0.877011,0.877028


## Correlation to z500

In [53]:
level_values_temp_0, corr_temp_0, df_temp_0 = correlation_table(geo500.z, temp.t, 0)
level_values_temp_72, corr_temp_72, df_temp_72 = correlation_table(geo500.z, temp.t, 72)
level_values_temp_120, corr_temp_120, df_temp_120 = correlation_table(geo500.z, temp.t, 120)

pd.concat([df_temp_0, df_temp_72.drop(['Levels'], axis =1), df_temp_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,-0.079411,-0.079202,-0.079072
1,100,-0.486386,-0.486253,-0.486165
2,150,-0.330765,-0.330615,-0.330512
3,200,0.201631,0.201758,0.201852
4,250,0.720894,0.720935,0.720975
5,300,0.894456,0.894469,0.89449
6,400,0.943129,0.943133,0.943146
7,500,0.953557,0.95356,0.953571
8,600,0.955405,0.95541,0.955421
9,700,0.946261,0.94627,0.946282


In [54]:
level_values_hum2_0, corr_hum2_0, df_hum2_0 = correlation_table(geo500.z, spec_humid.q, 0)
level_values_hum2_72, corr_hum2_72, df_hum2_72 = correlation_table(geo500.z, spec_humid.q, 72)
level_values_hum2_120, corr_hum2_120, df_hum2_120 = correlation_table(geo500.z, spec_humid.q, 120)

pd.concat([df_hum2_0, df_hum2_72.drop(['Levels'], axis =1), df_hum2_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.077275,0.077314,0.077337
1,100,0.233139,0.233331,0.233462
2,150,0.645022,0.645038,0.645052
3,200,0.612807,0.612808,0.612811
4,250,0.568087,0.568091,0.568092
5,300,0.536333,0.536341,0.536342
6,400,0.498808,0.498816,0.498817
7,500,0.504809,0.504813,0.504815
8,600,0.556295,0.556296,0.556296
9,700,0.614486,0.614491,0.614492


In [55]:
level_values_geo2_0, corr_geo2_0, df_geo2_0 = correlation_table(geo500.z, geo.z, 0)
level_values_geo2_72, corr_geo2_72, df_geo2_72 = correlation_table(geo500.z, geo.z, 72)
level_values_geo2_120, corr_geo2_120, df_geo2_120 = correlation_table(geo500.z, geo.z, 120)

pd.concat([df_geo2_0, df_geo2_72.drop(['Levels'], axis =1), df_geo2_120.drop(['Levels'], axis =1)], axis = 1)

Unnamed: 0,Levels,Correlation_0,Correlation_72,Correlation_120
0,50,0.729091,0.729174,0.729239
1,100,0.919129,0.919166,0.9192
2,150,0.968071,0.968093,0.968115
3,200,0.981644,0.981658,0.981675
4,250,0.98873,0.988741,0.988755
5,300,0.992908,0.992918,0.99293
6,400,0.998017,0.998027,0.998038
7,500,0.999999,1.000011,1.000022
8,600,0.997165,0.997179,0.99719
9,700,0.984928,0.984944,0.984956
