# EOF分析——Empirical Orthogonal Function analysis

EOF即经验正交函数分析。EOF分析是通过将时空数据集转化成物理量的空间模态和与之相联系时间上的投影（时间序列），来简化该时空数据集。

这些空间模态就是EOFs，可以看作是按方差贡献排序的一组基函数（空间中的一组正交基向量）。与之对应的时间投影称为主成分（PCs），即EOFs的时间系数。

可以由EOFs和PCs重建出原始数据。

安装：

conda install -c conda-forge eofs

In [2]:
from eofs.standard import Eof

### 创建EOF实例

Eof(dataset,weights,center,ddof)

输入：

1. dataset：numpy.ndarray，numpy.ma.MaskedArray，dask.array.Array ：2维及以上数组，第一维为时间维。允许为掩码数组或np.nan，但缺测值需不随时间维变化(缺测位置固定)。

2. weights：权重，与dataset形状相同或可执行广播形状的数组

3. center：是否中心化(处理为距平)

4. ddof：Delta degrees of freedom。自由度为 N - ddof，其中 N 为样本数（时间维长度）。默认为1。

返回：

eof实例

### 获取EOF的时间序列

eof.pcs(pcscaling=0, npcs=None)

输入：

1. pcscaling：是否缩放时间序列。默认为0。0：不缩放；1：除以特征值的平方根（缩放为单位方差）；2：乘以特征值的平方根

2. npcs：获取的时间序列数

返回：

时间序列


### 获取空间向量

eof.eofs(eofscaling=0, neofs=None)

### 空间向量对应的方差贡献

eof.varianceFraction(neigs=None)

### 将EOF表示为每个网格点的PC和dataset之间的相关

eof.eofsAsCorrelation(neofs=None)


In [16]:
import xarray as xr
from eofs.standard import Eof
import numpy as np

# Open the monthly-mean zonal wind file and grab the wind variable.
f_u = xr.open_dataset("/home/mw/input/moyu1828/uwnd.mon.mean.nc")
uwnd_all = f_u['uwnd']

# Keep December/January/February only, restrict to 1979-12 .. 2019-02, then
# subset the remaining axes by label (1000-100, 70-20, 60-160).
# NOTE(review): assumes the dims are ordered (time, level, lat, lon) - confirm.
is_winter_month = f_u.time.dt.month.isin([12, 1, 2])
u = (
    uwnd_all.loc[is_winter_month]
    .loc['1979-12-01':'2019-02-28']
    .loc[:, 1000:100, 70:20, 60:160]
)
level = f_u['level'].loc[1000:100]
lat_u = f_u['lat'].loc[70:20]

# Split the leading axis into (40, 3) and average over axes 1 and 4,
# leaving a (40, 12, 21) array to feed the EOF solver.
u_values = np.array(u)
u_level_lat = u_values.reshape(40, 3, 12, 21, 41).mean((1, 4))

# Unweighted EOF analysis; keep the two leading modes.
# (eof.eofs(neofs=2) can be used instead of the correlation maps.)
n_modes = 2
eof = Eof(u_level_lat)
u_var = eof.varianceFraction(neigs=n_modes)
u_pc = eof.pcs(npcs=n_modes, pcscaling=1)
u_eof = eof.eofsAsCorrelation(neofs=n_modes)

In [17]:
# Show the shapes of the correlation maps, the PC series and the variance fractions.
print(u_eof.shape,u_pc.shape,u_var.shape)

(2, 12, 21) (40, 2) (2,)


In [18]:
import matplotlib.pyplot as plt

# x values for the PC panels: 40 consecutive years, 1979..2018.
years = range(1979, 2019)

# Settings shared by both EOF panels.
contour_levels = np.arange(-0.8, 0.9, 0.1)
pressure_ticks = [1000, 500, 300, 200, 100]
# Pin the tick positions explicitly so the fixed labels below cannot drift
# away from the values they describe if the auto-locator picks other ticks.
lat_ticks = np.arange(20, 80, 10)
lat_ticklabels = [r'%d$^\degree$N' % lat_value for lat_value in lat_ticks]

fig = plt.figure(figsize=(15, 15))

# One row per mode: (mode index, EOF axes rect, PC axes rect, EOF title, PC title).
panel_rows = [
    (0, [0.1, 0.4, 0.3, 0.2], [0.45, 0.4, 0.3, 0.2], '(a) EOF1', '(b) PC1'),
    (1, [0.1, 0.1, 0.3, 0.2], [0.45, 0.1, 0.3, 0.2], '(c) EOF2', '(d) PC2'),
]
for mode, eof_rect, pc_rect, eof_title, pc_title in panel_rows:
    # Left column: filled contours of the mode on the latitude/level plane.
    ax_eof = fig.add_axes(eof_rect)
    c2 = ax_eof.contourf(lat_u, level, u_eof[mode, :, :], levels=contour_levels,
                         extend='both', zorder=0, cmap=plt.cm.RdBu_r)
    ax_eof.set_ylabel('High (hPa)', fontsize=18)
    ax_eof.set_xlabel('Latitude', fontsize=18)
    ax_eof.set_yscale('symlog')
    ax_eof.set_yticks(pressure_ticks)
    ax_eof.set_yticklabels([str(p) for p in pressure_ticks])
    # Reversed limits put 1000 at the bottom and 100 at the top.
    ax_eof.set_ylim(1000, 100)
    ax_eof.set_xticks(lat_ticks)
    ax_eof.set_xticklabels(lat_ticklabels)
    ax_eof.set_title(eof_title, loc='left')
    # Variance fraction of this mode, shown as a percentage.
    ax_eof.set_title('%.2f%%' % (u_var[mode]*100), loc='right')

    # Right column: the matching PC time series around a dashed zero line.
    ax_pc = fig.add_axes(pc_rect)
    ax_pc.set_title(pc_title, loc='left')
    ax_pc.set_ylim(-3, 3)
    ax_pc.axhline(0, linestyle="--")
    ax_pc.plot(years, u_pc[:, mode], c='k')

# Both EOF panels use the same levels and colormap, so one colorbar serves both.
position = fig.add_axes([0.1, 0.023, 0.3, 0.017])
fig.colorbar(c2, cax=position, orientation='horizontal', format='%.1f')

plt.show()

### 特征根

eof.eigenvalues(neigs=None)

### North检验

eof.northTest(neigs=None, vfscaled=False)

输入：

neigs：需要返回典型误差的特征值个数。默认返回所有特征值的典型误差；如果请求的个数大于可用的特征值个数，则返回所有可用特征值的典型误差。

vfscaled：如果*True*，则按特征值之和缩放误差。

In [20]:
# Print the typical errors of the three leading eigenvalues, then the
# eigenvalues themselves, so the two can be compared side by side.
print(eof.northTest(neigs=3))
print(eof.eigenvalues(neigs=3))

[112.357765  54.045666  42.347656]
[502.47922 241.69957 189.38448]



![Image Name](https://cdn.kesci.com/upload/image/r00nrjp6z0.png?imageView2/0/w/960/h/960)


In [23]:
# sqrt(cos(latitude)) weights, shaped (1, 21) so they broadcast along the
# last axis of u_level_lat.
lat_radians = np.deg2rad(lat_u)
coslat = np.array(np.sqrt(np.cos(lat_radians))).reshape((1, 21))

# Same analysis as before, but letting the solver apply the weights.
# (eof.eofs(neofs=2) can be used instead of the correlation maps.)
n_modes = 2
eof = Eof(u_level_lat, weights=coslat)
u_var = eof.varianceFraction(neigs=n_modes)
u_pc = eof.pcs(npcs=n_modes, pcscaling=1)
u_eof = eof.eofsAsCorrelation(neofs=n_modes)

import matplotlib.pyplot as plt

# x values for the PC panels: 40 consecutive years, 1979..2018.
years = range(1979, 2019)

# Settings shared by both EOF panels.
contour_levels = np.arange(-0.8, 0.9, 0.1)
pressure_ticks = [1000, 500, 300, 200, 100]
# Pin the tick positions explicitly so the fixed labels below cannot drift
# away from the values they describe if the auto-locator picks other ticks.
lat_ticks = np.arange(20, 80, 10)
lat_ticklabels = [r'%d$^\degree$N' % lat_value for lat_value in lat_ticks]

fig = plt.figure(figsize=(15, 15))

# One row per mode: (mode index, EOF axes rect, PC axes rect, EOF title, PC title).
panel_rows = [
    (0, [0.1, 0.4, 0.3, 0.2], [0.45, 0.4, 0.3, 0.2], '(a) EOF1', '(b) PC1'),
    (1, [0.1, 0.1, 0.3, 0.2], [0.45, 0.1, 0.3, 0.2], '(c) EOF2', '(d) PC2'),
]
for mode, eof_rect, pc_rect, eof_title, pc_title in panel_rows:
    # Left column: filled contours of the mode on the latitude/level plane.
    ax_eof = fig.add_axes(eof_rect)
    c2 = ax_eof.contourf(lat_u, level, u_eof[mode, :, :], levels=contour_levels,
                         extend='both', zorder=0, cmap=plt.cm.RdBu_r)
    ax_eof.set_ylabel('High (hPa)', fontsize=18)
    ax_eof.set_xlabel('Latitude', fontsize=18)
    ax_eof.set_yscale('symlog')
    ax_eof.set_yticks(pressure_ticks)
    ax_eof.set_yticklabels([str(p) for p in pressure_ticks])
    # Reversed limits put 1000 at the bottom and 100 at the top.
    ax_eof.set_ylim(1000, 100)
    ax_eof.set_xticks(lat_ticks)
    ax_eof.set_xticklabels(lat_ticklabels)
    ax_eof.set_title(eof_title, loc='left')
    # Variance fraction of this mode, shown as a percentage.
    ax_eof.set_title('%.2f%%' % (u_var[mode]*100), loc='right')

    # Right column: the matching PC time series around a dashed zero line.
    ax_pc = fig.add_axes(pc_rect)
    ax_pc.set_title(pc_title, loc='left')
    ax_pc.set_ylim(-3, 3)
    ax_pc.axhline(0, linestyle="--")
    ax_pc.plot(years, u_pc[:, mode], c='k')

# Both EOF panels use the same levels and colormap, so one colorbar serves both.
position = fig.add_axes([0.1, 0.023, 0.3, 0.017])
fig.colorbar(c2, cax=position, orientation='horizontal', format='%.1f')

plt.show()

In [24]:
# sqrt(cos(latitude)) weights, shaped (1, 1, 21) so they broadcast against
# the full 3-D u_level_lat array.
lat_radians = np.deg2rad(lat_u)
coslat = np.array(np.sqrt(np.cos(lat_radians))).reshape((1, 1, 21))

# Here the weights are multiplied into the data by hand instead of being
# passed to the solver through `weights=`.
# (eof.eofs(neofs=2) can be used instead of the correlation maps.)
n_modes = 2
u_weighted = u_level_lat*coslat
eof = Eof(u_weighted)
u_var = eof.varianceFraction(neigs=n_modes)
u_pc = eof.pcs(npcs=n_modes, pcscaling=1)
u_eof = eof.eofsAsCorrelation(neofs=n_modes)

import matplotlib.pyplot as plt

# x values for the PC panels: 40 consecutive years, 1979..2018.
years = range(1979, 2019)

# Settings shared by both EOF panels.
contour_levels = np.arange(-0.8, 0.9, 0.1)
pressure_ticks = [1000, 500, 300, 200, 100]
# Pin the tick positions explicitly so the fixed labels below cannot drift
# away from the values they describe if the auto-locator picks other ticks.
lat_ticks = np.arange(20, 80, 10)
lat_ticklabels = [r'%d$^\degree$N' % lat_value for lat_value in lat_ticks]

fig = plt.figure(figsize=(15, 15))

# One row per mode: (mode index, EOF axes rect, PC axes rect, EOF title, PC title).
panel_rows = [
    (0, [0.1, 0.4, 0.3, 0.2], [0.45, 0.4, 0.3, 0.2], '(a) EOF1', '(b) PC1'),
    (1, [0.1, 0.1, 0.3, 0.2], [0.45, 0.1, 0.3, 0.2], '(c) EOF2', '(d) PC2'),
]
for mode, eof_rect, pc_rect, eof_title, pc_title in panel_rows:
    # Left column: filled contours of the mode on the latitude/level plane.
    ax_eof = fig.add_axes(eof_rect)
    c2 = ax_eof.contourf(lat_u, level, u_eof[mode, :, :], levels=contour_levels,
                         extend='both', zorder=0, cmap=plt.cm.RdBu_r)
    ax_eof.set_ylabel('High (hPa)', fontsize=18)
    ax_eof.set_xlabel('Latitude', fontsize=18)
    ax_eof.set_yscale('symlog')
    ax_eof.set_yticks(pressure_ticks)
    ax_eof.set_yticklabels([str(p) for p in pressure_ticks])
    # Reversed limits put 1000 at the bottom and 100 at the top.
    ax_eof.set_ylim(1000, 100)
    ax_eof.set_xticks(lat_ticks)
    ax_eof.set_xticklabels(lat_ticklabels)
    ax_eof.set_title(eof_title, loc='left')
    # Variance fraction of this mode, shown as a percentage.
    ax_eof.set_title('%.2f%%' % (u_var[mode]*100), loc='right')

    # Right column: the matching PC time series around a dashed zero line.
    ax_pc = fig.add_axes(pc_rect)
    ax_pc.set_title(pc_title, loc='left')
    ax_pc.set_ylim(-3, 3)
    ax_pc.axhline(0, linestyle="--")
    ax_pc.plot(years, u_pc[:, mode], c='k')

# Both EOF panels use the same levels and colormap, so one colorbar serves both.
position = fig.add_axes([0.1, 0.023, 0.3, 0.017])
fig.colorbar(c2, cax=position, orientation='horizontal', format='%.1f')

plt.show()

# 站点数据的EOF

In [25]:
import pandas as pd
import numpy as np
from eofs.standard import Eof
# Years to analyse. The matrix below is sized from this range so that every
# row is actually filled; previously the array had 5 rows but the loop only
# covered 4 years, leaving an all-zero row that was fed into the EOF.
station_years = range(2008, 2013)
t = np.zeros((len(station_years), 825))

# Whitespace-separated table without a header row; -99.90 marks missing values.
f = pd.read_csv('/home/mw/input/moyu1828/1957_2012_tm_year_avg.txt', sep=r'\s+',
                names=['station', 'lat', 'lon', 'alt', 'year', 'tm'], na_values=-99.90)

for row, year in enumerate(station_years):
    # Rows with any missing field are dropped before the values are stored.
    # NOTE(review): the assignment requires exactly 825 remaining stations per
    # year, and rows of `t` are only aligned by station if every year keeps
    # the same stations in the same order - confirm against the data.
    data = f.loc[f.year == year].dropna(axis=0, how='any')
    t[row] = data.tm.values

# Station coordinates are taken from the last year processed.
# Presumably the file stores them multiplied by 100 - verify.
lat = data.lat.values/100
lon = data.lon.values/100

In [28]:
# Check the shape of the station temperature matrix.
print(t.shape)

# (lat.shape,lon.shape)

(5, 825)


In [29]:
# EOF analysis of the station matrix; keep the two leading modes.
# (eof.eofs(neofs=2) can be used instead of the correlation maps.)
n_modes = 2
eof = Eof(t)
t_var = eof.varianceFraction(neigs=n_modes)
t_pc = eof.pcs(npcs=n_modes, pcscaling=1)
t_eof = eof.eofsAsCorrelation(neofs=n_modes)

In [31]:
# Only the last expression of a notebook cell is echoed, so the output below
# is t_pc.shape; wrap both in print() to see t_eof.shape as well.
t_eof.shape
t_pc.shape

(5, 2)

In [33]:
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.mpl.ticker as cticker

# Coordinates passed in below are plain lon/lat; the map itself is drawn in a
# PlateCarree projection centred on 90 degrees.
data_crs = ccrs.PlateCarree()
map_projection = ccrs.PlateCarree(central_longitude=90)

fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_axes([0.1, 0.1, 0.8, 0.4], projection=map_projection)

# Requested map window: west, east, south, north.
leftlon, rightlon, lowerlat, upperlat = 60, 150, 20, 80
img_extent = [leftlon, rightlon, lowerlat, upperlat]
ax1.set_extent(img_extent, crs=data_crs)
# Coastlines or lakes can be added here with ax1.add_feature(cfeature....).

# Ticks every 30 degrees, formatted as longitudes / latitudes.
# NOTE(review): the y ticks start at 0, which is below lowerlat (20) - confirm
# that this is intended.
ax1.set_xticks(np.arange(60, 150 + 30, 30), crs=data_crs)
ax1.set_yticks(np.arange(0, 60 + 30, 30), crs=data_crs)
lon_formatter = cticker.LongitudeFormatter()
lat_formatter = cticker.LatitudeFormatter()
ax1.xaxis.set_major_formatter(lon_formatter)
ax1.yaxis.set_major_formatter(lat_formatter)

# One dot per station, coloured by its value in the first EOF map.
c1 = ax1.scatter(lon, lat, s=2, zorder=0, c=t_eof[0], transform=data_crs,
                 cmap=plt.cm.bwr)

# Horizontal colorbar in its own axes below the map.
position = fig.add_axes([0.35, 0.02, 0.35, 0.025])
fig.colorbar(c1, cax=position, orientation='horizontal', format='%.2f')

<matplotlib.colorbar.Colorbar at 0x7f398be233d0>