This Script will utilize numpy's built in eigenvector function to find principle components of variable fields 

Procedure:
    1. Turn fields into column vectors is latXlon space
        -> in this way each dimension represents the value at a gridpoint. Each vector represents a variable field     represents a varialble field at a timestep
    2. Center vectors for convenience by subtracting the mean 
    3. Calculate the co variance matrix 

In [7]:
#################################################################
####### import useful packages and define constants #############
#################################################################

import matplotlib.pyplot as plt
import numpy as np

import plotly
import netCDF4 as nc
import plotly.plotly as py 
import plotly.tools as tls
from plotly.graph_objs import * 

#specify useful variables before running 
filename = 'C:\\Users\\Nathaniel\\Desktop\\Summer2018\\HC_Summer2018\\Data\\vwnd.mon.mean_1948-2017.nc'
alt = 1000 

In [9]:
#################################################################
#############      define useful functions       ################
#################################################################

#see_file_data will receive a file with its path as a string and print information about the file...thie file must be net cdf 
#              will not return anything 
def see_file_data(path):
    temp = nc.Dataset(filename,'r')
    print('FILENAME: ', filename, '\n','\n')
    print('FILE HEADER: ', '\n', temp,'\n','\n')
    print('FILE VARIABLES: ','\n',temp.variables)

    return

#fix_lon will be given a data array and a longitude array 
#        will return arrays of longitude and data that have been reformatted
def fix_lon(array_to_fix, lon_of_array):
    
    #make longitude from -180 to 180 degrees east 
    tmp_lon = lon_of_array
    for n in range (tmp_lon.size):
        if tmp_lon[n]>180:
            tmp_lon[n] = lon_of_array[n]-360

    #reorient data to be centered at 0 degrees east 
    i_west    = np.where(tmp_lon<0)
    i_east    = np.where(tmp_lon>0)
    west      = tmp_lon[i_west]
    east      = tmp_lon[i_east]
    fixed_lon = np.array(np.hstack((west,east)))
    
    #make similar adjustments so that vwnd matches new longitude 
    vwnd_west   = np.squeeze(array_to_fix[:,:,i_west])
    vwnd_east   = np.squeeze(array_to_fix[:,:,i_east])
    fixed_array = np.concatenate((vwnd_west,vwnd_east), axis=2)
        
    return fixed_array,fixed_lon

#read_nc_file will be given the path to a netcdf file
#             will return an array of time, level, lat, lon, data
#                  >lon, lat and data will be adjusted so lon is from -180 to 180 degrees east, lat is 0 to 360 degrees north
def read_nc_file( filename ):
    
    print('Reading netCDF file...')
    
    #extract lon, lat, level and data stored in the netCDF file specified 
    with nc.Dataset(filename,'r') as f:
        lon   = f.variables['lon'][::]
        lat   = f.variables['lat'][::-1]
        time  = f.variables['time'][::]
        level = f.variables['level'][::]
        temp0 = f.variables['vwnd'][:,:,::-1,:]

    #find index of appropriate pressure 
    index = np.where(level == alt)
    
    temp1 = temp0[:,index,:,:]
    vwnd  = temp1.squeeze()
      
    vwnd,lon = fix_lon(vwnd,lon)
    
    return time, level, lat, lon, vwnd

#takes data array and centers it returing the centered array and the mean  
def center_data(X):
    mu = X.mean(axis=0)
    X = X - mu
    return mu, X

#takes data fields and the number of eigenvales to returns the specified number of eigenvalues 
def get_principle_components(X,n):
    N     = X.shape[0]
    sigma = np.dot(X.T,X) / N
    l,v   = np.linalg.eigh(sigma)
    v     = np.fliplr(v)
    return l,v[:,:n]

#get dim reduced data ta
def get_dim_reduced_data(X, dprime,vals,vecs):
    l = vals
    v = vecs
    coefs = np.dot(X, v)
    print(coefs.shape)
    reduced_X = np.dot(coefs[:, :dprime], v[:, :dprime].T)
    return reduced_X


In [10]:
################################################################################
##### utilize previously defined functions for preform PCA on data fields ######
################################################################################

#extract important variables from netCDF file at appropriate level
time, level, lat, lon, vwnd = read_nc_file(filename)

mu, centered_vwnd = center_data(vwnd)

l,v = get_principle_components(centered_vwnd,10)


Reading netCDF file...


ValueError: shapes (143,73,833) and (833,73,143) not aligned: 833 (dim 2) != 73 (dim 1)