# To save surface wind diagnostics into CSV files

In [None]:
from ipynb.fs.full.procc import *
from statsmodels.nonparametric.smoothers_lowess import lowess
import seaborn as sns
from scipy.stats import sem, ttest_ind
from windrose import WindroseAxes
import matplotlib.cm as cm
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
import sys
from windrose import WindroseAxes
from metpy import calc
from metpy.units import units
from scipy.signal import savgol_filter

## Function to process files into a dictionary

In [None]:
def procFiles(for_direc, file_type, byDate = False, end_year = ''):
    """Index the NetCDF files found under ``for_direc`` by file name.

    A file is kept when its name ends in ".nc" and contains ``file_type``.
    When ``byDate`` is true the name must additionally contain at least one
    entry of the global ``dates``.

    Returns a dict mapping file name -> path. If the same file name occurs
    in several directories, the one visited last by ``os.walk`` wins.
    ``end_year`` is accepted but not used by this function.

    NOTE(review): ``dates`` is not assigned anywhere in this function (its
    only visible definition is commented out), so the ``byDate`` mode relies
    on it being provided elsewhere -- confirm.
    """
    found = {}

    for folder, _subdirs, names in os.walk(for_direc):
        for fname in names:
            # ** (aijlh1) change format depending on scaleacc processing, may
            # have to add enddate so doesn't go to 0101-1953, also aij
            if not (fname.endswith(".nc") and file_type in fname):
                continue

            # `dates` is only looked up for files that passed the checks above
            if byDate==True and not any(date in fname for date in dates):
                continue

            found[fname] = os.path.join(folder + '/', fname)

    return found

In [None]:
def flattenLoL(arr): #flatten list of lists
    """Flatten one level of nesting.

    ``arr`` is an iterable of iterables; the result is a new list holding
    the elements of every inner iterable, in order. Only one level is
    removed: elements that are themselves sequences are kept as they are.
    """
    # Comprehension instead of a manual append loop; also avoids using the
    # builtin name `list` as a loop variable.
    return [val for inner in arr for val in inner]

## Assign constants and obtain input parameters

In [None]:
# Reference locations (lat, lon) noted by the author:
# fensal eastern edge = 2.0 lat, 2.5 lon
# fensal center = 2.0 lat, -52.5 lon
# shangrila eastern edge = -6.0 lat, -142.5 lon
# shangrila center = -6.0 lat, -177.5 lon
# belet eastern edge = 2.0 lat, 142.5 lon
# belet center = 2.0 lat, 107.5 lon
z = 10 # (meters) height where wind data is calculated
k = 0.4 # von Karman constant
z0 = 6*10**-4 # fensal surface roughness at equator ~= 0.0006 m
# ln(z/z0)/k, which evaluates to about 24.3 -- the same factor that appears
# as the literal 24.3 where UT is computed from UF
coeff = np.log(z/z0)*1/k
# Empty placeholder; replaced by [(lat, lon)] when a non-zonal run is requested
loc=''

# UF =[0.01, 0.02, 0.03, 0.04]

#UT = [uf*24.3 for uf in UF] # 24.3 from log wind profile equation

In [None]:
# Read the run configuration from positional command-line arguments.
# Boolean flags are true only when the argument, lower-cased, equals 'true'.
direc = str(sys.argv[1])                # root directory handed to procFiles
file_type = str(sys.argv[2])            # aijlh1
byDate = sys.argv[3].lower() == 'true'  # flag: filter files by date, adds 'byDate' to the name
multiYear = sys.argv[4].lower() == 'true' # flag: regroup the files per year
run = str(sys.argv[5])                  # run label, becomes part of the output name
start_year = str(sys.argv[6])
end_year = str(sys.argv[7])
UF = float(sys.argv[8])                 # multiplied by 24.3 to obtain UT
zonal = sys.argv[9].lower() == 'true'

# Arguments 10-13 are only read for non-zonal runs
if not zonal:
    region = str(sys.argv[10]).lower()
    location = str(sys.argv[11]).lower()
    lat = str(sys.argv[12])
    lon = str(sys.argv[13])
    # Single (lat, lon) pair; both values are kept as strings
    loc = [(lat,lon)]

In [None]:
if not zonal:

    # Short tags that become the prefix of the output name
    reg_nam = ''
    loc_nam = ''

    # Resolve the location first: the test below has to see the raw
    # command-line value of `region`, which the region mapping further down
    # overwrites with a directory fragment.
    if region == 'equator timestep':
        pass  # location is left untouched for this region
    elif location == 'center':
        location = 'Center/'
        loc_nam = 'C'
    elif location == 'edge':
        location = 'EastEdge/'
        loc_nam = 'E'

    # Map the region keyword to its directory fragment and short tag
    if region == 'belet':
        region = 'Belet/'
        reg_nam = 'Belet'
    elif region == 'shangrila':
        region = 'ShangriLa/'
        reg_nam = 'Shang'
    elif region == 'fensal':
        region = 'Fensal/'
        reg_nam = 'Fen'
    elif region == 'equator timestep':
        region = 'Equator/Timestep/'



In [None]:
# Assemble the base name of the output files.
# NOTE(review): the end-year part is int(last two digits) - 1 without zero
# padding, so e.g. '1910' yields '9' and '2000' yields '-1'.
if len(loc)>0:
    # Point run: region/location tags, run, UF digits, years, then lat/lon
    name = (
        f"{reg_nam}{loc_nam}_{run}_{str(UF)[2:]}_"
        f"{start_year[2:]}{int(end_year[2:]) - 1}"
        f"_la{int(float(loc[0][0]))}"
        f"lo{str(float(loc[0][1])).replace('.', 'p')}"
    )
elif zonal:
    name = f"{run}_{str(UF)[2:]}_{start_year[2:]}{int(end_year[2:]) - 1}_Zonal"
else:
    name = f"{run}_{str(UF)[2:]}_{start_year[2:]}{int(end_year[2:]) - 1}"

In [None]:
#saveloc = "/home/maxqc/Python/data/" + run + '/' + region + location

In [None]:
# Flat mapping {file name: path} of every matching input file
procDirec = procFiles(for_direc=direc, file_type=file_type, byDate=byDate, end_year=end_year)

In [None]:
# if end_year:
#     corrected_dict = {}
#     for key in sorted(procDirec.keys()):
#         if end_year not in key:
#             corrected_dict[key] = procDirec[key]
#         else:
#             break
#     procDirec = corrected_dict

if multiYear == True:
    # Tag the output name and regroup the flat {file name: path} mapping into
    # {'Year0': {...}, 'Year1': {...}, ...}, keyed on the first four
    # characters of each file name.
    name += 'MY'
    year = 0
    text = 'Year' + str(year)
    year_dict = {text: {}}

    for key in sorted(procDirec):

        # Names are visited in sorted order; stop at the first one whose
        # prefix equals end_year (when end_year is non-empty)
        if end_year and key[:4] == end_year:
            break

        file = procDirec[key]

        # A change of prefix opens the next 'YearN' bucket; start_year is
        # reused to remember the prefix currently being collected
        if key[:4] != start_year:
            start_year = key[:4]
            year += 1
            text = 'Year' + str(year)
            year_dict[text] = {}

        year_dict[text][key] = file

    procDirec = year_dict


In [None]:
# Mark date-filtered runs in the output name
if byDate:
    name += 'byDate'

In [None]:
def _zonalUV(path, level):
    """Read U and V at every grid point of one file and average over latitude.

    For each latitude the series of all longitudes are concatenated with
    ``flattenLoL``; the per-latitude rows are then averaged element-wise.
    ``ds.u``/``ds.v`` at ``level`` are read when ``level`` is truthy,
    otherwise ``ds.us``/``ds.vs``. Points whose selection raises ValueError
    are skipped.
    """
    lats = np.arange(-90., 91., 4)
    lons = np.arange(-177.5, 177.6, 5)

    U = []
    V = []

    # Open each file once and reuse the handle for every grid point
    with xr.open_dataset(path) as ds:
        for lat in lats:

            U_temp = []
            V_temp = []

            for lon in lons:
                try:
                    # .values reads the data while the file is still open
                    if level:
                        u_pt = ds.u.sel(lat=lat, lon=lon, level=level).values
                        v_pt = ds.v.sel(lat=lat, lon=lon, level=level).values
                    else:
                        u_pt = ds.us.sel(lat=lat, lon=lon).values
                        v_pt = ds.vs.sel(lat=lat, lon=lon).values
                except ValueError: # had issue with day not having values
                    continue

                U_temp.append(u_pt)
                V_temp.append(v_pt)

            U.append(flattenLoL(U_temp))
            V.append(flattenLoL(V_temp))

    # Author's note: len(U) = 46 latitudes, each row 55152 = 766*72 values
    return np.mean(U,axis=0), np.mean(V,axis=0)


def _pointUV(path, loc):
    """Read ``ds.us``/``ds.vs`` at each (lat, lon) of ``loc`` and average them.

    Points whose selection raises ValueError are skipped.
    """
    U = []
    V = []

    with xr.open_dataset(path) as ds:
        for lat, lon in loc:
            try:
                u_pt = ds.us.sel(lat=lat, lon=lon).values
                v_pt = ds.vs.sel(lat=lat, lon=lon).values
            except ValueError: # had issue with day not having values
                continue

            U.append(u_pt)
            V.append(v_pt)

    # Element-wise mean over the points (i.e. square around equator)
    return np.mean(U,axis=0), np.mean(V,axis=0)


def _driftStats(U, V, UT):
    """Derive speed, direction and binned drift statistics from one U/V series.

    U, V are equal-length 1-D arrays of wind components (wrapped as "m/s"
    quantities for the direction calculation); UT is the threshold speed.

    Returns ``(W, Dir, DP, RDD, DP_STD, DP_STDERR)``: the speed array, the
    list of 'to' directions, the NaN-ignoring sum of the per-bin values, the
    resultant direction in [0, 360), and the NaN-ignoring standard deviation
    and standard error of the per-bin values.
    """
    metU = units.Quantity(U, "m/s")
    metV = units.Quantity(V, "m/s")

    # 'to' direction of every sample. float() followed by rad2deg leaves the
    # values in degrees (presumably float() yields radians -- confirm).
    Dir = [calc.wind_direction(metU[i], metV[i], convention='to') for i in range(0,len(V))]
    Dir = [np.rad2deg(float(Dir[i])) for i in range(0,len(Dir))]

    W = np.array([np.sqrt((U[i]**2) + (V[i]**2)) for i in range(0,len(U))])

    # 17 edges -> 16 direction sectors of 22.5 degrees, plus their midpoints
    windbins = np.arange(0,361,22.5)
    deg_midP = [(windbins[i]+windbins[i+1])/2 for i in range(0,len(windbins)-1)]

    dir_arr = np.asarray(Dir)
    last_bin = len(windbins) - 2
    bins = []
    for i in range(0,len(windbins)-1):
        lower = dir_arr >= windbins[i]
        if i == last_bin:
            # Only the last sector is closed on the right, so a direction of
            # exactly 360 is counted
            bins.append(lower & (dir_arr <= windbins[i+1]))
        else:
            bins.append(lower & (dir_arr < windbins[i+1]))

    # Fraction of samples whose speed exceeds the threshold
    frac_above = float(np.count_nonzero(W > UT) / len(W))

    # Speeds falling in each sector; entries equal to 0 are dropped
    binnedW = [np.ma.masked_equal(in_bin*W, 0).compressed() for in_bin in bins]

    # Mean speed per sector (NaN for an empty sector)
    binnedWavg = [np.mean(speeds, axis=0) for speeds in binnedW]

    # NOTE(review): a sector whose mean speed is below UT contributes a
    # negative value here -- confirm this is intended.
    binnedDP = [(wavg**2 * (wavg - UT) * frac_above) / 100. for wavg in binnedWavg]

    # Components of the per-sector values along each sector's midpoint direction
    C = np.nansum([dp*np.sin(np.deg2rad(mid)) for dp, mid in zip(binnedDP, deg_midP)])
    D = np.nansum([dp*np.cos(np.deg2rad(mid)) for dp, mid in zip(binnedDP, deg_midP)])

    DP = np.nansum(binnedDP)

    DP_STDERR = sem(binnedDP,nan_policy='omit')
    DP_STD = np.nanstd(binnedDP)

    RDD = np.mod((np.rad2deg(np.arctan2(D,C))*-1)+90,360) # clockwise from northward

    return W, Dir, DP, RDD, DP_STD, DP_STDERR


def WindStatsTS(procDirec, UT, level = '', zonal = False, loc = [('-2','-2.5'),('-2','2.5'),('2','2.5'),('2','-2.5')]):
    """Compute wind and drift statistics for every file in ``procDirec``.

    Parameters
    ----------
    procDirec : dict mapping a key to the path of a dataset readable by
        ``xr.open_dataset``; files are processed in sorted key order.
    UT : threshold speed used for the drift statistics.
    level : only used when ``zonal`` is true; when truthy, ``u``/``v`` at
        that level are read instead of ``us``/``vs``.
    zonal : when true, every grid point is read and the series are averaged
        over latitude; otherwise only the points in ``loc`` are read.
    loc : (lat, lon) pairs averaged together in the non-zonal case. The
        default list is never modified.

    Returns
    -------
    ``(WAll, DirAll, UAll, VAll, AllDP, AllRDD, AllDP_STD, AllDP_STDERR)``,
    eight lists with one entry per processed file.
    """
    WAll = []
    DirAll = []
    UAll = []
    VAll = []
    AllRDD = []
    AllDP = []
    AllDP_STD = []
    AllDP_STDERR = []

    for key in sorted(procDirec.keys()):

        if zonal:
            U, V = _zonalUV(procDirec[key], level)
        else:
            U, V = _pointUV(procDirec[key], loc)

        W, Dir, DP, RDD, DP_STD, DP_STDERR = _driftStats(U, V, UT)

        UAll.append(U) # one day, 766
        VAll.append(V)
        AllDP.append(DP)
        AllRDD.append(RDD)
        AllDP_STD.append(DP_STD)
        AllDP_STDERR.append(DP_STDERR)
        WAll.append(W)
        DirAll.append(Dir)

    return WAll, DirAll, UAll, VAll, AllDP, AllRDD, AllDP_STD, AllDP_STDERR

    #     # For calculation of threshold velocity at 10 m, need to figure out if right
    #     # https://books.google.com.hk/books?hl=en&lr=&id=eaxTAVp9avkC&oi=fnd&pg=PA137&ots=5nAJ__p7CX&sig=_a4lg7V3Io9mrdCbMY3I7uW0HD4&redir_esc=y#v=onepage&q=threshold&f=false

In [None]:
UT = UF*24.3

if multiYear and zonal: # zonal multi-year case

    # Accumulators: one entry is appended per key of procDirec
    multiYrW, multiYrDir = [], []
    multiYrU, multiYrV = [], []
    multiYrDP, multiYrRDD = [], []
    multiYrDP_STD, multiYrDP_STDERR = [], []

    # Labels referenced by the (currently disabled) per-year CSV export below
    year = [str(1960+i) for i in range(0,11)]

    for index, key in enumerate(procDirec):

        W, Dir, U, V, AllDP, AllRDD, AllDP_STD, AllDP_STDERR = WindStatsTS(procDirec[key], UT, level="1", zonal=zonal)

        multiYrW.append(W)
        multiYrDir.append(Dir)
        multiYrU.append(U)
        multiYrV.append(V)
        multiYrDP.append(AllDP) # list of 675 * 766 values * N years
        multiYrRDD.append(AllRDD)
        multiYrDP_STD.append(AllDP_STD)
        multiYrDP_STDERR.append(AllDP_STDERR)

#         np.savetxt(saveloc+name+year[index]+"TSt_DP.csv", AllDP, delimiter=",")
#         np.savetxt(saveloc+name+year[index]+"TSt_RDD.csv", AllRDD, delimiter=",")
#         np.savetxt(saveloc+name+year[index]+"TSt_DP_STD.csv", AllDP_STD, delimiter=",")
#         np.savetxt(saveloc+name+year[index]+"TSt_DP_STDERR.csv", AllDP_STDERR, delimiter=",")


#         np.savetxt(saveloc+name+year[index]+"TSt_U.csv", U, delimiter=",")
#         np.savetxt(saveloc+name+year[index]+"TSt_V.csv", V, delimiter=",")
#         np.savetxt(saveloc+name+year[index]+"TSt_Dir.csv", Dir, delimiter=",")
#         np.savetxt(saveloc+name+year[index]+"TSt_W.csv", W, delimiter=",")

    # Element-wise mean across the appended entries (axis 0)
    YrAvgDP = np.mean(multiYrDP,axis=0) # list of averaged columns/years, 675 days
    yrAvgRDD = np.mean(multiYrRDD,axis=0)
    YrAvgDP_STD = np.mean(multiYrDP_STD,axis=0)
    YrAvgDP_STDERR = np.mean(multiYrDP_STDERR,axis=0)

    YrAvgU = np.mean(multiYrU,axis=0) # list of averaged columns/years, 675 days
    yrAvgV = np.mean(multiYrV,axis=0)
    YrAvgDir = np.mean(multiYrDir,axis=0)
    YrAvgW = np.mean(multiYrW,axis=0)

#     np.savetxt(saveloc+name+"YrAvgTSt_U.csv", YrAvgU, delimiter=",")
#     np.savetxt(saveloc+name+"yrAvgTSt_V.csv", yrAvgV, delimiter=",")
#     np.savetxt(saveloc+name+"YrAvgTSt_Dir.csv", YrAvgDir, delimiter=",")
#     np.savetxt(saveloc+name+"YrAvgTSt_W.csv", YrAvgW, delimiter=",")

#     np.savetxt(saveloc+name+"YrAvgTSt_DP.csv", YrAvgDP, delimiter=",")
#     np.savetxt(saveloc+name+"yrAvgTSt_RDD.csv", yrAvgRDD, delimiter=",")
#     np.savetxt(saveloc+name+"YrAvgTSt_DP_STD.csv", YrAvgDP_STD, delimiter=",")
#     np.savetxt(saveloc+name+"YrAvgTSt_DP_STDERR.csv", YrAvgDP_STDERR, delimiter=",")

In [None]:
UT = UF*24.3

if multiYear and not zonal:

    # Flatten? first would flatten years, then days

    # NOTE(review): `saveloc` is not assigned anywhere visible in this file
    # (its only definition is commented out); unless it is provided
    # elsewhere, the np.savetxt calls below raise NameError.

    # Labels for the per-year files; only 11 are available, so more than 11
    # keys in procDirec would raise IndexError below
    year = [str(1960+i) for i in range(0,11)]
    multiYrDP = []
    multiYrRDD = []
    multiYrDP_STD = []
    multiYrDP_STDERR = []
    multiYrU = []
    multiYrV = []
    multiYrDir = []
    multiYrW = []

    for index, key in enumerate(procDirec.keys()):

        # `loc` must be passed by keyword: the third positional parameter of
        # WindStatsTS is `level`, so a positional `loc` would be ignored and
        # the function's default points would be used instead.
        W, Dir, U, V, AllDP, AllRDD, AllDP_STD, AllDP_STDERR = WindStatsTS(procDirec[key], UT, loc=loc)

        multiYrDP.append(AllDP) # list of 675 * 766 values * N years
        multiYrRDD.append(AllRDD)
        multiYrDP_STD.append(AllDP_STD)
        multiYrDP_STDERR.append(AllDP_STDERR)
        multiYrU.append(U)
        multiYrV.append(V)
        multiYrDir.append(Dir)
        multiYrW.append(W)

        # Per-year CSV files
        np.savetxt(saveloc+name+year[index]+"TSt_DP.csv", AllDP, delimiter=",")
        np.savetxt(saveloc+name+year[index]+"TSt_RDD.csv", AllRDD, delimiter=",")
        np.savetxt(saveloc+name+year[index]+"TSt_DP_STD.csv", AllDP_STD, delimiter=",")
        np.savetxt(saveloc+name+year[index]+"TSt_DP_STDERR.csv", AllDP_STDERR, delimiter=",")


        np.savetxt(saveloc+name+year[index]+"TSt_U.csv", U, delimiter=",")
        np.savetxt(saveloc+name+year[index]+"TSt_V.csv", V, delimiter=",")
        np.savetxt(saveloc+name+year[index]+"TSt_Dir.csv", Dir, delimiter=",")
        np.savetxt(saveloc+name+year[index]+"TSt_W.csv", W, delimiter=",")

    # Element-wise mean across the appended entries (axis 0)
    YrAvgDP = np.mean(multiYrDP,axis=0) # list of averaged columns/years, 675 days
    yrAvgRDD = np.mean(multiYrRDD,axis=0)
    YrAvgDP_STD = np.mean(multiYrDP_STD,axis=0)
    YrAvgDP_STDERR = np.mean(multiYrDP_STDERR,axis=0)

    YrAvgU = np.mean(multiYrU,axis=0) # list of averaged columns/years, 675 days
    yrAvgV = np.mean(multiYrV,axis=0)
    YrAvgDir = np.mean(multiYrDir,axis=0)
    YrAvgW = np.mean(multiYrW,axis=0)

    # Multi-year average CSV files
    np.savetxt(saveloc+name+"YrAvgTSt_U.csv", YrAvgU, delimiter=",")
    np.savetxt(saveloc+name+"yrAvgTSt_V.csv", yrAvgV, delimiter=",")
    np.savetxt(saveloc+name+"YrAvgTSt_Dir.csv", YrAvgDir, delimiter=",")
    np.savetxt(saveloc+name+"YrAvgTSt_W.csv", YrAvgW, delimiter=",")

    np.savetxt(saveloc+name+"YrAvgTSt_DP.csv", YrAvgDP, delimiter=",")
    np.savetxt(saveloc+name+"yrAvgTSt_RDD.csv", yrAvgRDD, delimiter=",")
    np.savetxt(saveloc+name+"YrAvgTSt_DP_STD.csv", YrAvgDP_STD, delimiter=",")
    np.savetxt(saveloc+name+"YrAvgTSt_DP_STDERR.csv", YrAvgDP_STDERR, delimiter=",")

    # can just flatten after
    
    # can just flatten after

In [None]:
if byDate and not multiYear and not zonal:

    # Without multiYear, procDirec is still the flat {file name: path}
    # mapping, so it is passed to WindStatsTS as a whole. The function
    # returns eight per-file lists; only the four drift statistics are saved.
    # The requested location is forwarded by keyword, matching the
    # multi-year point case.
    # NOTE(review): `saveloc` is not assigned anywhere visible in this file;
    # confirm it is provided before this cell runs.
    W, Dir, U, V, AllDP, AllRDD, AllDP_STD, AllDP_STDERR = WindStatsTS(procDirec, UT, loc=loc)

    np.savetxt(saveloc+name+"TSt_DP.csv", AllDP, delimiter=",")
    np.savetxt(saveloc+name+"TSt_RDD.csv", AllRDD, delimiter=",")
    np.savetxt(saveloc+name+"TSt_DP_STD.csv", AllDP_STD, delimiter=",")
    np.savetxt(saveloc+name+"TSt_DP_STDERR.csv", AllDP_STDERR, delimiter=",")

In [None]:
# import argparse

# # defined command line options
# # this also generates --help and error handling

# CLI=argparse.ArgumentParser()

# CLI.add_argument(
#   "--loc",  # name on the CLI - drop the `--` for positional/required parameters
#   nargs="*",  # 0 or more values expected => creates a list
#   type=tuple,
#   default=[('-2','-2.5'),('-2','2.5'),('2','2.5'),('2','-2.5')],  # default if nothing is provided
# )


# # parse the command line
# args = CLI.parse_args()
# # access CLI options
# print("loc: %r" % args.loc)

# loc = args.loc