## converting data from xarray to pandas format

Developed by Senya Stein (senyarocks11@gmail.com) in July 2019 for use with Saildrone data
IMMA documentation is at https://rda.ucar.edu/datasets/ds548.0/#!docs
Conversion from IMMA developed by Zhankun Wang and Phillip Brohan https://github.com/oldweather/IMMA/blob/master/Python/IMMA/icoads.py


In [1]:
import numpy as np
import pandas as pn
import xarray as xr
import matplotlib.pyplot as plt
import datetime as dt
import netCDF4

### Saildrone data in xarray form

In [2]:
url = 'https://podaac-opendap.jpl.nasa.gov/opendap/hyrax/allData/insitu/L2/saildrone/Baja/saildrone-gen_4-baja_2018-sd1002-20180411T180000-20180611T055959-1_minutes-v1.nc'
ds = xr.open_dataset(url, drop_variables = {'WING_ANGLE','BARO_PRES_STDDEV', 'ROLL', 'PITCH', 'TEMP_AIR_STDDEV', 'RH_STDDEV', 'UWND_STDDEV', 'VWND_STDDEV', 'GUST_WND_STDDEV', 'TEMP_CTD_STDDEV', 'COND_STDDEV', 'SAL_STDDEV', 'O2_CONC_UNCOR_MEAN', 'O2_CONC_UNCOR_STDDEV', 'O2_SAT_MEAN', 'O2_SAT_STDDEV', 'TEMP_O2_MEAN', 'TEMP_O2_STDDEV', 'CHLOR_MEAN', 'CHLOR_STDDEV', 'BKSCT_RED_MEAN', 'BKSCT_RED_STDDEV', 'CDOM_STDDEV', ' WWND_STDDEV', 'TEMP_IR_UNCOR_STDDEV' })
ds

<xarray.Dataset>
Dimensions:             (obs: 86839, trajectory: 1)
Coordinates:
  * trajectory          (trajectory) float32 1002.0
    time                (trajectory, obs) datetime64[ns] ...
    latitude            (trajectory, obs) float64 ...
    longitude           (trajectory, obs) float64 ...
Dimensions without coordinates: obs
Data variables:
    SOG                 (trajectory, obs) float64 ...
    COG                 (trajectory, obs) float64 ...
    HDG                 (trajectory, obs) float64 ...
    HDG_WING            (trajectory, obs) float64 ...
    BARO_PRES_MEAN      (trajectory, obs) float64 ...
    TEMP_AIR_MEAN       (trajectory, obs) float64 ...
    RH_MEAN             (trajectory, obs) float64 ...
    TEMP_IR_UNCOR_MEAN  (trajectory, obs) float64 ...
    UWND_MEAN           (trajectory, obs) float64 ...
    VWND_MEAN           (trajectory, obs) float64 ...
    WWND_MEAN           (trajectory, obs) float64 ...
    WWND_STDDEV         (trajectory, obs) float64 .

In [3]:
# swap obs for time
ds=ds.isel(trajectory=0)
ds = ds.swap_dims({'obs':'time'})
ds

<xarray.Dataset>
Dimensions:             (time: 86839)
Coordinates:
    trajectory          float32 1002.0
  * time                (time) datetime64[ns] 2018-04-11T18:00:00 ... 2018-06-11T05:57:00
    latitude            (time) float64 ...
    longitude           (time) float64 ...
Data variables:
    SOG                 (time) float64 ...
    COG                 (time) float64 ...
    HDG                 (time) float64 ...
    HDG_WING            (time) float64 ...
    BARO_PRES_MEAN      (time) float64 ...
    TEMP_AIR_MEAN       (time) float64 ...
    RH_MEAN             (time) float64 ...
    TEMP_IR_UNCOR_MEAN  (time) float64 ...
    UWND_MEAN           (time) float64 ...
    VWND_MEAN           (time) float64 ...
    WWND_MEAN           (time) float64 ...
    WWND_STDDEV         (time) float64 ...
    GUST_WND_MEAN       (time) float64 ...
    TEMP_CTD_MEAN       (time) float64 ...
    COND_MEAN           (time) float64 ...
    SAL_MEAN            (time) float64 ...
    CDOM_MEAN

In [6]:
#resample to make time = 1 hr increments

#pd_ds = ds.to_dataframe()
#dshr = pd_ds.set_index('time').groupby(pd.Grouper(freq='1H')).mean()
dshr = ds
#dshr=ds.resample(time='1h', skipna=True, label='left').mean()
#dshr
# does data need to be averaged from beginning of hour to end, or from half way through one hour and half through the other??
#for now, we will average hours from beginning of hour to the end, how resample defaults
#data showed as 18:00 is data from the hour of 18:00 to 18:59

# Do the math stuff in xarray
#then save as a dataframe, then to strings of the data 

In [19]:
time_shift = dshr.time + np.timedelta64(30,'m')

for i in range(10):#dshr.size):
   # HR = str(int(100*(time_shift.dt.hour[i]+time_shift.dt.minute[i]/60)/100))
    #format string output and put hours in correct units (.01hr)
    HR = '{time_shift.dt.hour[i]+time_shift.dt.minute[i]/60:.2f}'
    HR = str(HR)
           
    time_str = str(time_shift.dt.year[i])+str(time_shift.dt.month)+str(time_shift.dt.day)+HR
print(time_str)

#format output to 2 decimal places
ds_hour = str(dshr['time.hour'])
precision = 2
ds_hour = '{ds_hour:.2f}'

# do the same for lat and lon 
lat_shift = ds.latitude
lon_shift = ds.longitude

# do lat and lon need shifted units or not?
for j in range(10):#dshr.size): #dshr has no attribute 'size' ??
    LAT = '{lat_shift:.2f}'
    LON = '{lon_shift:.2f}'
    pos_str = str(LAT[i])+str(LON[i])
print(pos_str)

#ds_hour = dshr.resample(time = '.01H', skipna = True, label = 'left')
#put it in .01 hour for IMMA, need to do the same for LAT and LON

#ds_day = dshr['time.day']
#ds_day 

#ds_month = dshr['time.month']
#ds_month

#ds_year = dshr['time.year']
#ds_year

<xarray.DataArray 'year' ()>
array(2018)
Coordinates:
    trajectory  float32 1002.0
    time        datetime64[ns] 2018-04-11T18:09:00
    latitude    float64 ...
    longitude   float64 ...<xarray.DataArray 'month' (time: 86839)>
array([4, 4, 4, ..., 6, 6, 6])
Coordinates:
    trajectory  float32 1002.0
  * time        (time) datetime64[ns] 2018-04-11T18:00:00 ... 2018-06-11T05:57:00
    latitude    (time) float64 ...
    longitude   (time) float64 ...<xarray.DataArray 'day' (time: 86839)>
array([11, 11, 11, ..., 11, 11, 11])
Coordinates:
    trajectory  float32 1002.0
  * time        (time) datetime64[ns] 2018-04-11T18:00:00 ... 2018-06-11T05:57:00
    latitude    (time) float64 ...
    longitude   (time) float64 ...{time_shift.dt.hour[i]+time_shift.dt.minute[i]/60:.2f}
tt


In [None]:
for i in range(10):#dshr.size):
    IM = '02' 
print(IM)

#ATTC-1 p.19
for i in range(10):#dshr.size):
    ATTC = '  '
print(ATTC) #bc saildrone doesnt have the above
#rename blank strings to something more intuitive later?

for i in range(10):#dshr.size):
    TI_var = 
    LI_var = (pg 19 of manual)

#COG and SOG now
for i in range(10):#dshr.size):
    DS_var = ds.COG
    DV_var = ds.SOG
    
for i in range(10):#dshr.size):
    DSVS = str(DS_var[i])+str(DV_var[i])
print(DSVS)
    
#NID- 2 II-2 ## redo this for correct vars
for i in range(10):#dshr.size):
    NIDtoC1 = '    '
print(NIDtoC1)

for i in range(10):#dshr.size):
    II = p. 20
    ID = 
    C1 = '02'

#need for statements to print out whole data set... nest cell too !!!! READ THIS SENYA

In [9]:
for i in range(10):#dshr.size):
    #DI_var = p 21
    D_var = np.arctan2(ds.VWND_MEAN, ds.UWND_MEAN)*180/np.pi #this is wind TO direction, i think imma format is wind FROM
    #WI_var = p 22
    W_var = np.sqrt(ds.UWND_MEAN**2 + ds.VWND_MEAN**2) #convert vectors to speed
    W_var = W_var/1.9444  #unit conversion (kn to .1 m/s)
    W_var = '{W_var:.1f}'


#is wind speed given at 10m height for IMMA?  if so, you need to convert to 10m wind as follows:
#WS_height=int(ds.UWND_MEAN.installed_height)
#wind_speed_10m = (wind_speed*log(10./1e-4))/log(WS_height/1e-4)


#VI-1 VV-2 WW-2 W1- 1
for i in range(10):#dshr.size):
    VItoW1 = '      '
print(VItoW1)

for i in range(10):#dshr.size):
#reformat the SLP and put in correcnt units
    SLP_var = ds.BARO_PRES_MEAN
    SLP_var = SLP_var*.1
    SLP_var = '{SLP_var:.1f}'

#A-1 PPP-3 IT-1
    AtoIT = '     '
print(AtoIT)

for i in range(10):#dshr.size):
    IT_var =

    AT_var = ds.TEMP_AIR_MEAN
    AT_var = AT_var*.1
    AT_var = '{AT_var:.1f}'

#WBTI-1 WBT-4 DPTI-1 DPT-4 
    WBTItoDPT = '          '

    SI_var = p. 28

    SST_var = ds.TEMP_CTD_MEAN
    SST_var = SST_var*.1
    SST_var = '{SST_var:.1f}'

#N-1 NH-1 CL-1 HI-1 H-1 CM-1 CH-1 
    NtoCH = '       '
print(NtoCH)

#if #wave data (more recent sets) = true
   # for i in range(10):#dshr.size):
     #WD_var =
    #WP_var =
   # WH_var =
#else
    #for i in range(10):#dshr.size):
    #WD-2 WP-2 WH-2
    WDtoWH = '      '
print(WDtoWH)

for i in range(10):#dshr.size):
#SD-2 SP-2 SH-2
    SDtoSH = '      '
print(SDtoSH)

for i in range(10):#dshr.size):
    ATTI_var = '01' (p.32)
    ATTL_var = '65'
    PT = ? #(2 len and included in list below)
    
#everything beyond that I dont think we use (the lengths of them):
##DOUBLE CHECK THIS
#1 3 2 3 3 2 2 1 1 1 1 1 2 12 1 6 14 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2 1 1 1 1 1 1 1 2 2 2 1 2 1 1 1 1 1 1 1 3 1 1 1 20 1 3 3 2 2 3 3 3 8 4 1 1 7 2 2 4 6 1 5 4 4 4 4 4 3 3 5 1 4 4 2 2 2 2 2 2 1 2 2 2 2 3 3 2 2 1 2 3 3 2 3 3 3 3 5 5 2 2 2 5 4 5 4 4 4 4 4 5 4 5 4 3 4 4 4 3 4 4 4 2 4 10 2 2 1 2 1 1 1 2 2 1 3 3 1 1 1 4 4 2 2 2 2 2 2 1 7 7 7 7 7 4 8 1 2 2 2 2 1 6 1 1 6 1 1 6 1 1 6 1 4 8 1 2 2 2 2 1 10 4 8 1 2 2 6 1 1 1 1 1 2 2 1 
#1+3+2+3+3+2+2+1+1+1+1+1+2+12+1+6+14+2+1+2+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+2+1+2+2+2+1+1+1+1+1+1+1+2+2+2+1+2+1+1+1+1+1+1+1+3+1+1+1+20+1+3+3+2+2+3+3+3+8+4+1+1+7+2+2+4+6+1+5+4+4+4+4+4+3+3+5+1+4+4+2+2+2+2+2+2+1+2+2+2+2+3+3+2+2+1+2+3+3+2+3+3+3+3+5+5+2+2+2+5+4+5+4+4+4+4+4+5+4+5+4+3+4+4+4+3+4+4+4+2+4+10+2+2+1+2+1+1+1+2+2+1+3+3+1+1+1+4+4+2+2+2+2+2+2+1+7+7+7+7+7+4+8+1+2+2+2+2+1+6+1+1+6+1+1+6+1+1+6+1+4+8+1+2+2+2+2+1+10+4+8+1+2+2+6+1+1+1+1+1+2+2+1
#607

ALL_var = ' '
for i in range(607):
       ALL_var += ' '
print(ALL_var)

SyntaxError: invalid syntax (<ipython-input-9-2f22f2e374df>, line 31)

In [17]:
#testing printing the rest of the blank string
x = 1+3+2+3+3+2+2+1+1+1+1+1+2+12+1+6+14+2+1+2+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+2+1+2+2+2+1+1+1+1+1+1+1+2+2+2+1+2+1+1+1+1+1+1+1+3+1+1+1+20+1+3+3+2+2+3+3+3+8+4+1+1+7+2+2+4+6+1+5+4+4+4+4+4+3+3+5+1+4+4+2+2+2+2+2+2+1+2+2+2+2+3+3+2+2+1+2+3+3+2+3+3+3+3+5+5+2+2+2+5+4+5+4+4+4+4+4+5+4+5+4+3+4+4+4+3+4+4+4+2+4+10+2+2+1+2+1+1+1+2+2+1+3+3+1+1+1+4+4+2+2+2+2+2+2+1+7+7+7+7+7+4+8+1+2+2+2+2+1+6+1+1+6+1+1+6+1+1+6+1+4+8+1+2+2+2+2+1+10+4+8+1+2+2+6+1+1+1+1+1+2+2+1
y = x 
print(y)
ALL_var = 'i'
for i in range(607):
    ALL_var += 'i'
print(ALL_var)

607
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii


In [None]:
#combine all seperate strings to one string to export to txt file

### Decode and store data as Pandas

In [None]:
pd_ds = ds.to_dataframe()
pd_ds

In [None]:
pd_ds[['time', 'PITCH']]

In [None]:
#write dataframe to file
pd_ds.to_csv("test1.csv")  
#read it back
pd.read_csv("test1.csv").head()

### Convert new csv file to a text file

In [None]:
# A simple program to create a formatted text file from a *.csv file.

csv_file = input('test1.csv')
txt_file = input('test1.txt')

try:
    my_input_file = open(csv_file, "r")
except IOError as e:
    print("I/O error({0}): {1}".format(e.errno, e.strerror))

if not my_input_file.closed:
    text_list = [];
    for line in my_input_file.readlines():
        line = line.split(",", 2)
        text_list.append(" ".join(line))
    my_input_file.close()

try:
    my_output_file = open(txt_file, "w")
except IOError as e:
    print("I/O error({0}): {1}".format(e.errno, e.strerror))

if not my_output_file.closed:
    my_output_file.write("#1\n")
    my_output_file.write("double({},{})\n".format(len(text_list), 2))
    for line in text_list:
        my_output_file.write("  " + line)
    print('File Successfully written.')
    my_output_file.close()