# Import modules

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import copy
import pytz
import datetime 
import cPickle as pickle
import astral
import astropy.time as aptime
from scipy import stats
from scipy import interpolate
import os
import glob

# Load GPI raw data

In [5]:
txt_file1 = pd.read_csv('IFS_AllOnSky_RawDistorcorr_CleanSee30_20190212.txt')
txt_file1_copy = copy.copy(txt_file1)
raw_contrast_data= pd.DataFrame(txt_file1_copy)
print len(raw_contrast_data)

raw_contrast_data['dts'] = aptime.Time(raw_contrast_data['MJDOBS'], format='mjd').datetime 
raw_contrast_data['dts'] = raw_contrast_data['dts'].dt.tz_localize(pytz.UTC)

27538


# GPI logs

In [8]:
#  Get list of all the datafiles we want to read in  
path = os.environ["HOME"] + r'/Downloads/individual/'  
all_data_files = pd.Series(glob.glob(os.path.join(path, "*.log")))
data_files = all_data_files[all_data_files.str.contains('OMSS_AO_Bench_temperature|OE_Interior_Air_temperature|OMSS_Interface_Structure_temperature|OMSS_Mounting_Frame_temperature|Outside_OE_temperature')]

#  load and concat csv files 
gpi_logs = pd.concat(map(lambda file: pd.read_csv(file, header=None, dtype={'0':object,'1': np.float64}, 
        names = ['dts',os.path.splitext(os.path.basename(file))[0]]), data_files),axis = 1)

#  remove duplicated columns and sort by date
gpi_logs = gpi_logs.iloc[:, ~gpi_logs.columns.duplicated()] # remove duplicate columns
gpi_logs['dts'] = pd.to_datetime(gpi_logs['dts']).dt.tz_localize(pytz.UTC) 
gpi_logs = gpi_logs.sort_values(by='dts').set_index(np.arange(len(gpi_logs)))

#  Remove outlier points using a mask
mask = (gpi_logs.loc[:,list(gpi_logs.columns != 'dts')] < 40.) & (gpi_logs.loc[:,list(gpi_logs.columns != 'dts')] != 0) & (gpi_logs.loc[:,list(gpi_logs.columns != 'dts')] > -40.)
mask['dts'] = True
gpi_logs = gpi_logs[mask]


merge gpi logs with raw contrast data

In [9]:
raw_contrast_data = pd.merge_asof(raw_contrast_data,gpi_logs, on='dts',tolerance=pd.Timedelta('60min'))

In [10]:
sensor_list = raw_contrast_data.columns[-5:]

print len(raw_contrast_data)
for sensor in sensor_list:
    print sensor,sum(~np.isfinite(pd.to_numeric(raw_contrast_data[sensor],errors='coerce')))  

24155
OE_Interior_Air_temperature 3114
OMSS_AO_Bench_temperature 7573
OMSS_Interface_Structure_temperature 3114
OMSS_Mounting_Frame_temperature 3114
Outside_OE_temperature 3983


# Load GS env data

In [11]:
txt_file2 = pd.read_csv('GS_env_20180527.csv')
txt_file2_copy = copy.copy(txt_file2)
GS_env_data = pd.DataFrame(txt_file2_copy)
print len(GS_env_data)

GS_env_data['dts'] = aptime.Time(GS_env_data['MJDOBS'], format='mjd').datetime 
GS_env_data['dts'] = GS_env_data['dts'].dt.tz_localize(pytz.UTC)

320689


In [13]:
GS_env_data = GS_env_data.drop(['MJDOBS'], axis=1)

# Calibrate raw WFE measurements

In [14]:
raw_contrast_data['cal_wfe']= raw_contrast_data['RawDPwfe']*.81 - 20.06

# Merge GS and GPI data

In [15]:
GS_GPI_data = pd.merge_asof(raw_contrast_data,GS_env_data,on='dts',tolerance=pd.Timedelta('30min'))

In [16]:
GS_GPI_data.keys()

Index([u'DATAFILE', u'DATALAB', u'OBJNAME', u'DATESTR', u'UTSTART', u'MJDOBS',
       u'ITIME', u'COADDS', u'OBSMODE', u'DISPERSR', u'IFSFILT', u'AOFRAMES',
       u'AOSPATIA', u'HMAG', u'IMAG', u'AOFLUX', u'RawDPwfe', u'PAR_ANG',
       u'PA', u'IAA', u'AZIMUTH', u'ELEVATIO', u'AIRMASS', u'AMSTART',
       u'AMEND', u'WINDM2', u'WINDM2DR', u'WINDDIRE', u'WINDSPEE', u'TAMBIENT',
       u'OMSATEMP', u'GLITEMP', u'GLOTEMP', u'MASSSEE', u'MASSTAU', u'DIMMSEE',
       u'MASS05CN', u'MASS1CN2', u'MASS2CN2', u'MASS4CN2', u'MASS8CN2',
       u'MASS16CN', u'MASSISOP', u'DRPDATE', u'CONTR025', u'CONTR040',
       u'CONTR080', u'dts', u'OE_Interior_Air_temperature',
       u'OMSS_AO_Bench_temperature', u'OMSS_Interface_Structure_temperature',
       u'OMSS_Mounting_Frame_temperature', u'Outside_OE_temperature',
       u'cal_wfe', u'T_M1+Y', u'T_M1-Y', u'T_TrussAirLow+X',
       u'T_TrussAirMid+X', u'T_TrussAirTop+X', u'T_TrussSurLow+X',
       u'T_TrussSurMid+X', u'T_TrussSurTop+X', u'T_twr', u'

# Add sunset, sunrise, night number columns to raw gpi data

Need to include a description of the astral package and how it's used to compute dates...

In [17]:
def sunrise_and_sunset_info(dataframe):
    #  Create colums with datetimes and make them timezone aware
    #dataframe['dts'] = aptime.Time(dataframe['MJDOBS'], format='mjd').datetime 
    #dataframe['dts'] = dataframe['dts'].dt.tz_localize(pytz.UTC)

    #  Create colums with sunrise and sunset times in UTC time
    location = astral.Astral()['Santiago']
    timezone=location.timezone
    dataframe['sunset_utc'] = dataframe['dts'].apply(astral.Astral().sunset_utc,args=(location.latitude,location.longitude))
    dataframe['sunrise_utc'] = dataframe['dts'].apply(astral.Astral().sunrise_utc,args=(location.latitude,location.longitude))

    #  Calculate time difference from sunset in hours
    dataframe['time_from_sunset'] = dataframe['dts']-dataframe['sunset_utc']

    #  Dates that use sunsets of the following day
    ind = np.where(dataframe['time_from_sunset'].dt.days == -1)[0]
    dataframe.loc[ind,'sunset_utc'] = (dataframe['dts'][ind]-datetime.timedelta(days =1)).apply(astral.Astral().sunset_utc,args=(location.latitude,location.longitude))

    #  Calculate time difference of observation from sunset after adjusting sunset times
    #  Calculate time difference of observation from sunset in hours
    dataframe['time_from_sunset'] = (dataframe['dts']-dataframe['sunset_utc'])
    dataframe['hours_from_sunset'] = dataframe['time_from_sunset'].dt.seconds/3600.

    #  Make changes when dates that use sunsets of following date
    ind1 = np.where(dataframe['sunset_utc'] > dataframe['dts'])[0]
    dataframe.loc[ind1,'sunset_utc'] = (dataframe.loc[ind1,'dts']-datetime.timedelta(days =1)).apply(astral.Astral().sunset_utc,args=(location.latitude,location.longitude))

    #  Make changes when dates that use sunrise of following date
    ind2 = np.where(dataframe['sunrise_utc'] > dataframe['dts'])[0]
    dataframe.loc[ind2,'sunrise_utc'] = (dataframe.loc[ind2,'dts']-datetime.timedelta(days =1)).apply(astral.Astral().sunrise_utc,args=(location.latitude,location.longitude))

    #  Calculate time difference of observation from sunset after adjusting sunset times
    #  Calculate time difference of observation from sunset in hours
    dataframe['time_from_sunset'] = (dataframe['dts']-dataframe['sunset_utc'])
    dataframe['hours_from_sunset'] = dataframe['time_from_sunset'].dt.seconds/3600.

    #  Calculate time difference of observation from sunrise after adjusting sunset times
    #  Calculate time difference of observation from sunrise in hours
    dataframe['time_from_sunrise'] = (dataframe['dts']-dataframe['sunrise_utc'])
    dataframe['hours_from_sunrise'] = dataframe['time_from_sunrise'].dt.seconds/3600.


    #  Assign column with night number
    dataframe['night_number'] = np.ones(len(dataframe))
    d0 = dataframe['sunset_utc'].dt.date
    d1 = np.roll(d0,-1)
    d1[-1] = np.nan
    extrema = np.where(d0 != d1)[0]
    start = 0
    val = 1

    for new_night in extrema:
        dataframe.loc[start:new_night,'night_number'] = val
        start = new_night + 1
        val = val + 1
    
    #  Select data only taken during night hours
    night_hrs = np.where(dataframe['hours_from_sunset'] < dataframe['hours_from_sunrise'])[0]
    dataframe = dataframe.iloc[night_hrs]
    
    new_ind = np.arange(len(dataframe))
    dataframe = dataframe.set_index(new_ind)
    
    return dataframe

In [18]:
GS_GPI_data = sunrise_and_sunset_info(GS_GPI_data)

In [4]:
raw_contrast_data = sunrise_and_sunset_info(raw_contrast_data)

# Compute dT for temparature column

The purpose of this function is to compute derivatives of temperature data that are sampled unevenly. I accompish this by interpolating temperatures for each night of data and then taking the derivative of that interpolated function. The input for the function is the name of the IFS dataframe and the name of the temperature sensor. The output is a list of rates of temperature change for all the nights in order.   

1. I create an empty dataframe where I will append derivative values for all nights of data in order. 

2. I seperate the IFS data frame into single night dataframes. Each single night dataframe consists of the exact same columns as the IFS dataframe.

3. I itterate over each night of data and check whether there are enough data points to interpolate over. If there are less than four data points, then I append nans in the empty dataframe as placeholders.

4. If there are more than four data points, I store the time stamps(MJD format) in one variable and the temperature values in another variable.

5. I calculate the number of knots to interpolate over by inputing the list of time stamps, the list of temperature values, and the list of weights calculated by taking the standard deviation of temperatures and diving it from 1. Documentation of the function I used to calculate knots is listed below. 
http://man.hubwiz.com/docset/SciPy.docset/Contents/Resources/Documents/doc/generated/scipy.interpolate.splrep.html

6. I interpolate over the data points using a spline fit and then I take it's derivative. Documentation of the function I used to interpolate through knots is listed below.  
http://man.hubwiz.com/docset/SciPy.docset/Contents/Resources/Documents/doc/generated/scipy.interpolate.splev.html#scipy.interpolate.splev

7. I append each night of  derivative values in the empty dataframe I had previously created. 

8. I return the dataframe consiting of one column with derivative values for all the nights in order. 

In [19]:
def calc_dspline_fit(dataframe,temp):
    grouped = dataframe.groupby(['OBJNAME','DATESTR'])
    
    for i in dataframe.groupby(['OBJNAME','DATESTR']):
        x = i[1]['MJDOBS']*24
        y = pd.to_numeric(i[1][temp],errors='coerce')
        
        if len(x)>4:
            tck = interpolate.splrep(x,y,w = np.ones(len(np.isfinite(y))) * 1/(np.std(y)+.001))
            dspline_fit = interpolate.splev(x, tck, der=1)#  evaluates the derivative of the spline
            dataframe.loc[i[1].index,temp +'_dT'] =  dspline_fit
        else:
            dataframe.loc[i[1].index,temp +'_dT'] =  np.nan
    return dataframe
         

Make a column for the temperature difference and for the temperature rates of change for the AO bench in the IFS dataframe. 

In [23]:
col = list(gpi_logs.columns)[1:6] + ['OMSATEMP']
 
for i in col:    
    GS_GPI_data = calc_dspline_fit(GS_GPI_data,i) 

Create seperate data tables for each sensor

# Filter data using conditions (optional)

In [20]:
date1 = datetime.date(year = 2014, month = 1, day =1)
#date2 = datetime.date(year = 2017, month = 8, day =1)

#  AO system requirements
#cond1 = (merge_data['dts'] > date1) & (merge_data['dts'] < date2)
cond1 = (merge_data['dts'] > date1)
cond2 = merge_data['COADDS'] == 1
cond3 = merge_data['IFSFILT'] == 'H'
cond4 = merge_data['OBSMODE'] == 'H_coron'
cond5 = merge_data['AOFRAMES'] == 1000

#  Good seeing conditions
cond6 = merge_data['IMAG'] < 7  #Bright Stars
cond7 = merge_data['MASSTAU'] > 1.0  #Good seeing
#cond8 = merge_data['DIMMSEE'] < 2.0  #Good seeing

#  dT outliers
#cond9 = np.abs(merge_data['OMSA_dT']) < .5
#cond10 = np.abs(merge_data['M1_low_dT']) < .5

ind = np.where(cond1 & cond2 & cond3 & cond4 & cond5 & cond6 & cond7)[0]  # encorporates seeing
print len(ind)

filtered_merge_data = merge_data.iloc[ind]
new_ind = np.arange(len(filtered_merge_data))
filtered_merge_data = filtered_merge_data.set_index(new_ind)

print len(filtered_merge_data)

2639
2639


In [21]:
np.sum(np.isfinite(raw_contrast_data['MASSTAU']))

6183

# Save data table as CSV file

In [23]:
GS_GPI_data.keys()

Index([u'DATAFILE', u'DATALAB', u'OBJNAME', u'DATESTR', u'UTSTART', u'MJDOBS',
       u'ITIME', u'COADDS', u'OBSMODE', u'DISPERSR', u'IFSFILT', u'AOFRAMES',
       u'AOSPATIA', u'HMAG', u'IMAG', u'AOFLUX', u'RawDPwfe', u'PAR_ANG',
       u'PA', u'IAA', u'AZIMUTH', u'ELEVATIO', u'AIRMASS', u'AMSTART',
       u'AMEND', u'WINDM2', u'WINDM2DR', u'WINDDIRE', u'WINDSPEE', u'TAMBIENT',
       u'OMSATEMP', u'GLITEMP', u'GLOTEMP', u'MASSSEE', u'MASSTAU', u'DIMMSEE',
       u'MASS05CN', u'MASS1CN2', u'MASS2CN2', u'MASS4CN2', u'MASS8CN2',
       u'MASS16CN', u'MASSISOP', u'DRPDATE', u'CONTR025', u'CONTR040',
       u'CONTR080', u'dts', u'cal_wfe', u'T_M1+Y', u'T_M1-Y',
       u'T_TrussAirLow+X', u'T_TrussAirMid+X', u'T_TrussAirTop+X',
       u'T_TrussSurLow+X', u'T_TrussSurMid+X', u'T_TrussSurTop+X', u'T_twr',
       u'OE_Interior_Air_temperature', u'OMSS_AO_Bench_temperature',
       u'OMSS_Interface_Structure_temperature',
       u'OMSS_Mounting_Frame_temperature', u'Outside_OE_temperature',
  

In [31]:
GS_GPI_data.to_csv('IFS_RawContrast_Gemini_env_20180530_all_seeing.csv',index=False)