In [45]:
from datetime import datetime

import time
import os
import sys
from pathlib import Path

import numpy as np
from scipy.stats import lognorm
import pandas as pd

from astropy import stats
from astropy.io import fits
from astropy.time import Time
import astropy.units as u

import matplotlib
matplotlib.use('nbagg')
from matplotlib import style
style.use('ggplot')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)

1.13.1


In [2]:
%load_ext autoreload
%autoreload 2

In [91]:
# load data and make some subsets for each wfs for inspection later

data = pd.read_csv("../test_data/2019_wfs.csv")
data['ut'] = pd.to_datetime(data.ut)
data['az'][data['az'] < 0.] += 360.

f9 = data[data['wfs'] == 'newf9']
f5 = data[data['wfs'] == 'f5']
mmirs = data[data['wfs'] == 'mmirs']
bino = data[data['wfs'] == 'binospec']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [92]:
f5.head()

Unnamed: 0,ut,airmass,az,cc_x_err,cc_y_err,chamt,el,exptime,file,focerr,...,tiltx,tilty,time,transx,transy,wavefront_rms,wfs,xcen,ycen,comaerr
704,2019-01-24 05:07:19,1.03,238.56,4.124,1.983,2.3,76.2,30.0,manual_wfs_0007.fits,14.14,...,162.39,120.86,2019-01-24T05:07:19,68.69,1699.35,552.881886,f5,265.802711,278.550051,4.575988
705,2019-01-24 05:08:44,1.03,239.38,0.0,0.0,2.4,75.95,30.0,manual_wfs_0008.fits,-8.14,...,167.22,122.87,2019-01-24T05:08:44,24.03,1808.23,402.289146,f5,260.128508,264.503442,0.0
706,2019-01-24 07:38:58,1.02,165.88,0.739,-2.525,2.1,77.55,30.0,manual_wfs_0011.fits,0.72,...,152.47,139.17,2019-01-24T07:38:58,-197.89,1635.01,314.428797,f5,245.999018,238.80233,2.630921
707,2019-01-24 09:44:40,1.58,124.85,-1.613,-3.569,2.6,39.32,30.0,manual_wfs_0012.fits,6.6,...,273.25,155.28,2019-01-24T09:44:40,-391.5,2837.29,532.928487,f5,233.094555,225.136825,3.916571
708,2019-01-24 09:45:58,1.57,125.15,-1.266,-0.254,2.6,39.54,30.0,manual_wfs_0013.fits,-2.19,...,271.12,151.76,2019-01-24T09:45:58,-303.87,2793.77,557.452351,f5,231.522304,235.355972,1.291229


In [93]:
data.columns

Index(['ut', 'airmass', 'az', 'cc_x_err', 'cc_y_err', 'chamt', 'el', 'exptime',
       'file', 'focerr', 'focus', 'fwhm', 'osst', 'outt', 'raw_seeing',
       'residual_rms', 'seeing', 'tiltx', 'tilty', 'time', 'transx', 'transy',
       'wavefront_rms', 'wfs', 'xcen', 'ycen', 'comaerr'],
      dtype='object')

In [94]:
# wrangle the times to add colums for mjd to look for trends over time and hour to look for nightly trends
raw_times = data['time']
times = Time(raw_times.values.tolist(), format='isot', scale='utc')
mjd = times.mjd
data['mjd'] = mjd.tolist()
data['hour'] = data['ut'].dt.hour
data.head()

Unnamed: 0,ut,airmass,az,cc_x_err,cc_y_err,chamt,el,exptime,file,focerr,...,time,transx,transy,wavefront_rms,wfs,xcen,ycen,comaerr,mjd,hour
0,2019-01-07 08:52:20.109,1.0329,11.005093,-0.0,0.0,0.762,75.489591,30.0,f9wfs_20190107-015252.fits,0.0,...,2019-01-07T08:52:20.109000,660.87,1027.74,692.108567,newf9,369.464652,449.093992,0.0,58490.369677,8
1,2019-01-07 08:53:46.527,1.0327,10.026086,0.0,0.0,0.757,75.543728,30.0,f9wfs_20190107-015418.fits,-6.04,...,2019-01-07T08:53:46.527000,661.93,1026.7,487.315068,newf9,378.922652,444.197907,0.0,58490.370677,8
2,2019-01-10 06:37:44.616,1.719,283.541748,-0.0,-0.242,7.682,35.572431,30.0,f9wfs_20190109-233812.fits,7.66,...,2019-01-10T06:37:44.616000,149.41,1633.39,363.252097,newf9,413.161213,441.833007,0.242,58493.276211,6
3,2019-01-10 07:24:51.145,1.6684,262.283493,0.0,-2.982,7.1,36.824057,30.0,f9wfs_20190110-002520.fits,2.61,...,2019-01-10T07:24:51.145000,403.54,1422.66,471.32682,newf9,400.016602,454.68545,2.982,58493.308925,7
4,2019-01-11 06:22:35.246,1.2322,310.203323,3.395,1.458,0.658,54.247805,30.0,f9wfs_20190110-232302.fits,-3.82,...,2019-01-11T06:22:35.246000,206.12,1545.03,605.3651,newf9,407.925506,437.723447,3.694833,58494.265686,6


In [95]:
# trim out columns not relevant to training
trimmed = data.drop(columns=['ut', 'time', 'airmass', 'cc_x_err', 'cc_y_err', 'exptime', 'file', 'focerr', 'fwhm', 'raw_seeing', 'residual_rms', 'seeing', 'wavefront_rms', 'xcen', 'ycen', 'comaerr'])

In [96]:
# assign columns for each wfs so we can use them as features for training
wfs = trimmed.pop('wfs')
trimmed['f9'] = (wfs == 'newf9') * 1
trimmed['f5'] = (wfs == 'f5') * 1
trimmed['mmirs'] = (wfs == 'mmirs') * 1
trimmed['bino'] = (wfs == 'binospec') * 1
trimmed

Unnamed: 0,az,chamt,el,focus,osst,outt,tiltx,tilty,transx,transy,mjd,hour,f9,f5,mmirs,bino
0,11.005093,0.762,75.489591,1069.21,3.70,0.8,209.61,81.65,660.87,1027.74,58490.369677,8,1,0,0,0
1,10.026086,0.757,75.543728,1117.59,3.53,0.8,209.55,81.45,661.93,1026.70,58490.370677,8,1,0,0,0
2,283.541748,7.682,35.572431,300.51,10.20,7.3,257.64,171.82,149.41,1633.39,58493.276211,6,1,0,0,0
3,262.283493,7.100,36.824057,531.36,8.85,6.6,241.41,152.30,403.54,1422.66,58493.308925,7,1,0,0,0
4,310.203323,0.658,54.247805,392.71,4.70,1.0,263.71,142.77,206.12,1545.03,58494.265686,6,1,0,0,0
5,217.249859,0.909,55.256390,1208.70,1.02,1.4,248.64,117.48,538.81,1342.69,58494.339418,8,1,0,0,0
6,98.613086,1.138,77.634575,1301.08,0.82,1.5,214.77,85.30,648.72,870.49,58494.398441,9,1,0,0,0
7,60.113495,1.384,80.221469,1399.46,0.90,1.2,224.39,78.69,695.02,991.74,58494.422679,10,1,0,0,0
8,156.085442,0.053,60.188801,1412.41,0.28,0.4,264.32,114.21,519.48,1565.18,58494.468360,11,1,0,0,0
9,32.607925,0.075,79.876987,1485.28,0.20,0.2,193.71,66.91,834.28,906.10,58494.486702,11,1,0,0,0


In [99]:
labels = ['focus', 'tiltx', 'tilty', 'transx', 'transy']
train_dataset = trimmed.sample(frac=0.8, random_state=0)
test_dataset = trimmed.drop(train_dataset.index)

train_stats = train_dataset.describe()
train_stats = train_stats.drop(columns=labels)
train_stats = train_stats.transpose()
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
az,5643.0,193.663207,114.727689,0.065961,95.174252,191.194366,307.035377,373.046695
chamt,5643.0,4.876816,3.132168,-6.23,2.0865,5.193,7.0935,14.2
el,5643.0,57.844113,11.160818,26.619272,51.007144,57.304042,63.776416,89.38
osst,5643.0,4.897111,3.188202,-3.75,1.5,5.38,7.3,13.5
outt,5643.0,5.244639,3.072589,-7.0,2.7,5.6,7.3,14.5
mjd,5643.0,58549.483299,28.963657,58490.370677,58522.537756,58570.460965,58573.381926,58612.390602
hour,5643.0,6.907673,3.162135,1.0,4.0,7.0,10.0,13.0
f9,5643.0,0.007443,0.085958,0.0,0.0,0.0,0.0,1.0
f5,5643.0,0.0521,0.222248,0.0,0.0,0.0,0.0,1.0
mmirs,5643.0,0.097111,0.296136,0.0,0.0,0.0,0.0,1.0


In [100]:
train_labels = {}
test_labels = {}
for l in labels:
    train_labels[l] = train_dataset.pop(l)
    test_labels[l] = test_dataset.pop(l)