In [13]:
import os
import re
import numpy as np
import matplotlib.pyplot as plt

from rhlUtils import imshow#, makePsf

# Render figures with matplotlib's interactive "notebook" backend.
%matplotlib notebook 
# Ask the inline backend for high-resolution ('retina') figure output.
%config InlineBackend.figure_format = 'retina'

# Alternative Qt GUI backend, left disabled:
#%matplotlib qt
#%gui qt

# Directory holding the input files read by the cells below.
dataDir = "./"

Read the data.  There are three items in the pickle:

- The data (a dict indexed by "fiberId" containing numpy arrays):
 - pixelPos           Measured centroid of an arc line (pixels)
 - pixelPosErr        Error in pixelPos
 - refWavelength      Nominal wavelength (from NIST)
 - modelFitWavelength Wavelength corresponding to pixelPos, based on instrument model
 - status             bitwise OR of flags (see statusFlags)
- A dict giving the meaning of the status bits
- A linear approximation to the wavelength solution:  wavelength = wavelength0 + nmPerPix*i

In [45]:
import pickle

# Load the arc-line pickle.  It unpacks into three items: the per-fiber data
# dict, the status-flag dict, and the (wavelength0, nmPerPix) pair describing
# the linear approximation to the wavelength solution.
# NOTE: pickle.load can execute arbitrary code; only read files you trust.
arclinesPath = os.path.join(dataDir, "arclines.pickle")
with open(arclinesPath, "rb") as fd:
    payload = pickle.load(fd)

data, statusFlags, linearSolution = payload
wavelength0, nmPerPix = linearSolution

# Quick look at what was loaded: the zero-point and the entry for fiber 5.
print(wavelength0)
print(data[5])

621.064
{'pixelPos': array([  137.5663147 ,   193.48243713,   215.64245605,   335.57107544,
         365.88891602,   441.81820679,   502.8387146 ,   532.92498779,
         577.44696045,   821.34313965,     0.        ,   970.4039917 ,
        1102.35534668,  1184.33239746,  1407.3927002 ,  1446.03283691,
        1464.98632812,  1519.10620117,  1528.63330078,  1990.54211426,
        2152.37182617,  2194.40063477,  2215.19824219,  2358.61181641,
        2366.62353516,  2406.57788086,  2483.00756836,  2597.39013672,
        2622.26367188,  2634.9609375 ,  2692.93408203,  2724.3828125 ,
        2747.71166992,  2798.83129883,  2813.52197266,  2822.15258789,
        2880.79907227,  2960.56665039,  3057.9609375 ,  3071.99755859,
        3135.88891602,  3218.06030273,  3409.19140625,  3472.82226562,
        3561.59619141,  3591.92749023,  3622.78491211,  3679.2590332 ,
        3742.36035156,  3783.30883789,  3816.60498047,  3874.30200195,
        3890.43481445,  4034.59960938]), 'pixelPosErr': 

Plot some of the data.  A good place to start would be the pixel position (pixelPos) and the reference wavelength (refWavelength) for a fibre, then use wavelength0 and nmPerPix to construct an approximate (linear) wavelength solution and look at the residuals.

In [46]:
# Pick one fiber and compare its measured arc-line positions with the
# reference wavelengths and with the linear approximation to the solution.
fiberId = 5
assert fiberId in data, "Unknown fiberId: %d" % fiberId

# Index with fiberId (not a literal) so that changing the line above really
# switches the fiber being analysed.
fiberLines = data[fiberId]
pixelPos = fiberLines['pixelPos']
refWavelength = fiberLines['refWavelength']
modelFitWavelength = fiberLines['modelFitWavelength']
print(pixelPos)
print(refWavelength)
plt.plot(pixelPos, refWavelength, label='refWavelength')
print(wavelength0)
print(nmPerPix)

# Linear approximation: wavelength = wavelength0 + nmPerPix*pixel.
# No status filtering is applied in this cell, so flagged lines are included.
wavelengths = nmPerPix*pixelPos + wavelength0

plt.plot(pixelPos, wavelengths, color='b', linestyle='--',
         label='linear approximation')
plt.xlabel('pixelPos (pixels)')
plt.ylabel('wavelength (nm)')
plt.legend()

# Residuals of the linear approximation relative to the reference wavelengths.
residuals = wavelengths - refWavelength
print(residuals)

[  137.5663147    193.48243713   215.64245605   335.57107544   365.88891602
   441.81820679   502.8387146    532.92498779   577.44696045   821.34313965
     0.           970.4039917   1102.35534668  1184.33239746  1407.3927002
  1446.03283691  1464.98632812  1519.10620117  1528.63330078  1990.54211426
  2152.37182617  2194.40063477  2215.19824219  2358.61181641  2366.62353516
  2406.57788086  2483.00756836  2597.39013672  2622.26367188  2634.9609375
  2692.93408203  2724.3828125   2747.71166992  2798.83129883  2813.52197266
  2822.15258789  2880.79907227  2960.56665039  3057.9609375   3071.99755859
  3135.88891602  3218.06030273  3409.19140625  3472.82226562  3561.59619141
  3591.92749023  3622.78491211  3679.2590332   3742.36035156  3783.30883789
  3816.60498047  3874.30200195  3890.43481445  4034.59960938]
[ 633.61791992  638.47558594  640.40179443  650.83251953  653.46875
  660.07751465  665.39294434  668.01202393  671.88977051  693.13787842
  703.43518066  706.10540771  717.5915527

<IPython.core.display.Javascript object>

621.064
0.0855412
[ -0.78637104  -0.86090631  -0.89152077  -1.063411    -1.10621779
  -1.21990178  -1.31556561  -1.36103007  -1.43031485  -1.8152573
 -82.37121582  -2.03194635  -2.23081757  -2.34316028  -2.64067709
  -2.69033491  -2.71270778  -2.77475863  -2.78706986  -3.19928623
  -3.28764024  -3.30261507  -3.30969613  -3.34258983  -3.32717988
  -3.33515847  -3.3412186   -3.32047131  -3.30189077  -3.30773175
  -3.28357744  -3.25998106  -3.25537001  -3.22262484  -3.20571345
  -3.20175446  -3.15936954  -3.09230493  -2.98428886  -2.9727888
  -2.88336261  -2.76179644  -2.42808664  -2.29514786  -2.07937381
  -2.01885652  -1.9453775   -1.79585034  -1.60670767  -1.48795928
  -1.38763193  -1.20161167  -1.14605485  -0.61871656]


Take a look at the statusFlags and the values of status from your fibre.  You probably want to ignore some of the data (Hint: I muttered about bad lines in the lecture)

In [56]:
# Reject flagged arc lines for the selected fiber.
#
# Everything is rebuilt from the untouched arrays in data[fiberId] rather than
# by deleting from the previously filtered globals.  Deleting in place made
# the cell non-idempotent: each re-run removed three more (good) lines because
# rmmask indexes the *unfiltered* arrays.
fiberData = data[fiberId]

# status is described as a bitwise OR of flags; this keeps only lines whose
# status is exactly 1.
# NOTE(review): confirm against statusFlags which bit(s) mark a usable line.
rmmask = np.where(fiberData['status'] != 1)[0]
print(rmmask)

pixelPos = np.delete(fiberData['pixelPos'], rmmask)
refWavelength = np.delete(fiberData['refWavelength'], rmmask)
modelFitWavelength = np.delete(fiberData['modelFitWavelength'], rmmask)

# Linear approximation evaluated on the surviving lines only.
wavelengths = nmPerPix*pixelPos + wavelength0

plt.plot(pixelPos, wavelengths, color='b', linestyle='--')
residuals = wavelengths - refWavelength
print(residuals)

[ 9 10 13]


<IPython.core.display.Javascript object>

[-0.78637104 -0.86090631 -0.89152077 -1.063411   -1.10621779 -1.21990178
 -1.31556561 -1.36103007 -1.43031485 -2.77475863 -2.78706986 -3.28764024
 -3.30261507 -3.30969613 -3.34258983 -3.32717988 -3.33515847 -3.3412186
 -3.32047131 -3.30189077 -3.30773175 -3.28357744 -3.25998106 -3.25537001
 -3.22262484 -3.20571345 -3.20175446 -3.15936954 -3.09230493 -2.98428886
 -2.9727888  -2.88336261 -2.76179644 -2.42808664 -2.29514786 -2.07937381
 -2.01885652 -1.9453775  -1.79585034 -1.60670767 -1.48795928 -1.38763193
 -1.20161167 -1.14605485 -0.61871656]


Let's concentrate on just one fiber for now; choose a fibre, any fibre.  We want to fit a better model of the wavelength solution.  The CCD has 4096 rows, and we want a solution valid over all of them.

Experiment with a range of order of fitter, and look at the rms error in the wavelength solution.  You can look at $\chi^2/\nu$ too, if you like, but I think you'll find that the centroiding errors are wrong.

You probably want to look at the fit and at the residuals from the fit.

In [69]:
import numpy.polynomial.chebyshev
# NOTE(review): myFiberId and nrow are set but not used in this cell; the
# arrays fitted below are those prepared by the earlier cells.
myFiberId = 315

nrow = 4096
fitOrder = 2

plt.plot(refWavelength, residuals)

# Least-squares Chebyshev series fit of pixelPos as a function of refWavelength.
fit = numpy.polynomial.chebyshev.chebfit(refWavelength, pixelPos, fitOrder)
print(fit)

# chebfit returns coefficients of the Chebyshev polynomials T_0, T_1, T_2, ...,
# not of powers of x, so the series must be evaluated with chebval.  (The
# previous hand-written expression also squared the coefficient instead of x.)
# NOTE(review): this curve is a fitted pixel position while the curve above is
# a wavelength residual, so the two share an axis but not a unit.
fitValues = numpy.polynomial.chebyshev.chebval(refWavelength, fit)
plt.plot(refWavelength, fitValues, color='r', linestyle='--');

<IPython.core.display.Javascript object>

[ -6.56090669e+03   9.85954157e+00   5.66542515e-04]


[<matplotlib.lines.Line2D at 0x11659ee48>]

Now repeat the preceding exercise using the model of the spectrograph (i.e. the modelFitWavelength not your linear approximation).  What order of polynomial is needed now?

Is that rms error honest, or are we overfitting?  Modify your code to hold back some number of arclines from the fit and measure the rms only of those ones.

If this was all

I was nice and gave you clean (but real) data.  In the real world you'd probably want to do an n-sigma clip on the residuals and iterate.  Implement this.