spacetelescope · cmagness · Feb 6, 2020 · Mar 10, 2020 · Mar 16, 2020 · jwhite3
diff --git a/.gitignore b/.gitignore
@@ -71,3 +71,7 @@ target/
 
 # Finder
 .DS_Store
+
+# Misc
+testoutputs/
+*.ipynb
diff --git a/cosmo/monitors/data_models.py b/cosmo/monitors/data_models.py
@@ -1,7 +1,9 @@
 import pandas as pd
 import numpy as np
+import os
 
 from typing import List
+from glob import glob
 from monitorframe.datamodel import BaseDataModel
 from peewee import OperationalError
 
@@ -192,3 +194,104 @@ def get_new_data(self):
         data_results = data_results.replace([np.inf, -np.inf], np.nan).dropna().reset_index(drop=True)
 
         return data_results[~data_results.EXPTYPE.str.contains('ACQ|DARK|FLAT')]
+
+
+class NUVDarkDataModel(BaseDataModel):
+    """Datamodel for NUV Dark files."""
+    files_source = FILES_SOURCE
+    subdir_pattern = '?????'
+
+    def get_new_data(self):
+        header_request = {
+            0: ['ROOTNAME'],
+            1: ['EXPSTART','EXPTIME']
+            }
+
+        table_request = {
+            1: ['TIME','XCORR','YCORR'],
+            3: ['TIME','LATITUDE','LONGITUDE','DARKRATE']
+            }
+
+        # any special data requests
+        # TODO: add spt support for temp, sun_lat, sun_long
+        # TODO: is this a good place to add solar data scraping in the future?
+
+        # this is temporary to find the files from the dark programs until
+        # we can add the dark files to the monitor_data database
+        files = []
+        # 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17
+        program_ids = ['15776/', '15538/', '14942/', '14521/', '14442/', '13974/',
+                       '13528/', '13126/', '12720/', '12420/', '11894/']
+        # program_ids = ['15776/']
+        for program in program_ids:
+            new_files_source = os.path.join(FILES_SOURCE, program)
+            subfiles = glob(os.path.join(new_files_source, "*corrtag*"))
+            files += subfiles
+
+        if not files:  # No new files
+            return pd.DataFrame()
+
+        # need to add any other keywords that need to be set
+        data_results = data_from_exposures(files,
+                                           header_request=header_request,
+                                           table_request=table_request)
+
+        return data_results
+
+
+class CorrtagDataModel(BaseDataModel):
+    """Datamodel for all NUV Dark files."""
+    files_source = FILES_SOURCE
+    subdir_pattern = '?????'
+
+    def get_new_data(self):
+        header_request = {
+            0: ['ROOTNAME'],
+            1: [
+                'EXPSTART',
+                'EXPTIME',
+                'EXPTYPE',
+                'DETECTOR'
+                ]
+            }
+
+        table_request = {
+            1: [
+                'TIME',
+                'XCORR',
+                'YCORR'
+                ],
+            3: [
+                'LATITUDE',
+                'LONGITUDE'
+                ]
+
+            }
+        # TODO: add gross counts, don't need PHA, lat or long right now
+
+        # any special data requests
+        # TODO: add spt support for temp, sun_lat, sun_long
+        # TODO: is this a good place to add solar data scraping in the future?
+
+        # this is temporary to find the files from the dark programs until
+        # we can add the dark files to the monitor_data database
+        # files = []
+        # program_ids = ['15776/']
+        # for program in program_ids:
+        #     new_files_source = os.path.join(FILES_SOURCE, program)
+        #     subfiles = glob(os.path.join(new_files_source, "*corrtag*"))
+        #     # TODO: figure out why this wasn't working with find_files()
+        #     files += subfiles
+
+        files = find_files('*corrtag*', data_dir=self.files_source,
+                           subdir_pattern=self.subdir_pattern)
+
+        if not files:  # No new files
+            return pd.DataFrame()
+
+        # need to add any other keywords that need to be set
+        data_results = data_from_exposures(files,
+                                           header_request=header_request,
+                                           table_request=table_request)
+
+        return data_results
diff --git a/cosmo/monitors/nuv_dark_monitor.py b/cosmo/monitors/nuv_dark_monitor.py
@@ -0,0 +1,230 @@
+import numpy as np
+import plotly.graph_objs as go
+import plotly.express as px
+import datetime
+import pandas as pd
+
+from peewee import Model
+from monitorframe.monitor import BaseMonitor
+from astropy.time import Time
+from typing import List, Union
+
+from .data_models import NUVDarkDataModel
+from ..monitor_helpers import fit_line, convert_day_of_year, create_visibility, v2v3
+from .. import SETTINGS
+
+import os
+import yaml
+import cosmo
+import numpy
+import itertools
+from glob import glob
+from astropy.io import fits
+import matplotlib.pyplot as plt
+from matplotlib.ticker import FormatStrFormatter
+from cosmo.monitors.data_models import NUVDarkDataModel
+from cosmo.filesystem import find_files, data_from_exposures, data_from_jitters
+from cosmo.monitor_helpers import absolute_time, explode_df
+from monitorframe.datamodel import BaseDataModel
+from monitorframe.monitor import BaseMonitor
+
+# these imports are so messy i will fix this later
+
+COS_MONITORING = SETTINGS['output']
+
+
+class NUVDarkMonitor(BaseMonitor):
+    data_model = NUVDarkDataModel
+    # TODO: update docs
+    # docs = "https://spacetelescope.github.io/cosmo/monitors.html#<darks>"
+    output = COS_MONITORING
+
-
+    # Plot stuff
+    x = 'TIME'
+    y= 'DARK_RATE'
+    plot_type = 'scatter'
-
+    # Plot stuff
+    x = 'TIME'
+    y= 'DARK_RATE'
+    plot_type = 'scatter'
+    run = 'monthly'
+
+    def get_data(self):
+        # access data, perform any filtering required for analysis
-        # access data, perform any filtering required for analysis
+        """access data, perform any filtering required for analysis"""
-        # access data, perform any filtering required for analysis
+        """access data, perform any filtering required for analysis"""
+        data = self.model.new_data
+        dark_rate_column = []
+        dec_year_column = []
+
+        xlim = [0, 1024]
+        ylim = [0, 1024]
+
+        # parallelize, this is going to get bad when looking at a lot of data
+        for index, row in data.iterrows():
+            subdf = pd.DataFrame({
+                "EXPSTART": row["EXPSTART"], "TIME": [row["TIME"]],
+                "XCORR": [row["XCORR"]], "YCORR": [row["YCORR"]],
+                "TIME_3": [row["TIME_3"]]
+                })
+
+            # this is temporary until i understand james' suggestion to use
+            # df.apply and lambda functions
+            xcorr = subdf["XCORR"][0]
+            ycorr = subdf["YCORR"][0]
+            filtered_xcorr = xcorr[
+                np.where((xcorr > xlim[0]) & (xcorr < xlim[1]))]
+            filtered_ycorr = ycorr[
+                np.where((ycorr > ylim[0]) & (ycorr < ylim[1]))]
+            subdf["XCORR"] = [filtered_xcorr]
+            subdf["YCORR"] = [filtered_ycorr]
+
+            dark_rate_array, dec_year_array = self.calculate_dark_rate(subdf,
+                                                                       xlim,
+                                                                       ylim)
+            dark_rate_column.append(dark_rate_array)
+            dec_year_column.append(dec_year_array)
+
+        data["DARK_RATE"] = dark_rate_column
+        data["DECIMAL_YEAR"] = dec_year_column
+
+        # when the monitor method of the monitor is called, it will
+        # initialize the self.data attribute
+        # with this method and then can be used by the other methods
+        return data
+
+    def calculate_dark_rate(self, dataframe, xlim, ylim):
+        # calculate dark rate for one exposure, with a dataframe with TIME,
-        # calculate dark rate for one exposure, with a dataframe with TIME,
+        """calculate dark rate for one exposure, with a dataframe with TIME, XCORR, and YCORR values"""
-        # calculate dark rate for one exposure, with a dataframe with TIME,
+        """calculate dark rate for one exposure, with a dataframe with TIME, XCORR, and YCORR values"""
+        # XCORR, and YCORR values
+
+        # need to set this somewhere
+        timestep = 25
+        time_bins = dataframe["TIME_3"][0][::timestep]
+
+        counts = np.histogram(dataframe["TIME_3"][0], bins=time_bins)[0]
+        npix = float((xlim[1] - xlim[0]) * (ylim[1] - ylim[0]))
+        dark_rate_array = counts / npix / timestep
+        # save the whole histogram in time bins, and then plot each of them
+
+        # make a decimal year array corresponding to the time bins of the
+        # dark rates
+        # do this with the expstart (mjd) and time array from the timeline
+        # extension
+        # taking the expstart, binning the time array by the timestep,
+        # removing the last element in the array (bin by front edge),
+        # and then multiplying by the conversion factor
+        # this is done by the absolute_time helper function
+        mjd_array = absolute_time(expstart=dataframe['EXPSTART'][0],
+                                  time=dataframe['TIME_3'][0][::timestep][:-1])
+        dec_year_array = mjd_array.decimalyear
+
+        return dark_rate_array, dec_year_array
+
+    def track(self):
+        # track something. perhaps current dark rate?
+        if self.data is None:
+            self.data = self.get_data()
+
+        plotdf = pd.DataFrame({
+            "DECIMAL_YEAR": self.data["DECIMAL_YEAR"],
+            "DARK_RATE": self.data["DARK_RATE"]
+            })
+
+        # i can't get explode_df to work so this is for now
+        # i think i know why it doesn't work and i fixed it i just haven't
+        # switched to using it
+        all_dec_year = []
+        all_dark_rates = []
+        for index, row in plotdf.iterrows():
+            all_dec_year = list(
+                itertools.chain(all_dec_year, row["DECIMAL_YEAR"]))
+            all_dark_rates = list(
+                itertools.chain(all_dark_rates, row["DARK_RATE"]))
+
+        dark_counts = np.asarray(all_dark_rates)
+        fig = plt.figure(figsize=(12, 9))
+        bin_size = 1e-8
+        n_bins = int((dark_counts.max() - dark_counts.min()) / bin_size)
+        ax = fig.add_subplot(2, 1, 1)
+        ax.hist(dark_counts, bins=n_bins, align='mid', histtype='stepfilled')
+        counts, bins = np.histogram(dark_counts, bins=100)
+        cuml_dist = np.cumsum(counts)
+        count_99 = abs(cuml_dist / float(cuml_dist.max()) - .99).argmin()
+        count_95 = abs(cuml_dist / float(cuml_dist.max()) - .95).argmin()
+
+        mean = dark_counts.mean()
+        med = np.median(dark_counts)
+        std = dark_counts.std()
+        mean_obj = ax.axvline(x=mean, lw=2, ls='--', color='r', label='Mean ')
+        med_obj = ax.axvline(x=med, lw=2, ls='-', color='r', label='Median')
+        two_sig = ax.axvline(x=med + (2 * std), lw=2, ls='-', color='gold')
+        three_sig = ax.axvline(x=med + (3 * std), lw=2, ls='-',
+                               color='DarkOrange')
+        dist_95 = ax.axvline(x=bins[count_95], lw=2, ls='-',
+                             color='LightGreen')
+        dist_99 = ax.axvline(x=bins[count_99], lw=2, ls='-', color='DarkGreen')
+
+        ax.grid(True, which='both')
+        ax.set_title('Histogram of Dark Rates', fontsize=15, fontweight='bold')
+        ax.set_ylabel('Frequency', fontsize=15, fontweight='bold')
+        ax.set_xlabel('Counts/pix/sec', fontsize=15, fontweight='bold')
+        ax.set_xlim(dark_counts.min(), dark_counts.max())
+        ax.xaxis.set_major_formatter(FormatStrFormatter('%3.2e'))
+
+        ax = fig.add_subplot(2, 1, 2)
+        # log_bins = np.logspace(np.log10(dark.min()), np.log10(dark.max()),
+        # 100)
+        ax.hist(dark_counts, bins=n_bins, align='mid', log=True,
+                histtype='stepfilled')
+
+        ax.axvline(x=mean, lw=2, ls='--', color='r', label='Mean')
+        ax.axvline(x=med, lw=2, ls='-', color='r', label='Median')
+        ax.axvline(x=med + (2 * std), lw=2, ls='-', color='gold')
+        ax.axvline(x=med + (3 * std), lw=2, ls='-', color='DarkOrange')
+        ax.axvline(x=bins[count_95], lw=2, ls='-', color='LightGreen')
+        ax.axvline(x=bins[count_99], lw=2, ls='-', color='DarkGreen')
+
+        # ax.set_xscale('log')
+        ax.grid(True, which='both')
+        ax.set_ylabel('Log Frequency', fontsize=15, fontweight='bold')
+        ax.set_xlabel('Counts/pix/sec', fontsize=15, fontweight='bold')
+        ax.set_xlim(dark_counts.min(), dark_counts.max())
+        ax.xaxis.set_major_formatter(FormatStrFormatter('%3.2e'))
+
+        fig.legend([med_obj, mean_obj, two_sig, three_sig, dist_95, dist_99],
+                   ['Median: {0:.2e}'.format(med),
+                    'Mean: {0:.2e}'.format(mean),
+                    r'2$\sigma$: {0:.2e}'.format(med + (2 * std)),
+                    r'3$\sigma$: {0:.2e}'.format(med + (3 * std)),
+                    r'95$\%$: {0:.2e}'.format(bins[count_95]),
+                    r'99$\%$: {0:.2e}'.format(bins[count_99])], shadow=True,
+                   numpoints=1, bbox_to_anchor=[0.8, 0.8])
+
+    def plot(self):
+        # select the important columns from the dataframe
+        if self.data is None:
+            self.data = self.get_data()
+
+        plotdf = pd.DataFrame({
+            "DECIMAL_YEAR": self.data["DECIMAL_YEAR"],
+            "DARK_RATE": self.data["DARK_RATE"]
+            })
+
+        # i can't get explode_df to work so this is for now
+        all_dec_year = []
+        all_dark_rates = []
+        for index, row in plotdf.iterrows():
+            all_dec_year = list(
+                itertools.chain(all_dec_year, row["DECIMAL_YEAR"]))
+            all_dark_rates = list(
+                itertools.chain(all_dark_rates, row["DARK_RATE"]))
+
+        # not sure what is happening here tbh, still figuring it out
+        #         self.x = all_dec_year
+        #         self.y = all_dark_rates
+
+        #         self.basic_scatter()
+
+        fig = plt.figure(figsize=(12, 9))
+        plt.scatter(all_dec_year, all_dark_rates)
+        # plt.xlim(min(all_dec_year), max(all_dec_year))
+        plt.ylim(0, max(all_dark_rates) + 0.5e-7)
+        plt.ticklabel_format(axis='y', style='sci', scilimits=(-2, 2))
+        plt.ticklabel_format(axis='x', style='plain')
+        plt.xlabel("Decimal Year")
+        plt.ylabel("Dark Rate (c/p/s)")
+        plt.grid(True)
+
+    def store_results(self):
+        # need to store results if not going in the database
+        pass