-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
draft nuv dark monitor. help. #156
base: master
Are you sure you want to change the base?
Changes from all commits
67f5cc8
8b215b0
57ef9d2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,3 +71,7 @@ target/ | |
|
||
# Finder | ||
.DS_Store | ||
|
||
# Misc | ||
testoutputs/ | ||
*.ipynb |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
import pandas as pd | ||
import numpy as np | ||
import os | ||
|
||
from typing import List | ||
from glob import glob | ||
from monitorframe.datamodel import BaseDataModel | ||
from peewee import OperationalError | ||
|
||
|
@@ -192,3 +194,104 @@ def get_new_data(self): | |
data_results = data_results.replace([np.inf, -np.inf], np.nan).dropna().reset_index(drop=True) | ||
|
||
return data_results[~data_results.EXPTYPE.str.contains('ACQ|DARK|FLAT')] | ||
|
||
|
||
class NUVDarkDataModel(BaseDataModel): | ||
"""Datamodel for NUV Dark files.""" | ||
files_source = FILES_SOURCE | ||
subdir_pattern = '?????' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't look like you're using
I'd recommend the 2nd option just because it might make "forward compatibility" easier (i.e. if dark datasets aren't manually identified in this way in the future). I think @dzhuliya has an example of using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes i want to use |
||
|
||
def get_new_data(self):
    """Collect header and table data from the NUV dark corrtag files.

    File discovery is temporary: until the dark exposures are added to the
    monitor_data database, the files are found by globbing the directory
    of each known dark program underneath ``self.files_source``.

    Returns:
        Whatever ``data_from_exposures`` produces for the discovered
        files, or an empty ``pandas.DataFrame`` when no corrtag file is
        found.
    """
    header_request = {
        0: ['ROOTNAME'],
        1: ['EXPSTART', 'EXPTIME'],
    }

    table_request = {
        1: ['TIME', 'XCORR', 'YCORR'],
        3: ['TIME', 'LATITUDE', 'LONGITUDE', 'DARKRATE'],
    }

    # any special data requests
    # TODO: add spt support for temp, sun_lat, sun_long
    # TODO: is this a good place to add solar data scraping in the future?

    # Dark program directories; the original note lists them against the
    # numbers 27, 26, ..., 17 (presumably observing cycles - TODO confirm).
    program_ids = ['15776', '15538', '14942', '14521', '14442', '13974',
                   '13528', '13126', '12720', '12420', '11894']

    # Search relative to the class attribute rather than the module-level
    # constant so the configured ``files_source`` is actually honoured.
    files = []
    for program in program_ids:
        pattern = os.path.join(self.files_source, program, '*corrtag*')
        files.extend(glob(pattern))

    if not files:  # No new files
        return pd.DataFrame()

    # need to add any other keywords that need to be set
    return data_from_exposures(files,
                               header_request=header_request,
                               table_request=table_request)
|
||
|
||
class CorrtagDataModel(BaseDataModel):
    """Datamodel built from every corrtag file found under ``files_source``."""
    files_source = FILES_SOURCE
    subdir_pattern = '?????'

    def get_new_data(self):
        """Gather the requested header keywords and table columns.

        Returns:
            The result of ``data_from_exposures`` for the corrtag files
            located by ``find_files``, or an empty ``pandas.DataFrame``
            when none are found.
        """
        corrtags = find_files(
            '*corrtag*',
            data_dir=self.files_source,
            subdir_pattern=self.subdir_pattern,
        )

        if not corrtags:  # No new files
            return pd.DataFrame()

        # TODO: add gross counts, don't need PHA, lat or long right now
        # TODO: add spt support for temp, sun_lat, sun_long
        # TODO: is this a good place to add solar data scraping in the future?
        header_keys = {
            0: ['ROOTNAME'],
            1: ['EXPSTART', 'EXPTIME', 'EXPTYPE', 'DETECTOR'],
        }
        table_columns = {
            1: ['TIME', 'XCORR', 'YCORR'],
            3: ['LATITUDE', 'LONGITUDE'],
        }

        # need to add any other keywords that need to be set
        return data_from_exposures(
            corrtags,
            header_request=header_keys,
            table_request=table_columns,
        )
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,230 @@ | ||||||||||||
import numpy as np | ||||||||||||
import plotly.graph_objs as go | ||||||||||||
import plotly.express as px | ||||||||||||
import datetime | ||||||||||||
import pandas as pd | ||||||||||||
|
||||||||||||
from peewee import Model | ||||||||||||
from monitorframe.monitor import BaseMonitor | ||||||||||||
from astropy.time import Time | ||||||||||||
from typing import List, Union | ||||||||||||
|
||||||||||||
from .data_models import NUVDarkDataModel | ||||||||||||
from ..monitor_helpers import fit_line, convert_day_of_year, create_visibility, v2v3 | ||||||||||||
from .. import SETTINGS | ||||||||||||
|
||||||||||||
import os | ||||||||||||
import yaml | ||||||||||||
import cosmo | ||||||||||||
import numpy | ||||||||||||
import itertools | ||||||||||||
from glob import glob | ||||||||||||
from astropy.io import fits | ||||||||||||
import matplotlib.pyplot as plt | ||||||||||||
from matplotlib.ticker import FormatStrFormatter | ||||||||||||
from cosmo.monitors.data_models import NUVDarkDataModel | ||||||||||||
from cosmo.filesystem import find_files, data_from_exposures, data_from_jitters | ||||||||||||
from cosmo.monitor_helpers import absolute_time, explode_df | ||||||||||||
from monitorframe.datamodel import BaseDataModel | ||||||||||||
from monitorframe.monitor import BaseMonitor | ||||||||||||
|
||||||||||||
# these imports are so messy i will fix this later | ||||||||||||
|
||||||||||||
COS_MONITORING = SETTINGS['output'] | ||||||||||||
|
||||||||||||
|
||||||||||||
class NUVDarkMonitor(BaseMonitor): | ||||||||||||
data_model = NUVDarkDataModel | ||||||||||||
# TODO: update docs | ||||||||||||
# docs = "https://spacetelescope.github.io/cosmo/monitors.html#<darks>" | ||||||||||||
output = COS_MONITORING | ||||||||||||
|
||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ^^ If you want to use the built-in basic plotting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||||||
run = 'monthly' | ||||||||||||
|
||||||||||||
def get_data(self):
    """Add per-exposure dark rates and their times to the new data.

    Each row of ``self.model.new_data`` is handed to
    ``calculate_dark_rate`` together with the XCORR/YCORR window used to
    select events.

    Returns:
        The new-data frame with two added columns, ``DARK_RATE`` and
        ``DECIMAL_YEAR``; each cell holds one array for that exposure.
    """
    data = self.model.new_data

    xlim = [0, 1024]
    ylim = [0, 1024]

    dark_rate_column = []
    dec_year_column = []

    # TODO: parallelize, this is going to get bad when looking at a lot
    # of data
    for _, row in data.iterrows():
        # The event columns are passed through unfiltered: XCORR, YCORR
        # and TIME must stay aligned element-for-element so that
        # calculate_dark_rate can apply one joint selection to all three.
        exposure = pd.DataFrame({
            "EXPSTART": [row["EXPSTART"]],
            "TIME": [row["TIME"]],
            "XCORR": [row["XCORR"]],
            "YCORR": [row["YCORR"]],
            "TIME_3": [row["TIME_3"]],
        })

        dark_rate_array, dec_year_array = self.calculate_dark_rate(
            exposure, xlim, ylim)
        dark_rate_column.append(dark_rate_array)
        dec_year_column.append(dec_year_array)

    data["DARK_RATE"] = dark_rate_column
    data["DECIMAL_YEAR"] = dec_year_column

    # when the monitor method of the monitor is called, it will
    # initialize the self.data attribute with this method and then it can
    # be used by the other methods
    return data


def calculate_dark_rate(self, dataframe, xlim, ylim):
    """Compute the time-binned dark rate of one exposure.

    Args:
        dataframe: One-row frame. Row 0 holds ``EXPSTART`` and the arrays
            ``TIME``, ``XCORR``, ``YCORR`` (one entry per event, aligned
            with each other) and ``TIME_3`` (the timeline time samples).
        xlim: ``(low, high)`` exclusive bounds applied to ``XCORR``.
        ylim: ``(low, high)`` exclusive bounds applied to ``YCORR``.

    Returns:
        ``(dark_rate_array, dec_year_array)``: the event count in each
        time bin divided by the window area and by ``timestep``, and the
        decimal year of each bin's leading edge. Both are empty when the
        timeline is too short to form a single bin.

    Raises:
        ValueError: If ``TIME``, ``XCORR`` and ``YCORR`` differ in shape.
    """
    # need to set this somewhere
    timestep = 25

    event_time = np.asarray(dataframe["TIME"][0])
    xcorr = np.asarray(dataframe["XCORR"][0])
    ycorr = np.asarray(dataframe["YCORR"][0])
    timeline = np.asarray(dataframe["TIME_3"][0])

    if not (event_time.shape == xcorr.shape == ycorr.shape):
        raise ValueError(
            "TIME, XCORR and YCORR must have one entry per event")

    # Every `timestep`-th timeline sample is a bin edge.
    time_bins = timeline[::timestep]
    if time_bins.size < 2:
        return np.array([]), np.array([])

    # An event counts only if it lies inside the window on BOTH axes;
    # filtering the axes separately would break alignment with TIME.
    in_window = ((xcorr > xlim[0]) & (xcorr < xlim[1])
                 & (ycorr > ylim[0]) & (ycorr < ylim[1]))

    # Histogram the selected event times (not the timeline samples
    # themselves, which would yield a constant count per bin).
    counts = np.histogram(event_time[in_window], bins=time_bins)[0]

    npix = float((xlim[1] - xlim[0]) * (ylim[1] - ylim[0]))
    # NOTE(review): dividing by `timestep` assumes the timeline is sampled
    # once per second, so each bin spans `timestep` seconds - confirm.
    dark_rate_array = counts / npix / timestep

    # Decimal year of each bin's leading edge: drop the final edge so the
    # result has one entry per bin, then let the absolute_time helper
    # combine it with EXPSTART.
    mjd_array = absolute_time(expstart=dataframe['EXPSTART'][0],
                              time=time_bins[:-1])
    dec_year_array = mjd_array.decimalyear

    return dark_rate_array, dec_year_array
|
||||||||||||
def track(self):
    """Draw linear and log histograms of every binned dark rate.

    The rates of all exposures are pooled, then plotted in two stacked
    panels (linear and log frequency) with vertical markers for the
    median, mean, median + 2 and 3 standard deviations, and the bin edges
    nearest the 95% and 99% points of the cumulative distribution.

    NOTE(review): the review thread asks that this step eventually return
    the tracked rates and that plotting live in ``plot``; the histograms
    are reproduced here for now and nothing is returned.

    Returns:
        None. Returns early, without creating a figure, when there are no
        dark rates.
    """
    # track something. perhaps current dark rate?
    if self.data is None:
        self.data = self.get_data()

    # Pool the per-exposure arrays in one pass instead of rebuilding a
    # growing list for every row.
    per_exposure = [np.atleast_1d(rates) for rates in self.data["DARK_RATE"]]
    if not per_exposure:
        return None
    dark_counts = np.concatenate(per_exposure)
    if dark_counts.size == 0:
        return None

    bin_size = 1e-8
    # At least one bin, even when every rate is identical.
    n_bins = max(
        1, int((dark_counts.max() - dark_counts.min()) / bin_size))

    counts, bins = np.histogram(dark_counts, bins=100)
    cuml_dist = np.cumsum(counts)
    cuml_frac = cuml_dist / float(cuml_dist.max())
    # Index of the bin whose cumulative fraction is closest to the target;
    # bins[index] is that bin's left edge.
    count_99 = abs(cuml_frac - .99).argmin()
    count_95 = abs(cuml_frac - .95).argmin()

    mean = dark_counts.mean()
    med = np.median(dark_counts)
    std = dark_counts.std()

    # (legend template, x position, line style, colour), in legend order.
    markers = [
        ('Median: {0:.2e}', med, '-', 'r'),
        ('Mean: {0:.2e}', mean, '--', 'r'),
        (r'2$\sigma$: {0:.2e}', med + (2 * std), '-', 'gold'),
        (r'3$\sigma$: {0:.2e}', med + (3 * std), '-', 'DarkOrange'),
        (r'95$\%$: {0:.2e}', bins[count_95], '-', 'LightGreen'),
        (r'99$\%$: {0:.2e}', bins[count_99], '-', 'DarkGreen'),
    ]

    fig = plt.figure(figsize=(12, 9))
    # (subplot position, log y-axis?, y label, title)
    panels = [
        (1, False, 'Frequency', 'Histogram of Dark Rates'),
        (2, True, 'Log Frequency', None),
    ]

    legend_handles = []
    for position, log_scale, ylabel, title in panels:
        ax = fig.add_subplot(2, 1, position)
        ax.hist(dark_counts, bins=n_bins, align='mid', log=log_scale,
                histtype='stepfilled')

        lines = [ax.axvline(x=value, lw=2, ls=style, color=colour)
                 for _, value, style, colour in markers]
        if not legend_handles:
            # The legend is drawn once, from the first panel's lines.
            legend_handles = lines

        ax.grid(True, which='both')
        if title is not None:
            ax.set_title(title, fontsize=15, fontweight='bold')
        ax.set_ylabel(ylabel, fontsize=15, fontweight='bold')
        ax.set_xlabel('Counts/pix/sec', fontsize=15, fontweight='bold')
        ax.set_xlim(dark_counts.min(), dark_counts.max())
        ax.xaxis.set_major_formatter(FormatStrFormatter('%3.2e'))

    fig.legend(legend_handles,
               [template.format(value) for template, value, _, _ in markers],
               shadow=True, numpoints=1, bbox_to_anchor=[0.8, 0.8])
    return None
Comment on lines
+135
to
+191
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to reiterate, plotting should not be in def plot(self):
... # Make plotly traces here; let's say you have a list called "traces"
self.figure.add_traces(traces) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah that is the end goal as well, but marc wanted the plots replicated for now |
||||||||||||
|
||||||||||||
def plot(self):
    """Scatter every binned dark rate against its decimal year.

    NOTE(review): the review thread points out that the other monitors do
    not load data lazily inside individual steps; the guard below is kept
    so existing direct calls to ``plot`` keep working.

    Returns:
        None. Returns early, without creating a figure, when there is
        nothing to plot.
    """
    if self.data is None:
        self.data = self.get_data()

    # Pool the per-exposure arrays in one pass instead of rebuilding a
    # growing list for every row.
    year_chunks = [np.atleast_1d(v) for v in self.data["DECIMAL_YEAR"]]
    rate_chunks = [np.atleast_1d(v) for v in self.data["DARK_RATE"]]
    if not rate_chunks:
        return None
    all_dec_year = np.concatenate(year_chunks)
    all_dark_rates = np.concatenate(rate_chunks)
    if all_dark_rates.size == 0:
        return None

    # not sure what is happening here tbh, still figuring it out
    # self.x = all_dec_year
    # self.y = all_dark_rates
    # self.basic_scatter()

    plt.figure(figsize=(12, 9))
    plt.scatter(all_dec_year, all_dark_rates)
    # Leave a little headroom above the largest rate.
    plt.ylim(0, all_dark_rates.max() + 0.5e-7)
    plt.ticklabel_format(axis='y', style='sci', scilimits=(-2, 2))
    plt.ticklabel_format(axis='x', style='plain')
    plt.xlabel("Decimal Year")
    plt.ylabel("Dark Rate (c/p/s)")
    plt.grid(True)
    return None
cmagness marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
|
||||||||||||
def store_results(self):
    """Placeholder: nothing is stored yet.

    Results will need to be stored here if they are not going into the
    database.
    """
    return None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was this something for you in particular? If so, I'd be careful about adding it here since
.gitignore
could grow quite a bit depending on what folks are doing. Instead, try using the unit tests! You could even update some of the "cleanup" fixtures to remove "output" at the start of tests rather than at the end so that you can inspect the test artifacts. If you'd like some suggestions or help with this, let me know!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, actually that would be helpful! but i was planning to remove that stuff before merging it in, it's just for ease of directory access right now