In [None]:
import numpy as np
import pandas as pd
import holoviews as hv

hv.extension("bokeh")
from datetime import datetime, timedelta
import iso8601
from peewee import fn

In [None]:
%load_ext autoreload
%autoreload 2
import inventory
from inventory import File
import jdutil
import pytz

In [None]:
# Connect to the inventory database file through the project's `inventory` helper.
db = inventory.connect_db("data/paulsson.full.db")

In [None]:
# Timezone object used below to express timestamps in New York local time.
nyc = pytz.timezone("America/New_York")

# Database schema migration

In [None]:
# Import only the names this notebook uses rather than `import *`, so the
# origin of every name is explicit and the namespace is not polluted.
from peewee import IntegerField
from playhouse.migrate import SqliteMigrator, migrate

In [None]:
migrator = SqliteMigrator(db)

In [None]:
migrate(
    migrator.add_column("file", "uid", IntegerField(default=-1)),
    migrator.add_column("file", "gid", IntegerField(default=-1)),
    migrator.add_column("file", "mode", IntegerField(default=-1)),
)

# Datetime interpretation

## Summary

The zero-point for `acquisition_times` is `acquisition_time_nyc`. The last exposure was taken at about `acquisition_time_nyc + last_frame_dt`, which is a few seconds before the last-write time `mtime`. `dtimeabsolute` and `acquisition_time_nyc + dtimemsec` are the same; it's unclear why this particular time, 10 minutes into data acquisition, is special.

## Scratch work

In [None]:
# Ten ND2 files from the inventory, ordered by descending size.
files = (
    File.select()
    .where(File.type == "nd2")
    .order_by(File.size.desc())
    .limit(10)
)

In [None]:
# Take the first file of the query and grab its metadata and filesystem timestamps.
file = files[0]
md = file.metadata
# `fromtimestamp` without a tz returns a naive datetime in the *machine's*
# local zone, so localizing that as New York is only right when the notebook
# runs on an Eastern-time machine (and is ambiguous in the DST fall-back hour).
# Interpret the timestamp as UTC and convert to New York time instead.
mtime = datetime.fromtimestamp(file.mtime, tz=pytz.UTC).astimezone(nyc)
ctime = datetime.fromtimestamp(file.ctime, tz=pytz.UTC).astimezone(nyc)

In [None]:
# Show the path of the file under inspection.
file.path

In [None]:
# `dTimeMSec` from the picture metadata, wrapped as a timedelta in milliseconds.
picture_md = md["image_metadata_sequence"]["SLxPictureMetadata"]
dtimemsec = timedelta(milliseconds=picture_md["dTimeMSec"])

In [None]:
# `dTimeAbsolute` is fed to jdutil.jd_to_datetime (i.e. handled as a Julian
# date), tagged as UTC, and then converted to New York time.
dtimeabsolute_jd = md["image_metadata_sequence"]["SLxPictureMetadata"]["dTimeAbsolute"]
dtimeabsolute = pytz.UTC.localize(jdutil.jd_to_datetime(dtimeabsolute_jd))
dtimeabsolute_nyc = dtimeabsolute.astimezone(nyc)

In [None]:
# The stored acquisition time is converted with float(), fed to
# jdutil.jd_to_datetime, tagged as UTC, and converted to New York time.
acquisition_jd = float(md["acquisition_time"]["variant"]["no_name"]["@value"])
acquisition_time = pytz.UTC.localize(jdutil.jd_to_datetime(acquisition_jd))
acquisition_time_nyc = acquisition_time.astimezone(nyc)

In [None]:
# Last entry of `acquisition_times`, interpreted as a millisecond offset.
last_frame_dt = timedelta(milliseconds=md["acquisition_times"][-1])

In [None]:
# Acquisition time expressed in New York time.
acquisition_time_nyc

In [None]:
# Acquisition time plus the `dTimeMSec` offset ...
acquisition_time_nyc + dtimemsec

In [None]:
# ... shown next to `dTimeAbsolute` (New York time) for comparison.
dtimeabsolute_nyc

In [None]:
# Acquisition time plus the offset of the last `acquisition_times` entry ...
acquisition_time_nyc + last_frame_dt

In [None]:
# ... shown next to the file's modification time for comparison.
mtime

# TIFF date finding

In [None]:
# Ten TIFF files from the inventory, ordered by descending size.
tiff_query = File.select().where(File.type == "tiff")
tiff_files = tiff_query.order_by(File.size.desc()).limit(10)

In [None]:
# Path of the first TIFF file returned by the query.
tiff_files[0].path

In [None]:
# `@DeltaT` of the first plane, converted to float and interpreted as milliseconds.
ome_image = tiff_files[0].metadata["image_description"]["OME"]["Image"]
first_plane = ome_image["Pixels"]["Plane"][0]
delta_t = timedelta(milliseconds=float(first_plane["@DeltaT"]))

In [None]:
# Parse the OME `AcquisitionDate` string and add the first plane's `@DeltaT` offset.
acquisition_date = iso8601.parse_date(
    tiff_files[0].metadata["image_description"]["OME"]["Image"]["AcquisitionDate"]
)
acquisition_date + delta_t

In [None]:
# Show the `Plane` entries of the OME metadata (source of `@DeltaT`).
tiff_files[0].metadata["image_description"]["OME"]["Image"]["Pixels"]["Plane"]

In [None]:
# Show the raw `AcquisitionDate` value before parsing.
tiff_files[0].metadata["image_description"]["OME"]["Image"]["AcquisitionDate"]

## Summary

`@DeltaT` in TIFF OME metadata is actually in msec (the spec says seconds) and corresponds to `acquisition_times` in ND2. The TIFF OME `AcquisitionDate` (expressed as an ISO 8601 string with 1 sec resolution) corresponds to `dTimeAbsolute` in ND2. For this file, `dTimeMSec` seems to be the first entry in `acquisition_times`. This disagrees with what I found above!

## Scratch work

In [None]:
# Forty ND2 files from the inventory, ordered by descending size.
nd2_files = (
    File.select()
    .where(File.type == "nd2")
    .order_by(File.size.desc())
    .limit(40)
)

In [None]:
# Pick the entry at index 21 of the size-ordered query as the ND2 file to examine.
nd2_file = nd2_files[21]

In [None]:
# Raw `dTimeAbsolute` value from the picture metadata.
nd2_file.metadata["image_metadata_sequence"]["SLxPictureMetadata"]["dTimeAbsolute"]

In [None]:
# First 100 entries of `acquisition_times`.
nd2_file.metadata["acquisition_times"][:100]

In [None]:
# Divide `acquisition_times` by the number of milliseconds in a day to get
# offsets in days, then add them to two candidate zero-points: the
# `acquisition_time` value (jds) and `dTimeAbsolute` (jds2).
ms_per_day = 1000 * 60 * 60 * 24
frame_offsets_days = np.array(nd2_file.metadata["acquisition_times"]) / ms_per_day
jds = (
    float(nd2_file.metadata["acquisition_time"]["variant"]["no_name"]["@value"])
    + frame_offsets_days
)
jds2 = (
    nd2_file.metadata["image_metadata_sequence"]["SLxPictureMetadata"]["dTimeAbsolute"]
    + frame_offsets_days
)

In [None]:
# `dTimeAbsolute` of the selected ND2 file, converted via jdutil, tagged as
# UTC, and displayed in New York time.
abs_jd = nd2_file.metadata["image_metadata_sequence"]["SLxPictureMetadata"]["dTimeAbsolute"]
pytz.UTC.localize(jdutil.jd_to_datetime(abs_jd)).astimezone(nyc)

In [None]:
# `acquisition_times` divided by the number of milliseconds in a day.
np.array(nd2_file.metadata["acquisition_times"]) / (1000 * 60 * 60 * 24)

In [None]:
# Values computed from the `acquisition_time` zero-point.
jds

In [None]:
# Entry 53 under the `acquisition_time` zero-point, in New York time ...
pytz.UTC.localize(jdutil.jd_to_datetime(jds[53])).astimezone(nyc)

In [None]:
# ... and the same entry under the `dTimeAbsolute` zero-point.
pytz.UTC.localize(jdutil.jd_to_datetime(jds2[53])).astimezone(nyc)

In [None]:
# `dTimeAbsolute` of the selected ND2 file in New York time.
# NOTE(review): this cell and the next one contain the same expression.
pytz.UTC.localize(
    jdutil.jd_to_datetime(
        nd2_file.metadata["image_metadata_sequence"]["SLxPictureMetadata"][
            "dTimeAbsolute"
        ]
    )
).astimezone(nyc)

In [None]:
# Same expression as the previous cell.
pytz.UTC.localize(
    jdutil.jd_to_datetime(
        nd2_file.metadata["image_metadata_sequence"]["SLxPictureMetadata"][
            "dTimeAbsolute"
        ]
    )
).astimezone(nyc)

In [None]:
# Last entry of `acquisition_times`, shown as a millisecond timedelta.
timedelta(milliseconds=nd2_file.metadata["acquisition_times"][-1])

In [None]:
# Acquisition time of the selected ND2 file: float() conversion, jdutil
# conversion, UTC tag, then New York time (displayed as the cell output).
start_jd = float(nd2_file.metadata["acquisition_time"]["variant"]["no_name"]["@value"])
acquisition_time = pytz.UTC.localize(jdutil.jd_to_datetime(start_jd))
acquisition_time_nyc = acquisition_time.astimezone(nyc)
acquisition_time_nyc

In [None]:
# Indices where x_data, y_data and z_data are all within `atol` of the target values.
atol = 0.1
# np.logical_and is a *binary* ufunc: a third positional argument is its `out`
# buffer, so passing three conditions silently dropped the z test (and wrote
# the x/y result into the z array). Reduce over all three conditions instead.
np.where(
    np.logical_and.reduce(
        [
            np.isclose(nd2_file.metadata["x_data"], -3535.7, atol=atol),
            np.isclose(nd2_file.metadata["y_data"], 124.2, atol=atol),
            np.isclose(nd2_file.metadata["z_data"], 5463.74, atol=atol),
        ]
    )
)

# Correspondence

In [None]:
# For each of the forty size-ordered ND2 files, record its path and its
# `x_data` / `y_data` / `z_data` metadata entries.
nd2_files = File.select().where(File.type == "nd2").order_by(File.size.desc()).limit(40)
paths, data = [], []
for nd2_file in nd2_files.iterator():
    paths.append(nd2_file.path)
    data.append(tuple(nd2_file.metadata[key] for key in ("x_data", "y_data", "z_data")))

In [None]:
# Stack the collected coordinate tuples into one array and show its shape.
a = np.vstack(data)
a.shape

In [None]:
# Inspect the `.info` attribute of the first file's `x_data` entry.
data[0][0].info

# Correspondence 2

In [None]:
# Ten TIFF files from the inventory, ordered by descending size.
by_size_desc = File.size.desc()
tiff_files = File.select().where(File.type == "tiff").order_by(by_size_desc).limit(10)

In [None]:
# Path of the TIFF file at index 5 of the size-ordered query ...
tiff_files[5].path

In [None]:
# ... and its size.
tiff_files[5].size

In [None]:
# ND2 files whose path starts with the 2016_05_15--PlasmidLosses directory.
plasmid_losses_dir = (
    "/n/files/SysBio/PAULSSON LAB/SILVIA/Ti3Data/2016_05_15--PlasmidLosses"
)
is_nd2 = File.type == "nd2"
fs = File.select().where(is_nd2).where(File.path.startswith(plasmid_losses_dir))

In [None]:
# First ND2 file matching the path prefix ...
fs[0]

In [None]:
# ... and its size.
fs[0].size

In [None]:
# FIXME: this cell was left unfinished. `File.select(fn.)` is a syntax error
# (no function name after `fn.`), so it is commented out to let the notebook
# parse and run top to bottom. Fill in the intended aggregate and restore it.
# File.select(fn.)

In [None]:
# Stub loop over ten size-ordered TIFF files. The body was the undefined name
# `hhhh`, which raised NameError on the first iteration.
# TODO: fill in the per-file logic.
for tiff_file in (
    File.select().where(File.type == "tiff").order_by(File.size.desc()).limit(10)
):
    pass

# Old

In [None]:
# mtime and size of ND2 files under the given path prefix, ordered by mtime.
silvia_prefix = "/n/files/SysBio/PAULSSON LAB/Silvia"
nd2_under_prefix = (
    File.select(File.mtime, File.size)
    .where(File.type == "nd2")
    .where(File.path.startswith(silvia_prefix))
)
files = nd2_under_prefix.order_by(File.mtime)

In [None]:
# mtime, size and path of ND2 files whose size exceeds 0.5e12, ordered by mtime.
min_size = 0.5e12
large_nd2 = (
    File.select(File.mtime, File.size, File.path)
    .where(File.type == "nd2")
    .where(File.size > min_size)
)
files = large_nd2.order_by(File.mtime)

In [None]:
# Take the entry at index 1 of the mtime-ordered query ...
f = files[1]

In [None]:
# ... and show its path.
f.path

In [None]:
# Sum of the selected files' sizes, divided by 1e12.
total_size = sum(record.size for record in files)
total_size / 1e12

In [None]:
# Number of files returned by the query.
len(files)

In [None]:
# Modification time of the entry at index 2.
files[2].mtime

In [None]:
# Split the (mtime, size) pairs of the query into two parallel tuples.
mtime_size_pairs = [(record.mtime, record.size) for record in files]
mtimes, sizes = zip(*mtime_size_pairs)

In [None]:
# Convert the sizes to a numpy array.
sizes = np.array(sizes)

In [None]:
# Running total of the sizes, in the same (mtime) order as the query.
cumsizes = np.cumsum(sizes)

In [None]:
# Plot the cumulative size against modification time.
hv.Curve((mtimes, cumsizes))

In [None]:
# IPython help lookup for hv.Histogram (interactive leftover).
hv.Histogram?

In [None]:
# Histogram of the sizes using 100 bins, with the `logy` plot option set to False.
hv.Histogram(*np.histogram(sizes, 100)).opts(plot={"logy": False})

In [None]:
# NOTE(review): `d` is not defined anywhere in the visible notebook; this cell
# presumably relies on a since-removed cell — confirm or delete.
sizes = np.array([dd["size"] for dd in d])
# Sorting the reversed view in place leaves `sizes` in descending order.
sizes[::-1].sort()

In [None]:
# NOTE(review): `plt` is not imported in the visible notebook — presumably
# matplotlib.pyplot; confirm.
plt.hist([dd["size"] for dd in d], bins=100, log=True)

In [None]:
# Plot the cumulative sum of the descending-sorted sizes.
plt.figure(figsize=(12, 12))
plt.plot(np.cumsum(sizes))