In [1]:
import pandas as pd
import numpy as np
from plotly import graph_objs as go

# How does our lab collect data?

Here is a small Python project that I thought of: are there trends in the rate of data collection in our lab at the CfA? Qualitatively, it has always felt that when visitors come, several come at once, and one would expect this to be reflected in the number of scans produced in a short period of time.

Another question I'd like to ask is: how long do we typically accumulate data for? This is reflected in the number of "shots", i.e. the number of accumulations, typically at a repetition rate of 5 Hz.

Finally, what are the most common frequencies the spectrometers are tuned to?

In [28]:
# Read the pickled scan tables for the two spectrometers (FTM1, then FTM2).
# NOTE: unpickling executes arbitrary code, so only load files from a trusted source.
ft1_df, ft2_df = map(
    pd.read_pickle,
    ("../data/FTM1_scans.pkl", "../data/FTM2_scans.pkl"),
)

In [29]:
# Cast the "date" column of each scan table to pandas' nanosecond datetime
# dtype, in place. The unit is spelled out because a bare "datetime64"
# (no unit) is rejected by recent pandas releases.
for scans_df in (ft1_df, ft2_df):
    scans_df["date"] = scans_df["date"].astype("datetime64[ns]")

In [45]:
def _scans_per_day(scans):
    """Count the non-null entries of every column per (year, month, day) of "date"."""
    when = scans["date"].dt
    return scans.groupby([when.year, when.month, when.day]).count()


# One per-day count table for each spectrometer, in FT1, FT2 order
grouped_dfs = [_scans_per_day(scans) for scans in (ft1_df, ft2_df)]

In [46]:
# Add a running total of the per-day "id" counts to each grouped table (in place)
for daily in grouped_dfs:
    daily["cumulative"] = daily["id"].cumsum()

In [47]:
# Show the first five daily rows of the first grouped table (built from ft1_df)
grouped_dfs[0].iloc[:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cavity,date,id,machine,shots,cumulative
date,date,date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014,7,8,254,254,254,254,254,254
2014,7,9,12081,12081,12081,12081,12081,12335
2014,7,10,288,288,288,288,288,12623
2014,7,11,29,29,29,29,29,12652
2014,7,13,10577,10577,10577,10577,10577,23229


In [48]:
# Collapse the (year, month, day) MultiIndex of each grouped table into a
# single datetime index so the tables can be plotted against time.
# `pd.datetime` was only an alias for the stdlib class and has been removed
# from pandas, so the timestamps are built with `pd.Timestamp` instead.
flattened_dfs = [
    daily.set_index(
        pd.DatetimeIndex(
            [
                pd.Timestamp(year=int(year), month=int(month), day=int(day))
                for year, month, day in daily.index
            ]
        )
    )
    for daily in grouped_dfs
]

In [53]:
# Figure-level settings for the cumulative scan plot
layout = {
    "title": "How we collect data",
    "height": 600.,
    "showlegend": True,
    "legend": {"x": 0.1, "y": 0.95},
    "xaxis": {"title": "Time"},
    "yaxis": {"title": "Number of scans"},
}

fig = go.FigureWidget(layout=layout)

# One WebGL scatter trace per spectrometer; keep whatever each add call returns
traces = []
for daily, label in zip(flattened_dfs, ["FT1", "FT2"]):
    traces.append(
        fig.add_scattergl(x=daily.index, y=daily["cumulative"], name=label)
    )

fig

FigureWidget({
    'data': [{'name': 'FT1',
              'type': 'scattergl',
              'uid': '9df93d18-…

In [88]:
# Hand-picked bin edges for the "shots" column
shot_bin_edges = [10, 50, 200, 500, 1000, 2000, 5000, 10000]

# (counts, edges) pair per spectrometer, in FT1, FT2 order
shot_histo = [
    np.histogram(scans["shots"], bins=shot_bin_edges)
    for scans in (ft1_df, ft2_df)
]

In [91]:
# Log-log view of how many scans fall into each shot-count bin.
fig = go.FigureWidget()
fig.layout["xaxis"]["type"] = "log"
fig.layout["yaxis"]["type"] = "log"
fig.layout["xaxis"]["title"] = "Number of shots (lower bin edge)"
fig.layout["yaxis"]["title"] = "Number of scans"
fig.layout["title"] = "How long do we accumulate data for?"

for (counts, edges), name in zip(shot_histo, ["FT1", "FT2"]):
    # np.histogram returns one more edge than counts; pair each count with the
    # lower edge of its bin so that x and y have the same length.
    fig.add_scatter(x=edges[:-1], y=counts, name=name)

fig

FigureWidget({
    'data': [{'name': 'FT1',
              'type': 'scatter',
              'uid': '2f219a75-09…

In [81]:
# 100 evenly spaced edges between 7000 and 40000 for the "cavity" column
cavity_bin_edges = np.linspace(7000., 40000., 100)

# (counts, edges) pair per spectrometer, in FT1, FT2 order
freq_histo = [
    np.histogram(scans["cavity"], bins=cavity_bin_edges)
    for scans in (ft1_df, ft2_df)
]

In [86]:
# Bar chart of how many scans fall into each cavity-frequency bin.
fig = go.FigureWidget()

# "," is the d3-format specifier for thousands grouping (e.g. 12,000).
fig.layout["xaxis"]["tickformat"] = ","
fig.layout["xaxis"]["title"] = "Frequency (MHz)"
fig.layout["yaxis"]["title"] = "Counts"
fig.layout["title"] = "What are the most common frequencies?"

for (counts, edges), name in zip(freq_histo, ["FT1", "FT2"]):
    # np.histogram returns one more edge than counts. Drawing each bar at the
    # centre of its bin keeps x and y the same length and stops the bars from
    # sitting half a bin too low in frequency.
    centers = 0.5 * (edges[:-1] + edges[1:])
    fig.add_bar(x=centers, y=counts, name=name)

fig

FigureWidget({
    'data': [{'name': 'FT1',
              'type': 'bar',
              'uid': '67ea064a-aacb-4…