In [1]:
# Import useful packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.sparse import csr_array
from scipy.sparse import lil_array
from scipy.sparse import save_npz
from scipy.sparse import load_npz
import os
from datetime import datetime, timedelta
from seticore import viewer, hit_capnp, stamp_capnp
import traceback



In [14]:
# Read in the data

# The datasets live in a different directory on each server, so the
# directory is picked from the name of the machine running this notebook.
import socket
hostname = socket.gethostname()

# Resolve the directory that holds the pickled datasets
if hostname in ("blpc1", "blpc2"):
    data_path = "/datax/scratch/nstieg/"
elif hostname == "cosmic-gpu-1":
    data_path = "/mnt/cosmic-gpu-1/data0/nstiegle/"
else:
    raise Exception("Data path not known")

# data_path already ends with "/", so file names are appended directly
full_dataset_path = f"{data_path}25GHz_higher.pkl"
coherent_dataset_path = f"{data_path}25GHz_higher_coherent.pkl"
incoherent_dataset_path = f"{data_path}25GHz_higher_incoherent.pkl"

# Only the full dataset is loaded here; the coherent and incoherent paths
# are defined above but their pickles are not read in this cell.
df = pd.read_pickle(full_dataset_path)

In [5]:
# Scan the full dataset and print every file_uri whose extension is not
# "hits" -- i.e. check whether any entry points directly at a stamp file.
for file_uri in df["file_uri"].tolist():
    # Text after the last "/" and then after the last "." of that name
    filename = file_uri.rsplit('/', 1)[-1]
    extension = filename.rsplit('.', 1)[-1]
    if extension == "hits":
        continue
    print(file_uri)

In [12]:
# Gather every hit that shares its file_uri with the row labelled 0,
# renumber the rows from zero, and write them out for inspection.
first_stamp = df["file_uri"][0]
in_first_stamp = df.loc[df["file_uri"].eq(first_stamp)].reset_index(drop=True)
in_first_stamp.to_csv("/home/nstiegle/BL-COSMIC-2024-proj/stamps/hits_in_first_stamp.csv", index=False)

Test Dave MacMahon's index that maps hits to stamps. If it works, look up the stamp file for every hit in the targets and save the targets back out with that information attached.

In [9]:
# Load data

# Hits for the 1-in-25 sample of targets
targets = pd.read_pickle("/mnt/cosmic-gpu-1/data0/nstiegle/representative_samples/1in25_targets.pkl")
print(targets.shape)

# Index table: part of the mapping from a hit_uri to its stamp_uri and
# to the position of the stamp inside that stamp file
cosmic_stamp_index = pd.read_csv("~davidm/local/src/FindSarfi.jl/dfidx.csv")

# Pre-group the index on hits_uri so the rows belonging to one hits file
# can be pulled out directly with get_group()
hit_uri_groups = cosmic_stamp_index.groupby(by='hits_uri')

(1248312, 29)


In [27]:
def find_stamp_of_hit(hit_uri, hit_frequency, index_groups=None):
    """
    Look up which stamp file, and which frame index inside it, covers a hit.

    Parameters
    ----------
    hit_uri
        Value of the index's `hits_uri` column identifying the hits file.
    hit_frequency
        Frequency of the hit. It is compared against each index row's
        inclusive [flo, fhi] range, so it must be in the same units as
        those columns (not checked here).
    index_groups
        Optional stamp index already grouped by `hits_uri`. Defaults to
        the notebook-level `hit_uri_groups`.

    Returns
    -------
    tuple
        (stamps_uri, frameidx) taken from the first matching index row, or
        (np.nan, np.nan) when the hits file is not in the index or when no
        row's frequency range contains the hit.
    """
    if index_groups is None:
        index_groups = hit_uri_groups

    try:
        group = index_groups.get_group(hit_uri)
    except KeyError:
        # This hits file does not appear in the index at all
        return np.nan, np.nan

    # Element-wise AND of the two range checks (inclusive on both ends)
    in_range = (group["flo"] <= hit_frequency) & (group["fhi"] >= hit_frequency)
    matches = group[in_range]
    if matches.empty:
        # Report the unmatched hit, then fall back to the same "not found"
        # result as a missing hits file instead of failing on unset locals.
        print(hit_uri, hit_frequency)
        return np.nan, np.nan

    return matches["stamps_uri"].values[0], matches["frameidx"].values[0]

In [28]:
# Setup for multiprocessing
import multiprocessing

# Build the (hit_uri, hit_frequency) argument pairs straight from the two
# columns. This is cheap enough to redo on every run, so `inputs` is not
# cached between executions (a stale `inputs` left over from another cell
# could otherwise be reused by accident).
inputs = list(zip(targets["file_uri"], targets["signal_frequency"]))

# Run algorithm with multiprocessing; the context manager shuts the worker
# processes down once starmap has returned every result.
with multiprocessing.Pool() as p:
    results = p.starmap(find_stamp_of_hit, inputs)

# Save results
# Split the result tuples in plain Python: passing them through np.array()
# would coerce the mixed (str, int, nan) values to a single string dtype.
targets["stamp_uri"] = [stamp_uri for stamp_uri, _ in results]
targets["frameidx"] = [frameidx for _, frameidx in results]
print(targets.shape)
targets.to_csv("/mnt/cosmic-gpu-1/data0/nstiegle/representative_samples/1in25_targets_indexed.csv", index=False)

In [2]:
def find_stamp_recipe(stamp_filepath, directory_path=None):
    """
    Get the Recipe for the BFR5 that matches the given stamp
    (just the filepath that is most similar and ends with "bfr5").

    Similarity is the length of the leading run of characters a candidate
    path shares with `stamp_filepath`; ties keep the first file listed.

    Parameters
    ----------
    stamp_filepath : str
        Path of the stamp file to find a recipe for.
    directory_path : str, optional
        Directory to search. Defaults to the directory of `stamp_filepath`.
        Only the top level of this directory is searched.

    Returns
    -------
    viewer.Recipe, None or False
        The Recipe built from the chosen file; None when no candidate file
        was found; False when a candidate was found but constructing the
        Recipe raised (the error and traceback are printed).
    """
    if directory_path is None:
        directory_path = os.path.dirname(stamp_filepath)

    # Take only the first entry that os.walk yields (the top level). A
    # directory that cannot be listed yields nothing, leaving no candidates.
    root, _, top_level_files = next(
        os.walk(directory_path, topdown=True), (directory_path, [], [])
    )

    closest_bfr5 = None
    closest_commonlen = 0
    for name in top_level_files:
        if not name.endswith("bfr5"):
            continue
        filepath = os.path.join(root, name)
        # Character-wise prefix: os.path.commonpath compares whole path
        # components, so it scores every file in one directory identically
        # and cannot tell the candidates apart.
        commonlen = len(os.path.commonprefix([filepath, stamp_filepath]))
        if commonlen > closest_commonlen:
            closest_bfr5 = filepath
            closest_commonlen = commonlen

    if closest_bfr5 is None:
        return None
    try:
        return viewer.Recipe(closest_bfr5)
    except Exception as err:
        # Exception rather than BaseException, so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        print(f"Error encountered instantiating Recipe from '{closest_bfr5}': {err}")
        print(traceback.format_exc())
        return False

In [39]:
# Reload the hits that were written out with their stamp_uri / frameidx columns
targets_indexed = pd.read_csv("/mnt/cosmic-gpu-1/data0/nstiegle/representative_samples/1in25_targets_indexed.csv")
print(targets_indexed.shape)

# Keep only the rows with no missing values, which removes the hits whose
# stamp lookup came back empty.
# NOTE(review): with no `subset`, a NaN in any other column also drops the
# row -- confirm that only stamp_uri / frameidx can be missing here.
good_targets = targets_indexed.dropna(axis=0, how="any")
print(good_targets.shape)

(1248312, 31)


In [50]:
def get_stamp(stamp_uri, frameidx):
    """
    Read one stamp out of a stamp file and wrap it in a viewer.Stamp.

    Parameters
    ----------
    stamp_uri : str
        Path of the stamp file to open.
    frameidx : int or integral float
        1-based position of the stamp as given by the stamp index. It is
        converted with int(), because values read back from a CSV column
        that also held NaNs arrive as floats.

    Returns
    -------
    viewer.Stamp
        The stamp read at that position, paired with whatever
        find_stamp_recipe returns for `stamp_uri`.
    """
    # -1 because the index is 1-based (it comes from Julia).
    # NOTE(review): the factor 8 presumably converts 8-byte Cap'n Proto
    # words to bytes -- confirm against the FindSarfi.jl index.
    byte_offset = 8 * (int(frameidx) - 1)

    # Binary mode: seeking to an arbitrary byte offset is only well defined
    # on binary file objects.
    with open(stamp_uri, 'rb') as f:
        f.seek(byte_offset)
        s = stamp_capnp.Stamp.read(f)
        recipe = find_stamp_recipe(stamp_uri)
        return viewer.Stamp(s, recipe)

def antenna_signal_snr_power(stamp):
    """
    Compute a per-antenna SNR and signal value for a stamp.

    The stamp's real-valued array is squared and summed over axes 2 and 4
    (polarization and the complex components), then reordered so the
    indices are (antenna, time bin, frequency bin). Each antenna's slice
    is passed to `stamp.snr_and_signal`.

    Returns a tuple of two arrays with one entry per antenna: the first
    and the second element of each `snr_and_signal` result.
    """
    squared = np.square(stamp.real_array())
    per_antenna_power = squared.sum(axis=(2, 4)).transpose(2, 0, 1)

    # One row per antenna, columns as returned by snr_and_signal
    table = np.array(list(map(stamp.snr_and_signal, per_antenna_power)))
    first_column = table[:, 0]
    second_column = table[:, 1]
    return (first_column, second_column)

def find_sarfi(stamp_uri, frameidx):
    """
    Load one stamp and compute its per-antenna SNR and signal values.

    Returns (antenna_titles, snrs, signals): the recipe's antenna names for
    the first `numAntennas` antennas of the stamp, followed by the two
    arrays produced by antenna_signal_snr_power.
    """
    stamp = get_stamp(stamp_uri, frameidx)
    snrs, signals = antenna_signal_snr_power(stamp)

    antenna_titles = []
    for antenna_index in range(stamp.stamp.numAntennas):
        antenna_titles.append(stamp.recipe.antenna_names[antenna_index])

    return antenna_titles, snrs, signals

# Setup for multiprocessing
import multiprocessing

# Build the (stamp_uri, frameidx) argument pairs for find_sarfi
inputs = list(zip(good_targets["stamp_uri"], good_targets["frameidx"]))
print('ready and going')

# Run algorithm with multiprocessing; the context manager shuts the worker
# processes down once starmap has returned every result.
with multiprocessing.Pool() as p:
    results = p.starmap(find_sarfi, inputs)

# Save results
# Each result is (list of names, array, array). Split them in plain Python:
# np.array(results) would either coerce everything to one string dtype or
# fail outright when stamps have different numbers of antennas.
good_targets = good_targets.assign(
    antenna_titles=[titles for titles, _, _ in results],
    antenna_snrs=[snrs for _, snrs, _ in results],
    antenna_signals=[signals for _, _, signals in results],
)
good_targets.to_csv("/mnt/cosmic-gpu-1/data0/nstiegle/representative_samples/1in25_good_targets_results")

ready and going


  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)
  dtype=np.bool)


AttributeError: module 'numpy' has no attribute 'bool'.
`np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations