## Setup

In [1]:
# Import useful packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# import seaborn as sns
from scipy.sparse import csr_array
from scipy.sparse import lil_array
from scipy.sparse import save_npz
from scipy.sparse import load_npz
import os
from datetime import datetime, timedelta

In [2]:
# Load the hit tables

# The data lives in a different directory on each server, so look up the current host first
import socket
hostname = socket.gethostname()

# Pick the data directory that belongs to this host
if hostname in ("blpc1", "blpc2"):
    data_path = "/datax/scratch/nstieg/"
elif hostname == "cosmic-gpu-1":
    data_path = "/mnt/cosmic-gpu-1/data0/nstiegle/"
else:
    # Any other machine: we do not know where the pickles are stored
    raise Exception("Data path not known")

# Full paths of the three pickled tables (all hits, coherent only, incoherent only)
full_dataset_path, coherent_dataset_path, incoherent_dataset_path = (
    data_path + filename
    for filename in (
        "25GHz_higher.pkl",
        "25GHz_higher_coherent.pkl",
        "25GHz_higher_incoherent.pkl",
    )
)

# Only the coherent table is actually read; the other two loads are left disabled
coherent = pd.read_pickle(coherent_dataset_path)
# incoherent = pd.read_pickle(incoherent_dataset_path)
# df = pd.read_pickle(full_dataset_path)

In [3]:
# All data
# # Load in distance and mask
# path = "/home/nstieg/BL-COSMIC-2024-proj/frequency_adjacency/adjacent_in_all/"
# distances_path = path + "all_within_1000hz.distances.npz"
# mask_path = path + "all_within_1000hz.mask.npz"
# distances = load_npz(distances_path)
# mask = load_npz(mask_path)
# print(distances.shape)
# print(mask.shape)

# Coherent, by source data
# Load every source's sparse arrays into an object array of [[distances, mask], ...],
# one row per source, and keep the matching source names in `sources`
path = os.path.expanduser("~") + "/" + "BL-COSMIC-2024-proj/frequency_adjacency/adjacent_in_each_source/" # Place the arrays were saved

# Each source is stored as <stem>.distances.npz plus <stem>.mask.npz.
# Build the list of stems from the distances files only, so that stray files in the
# folder (or file names with a different number of dots) cannot break the listing.
distances_suffix = ".distances.npz"
mask_suffix = ".mask.npz"
files = np.unique([
    f[:-len(distances_suffix)]
    for f in os.listdir(path)
    if f.endswith(distances_suffix) and os.path.isfile(os.path.join(path, f)) # Regular files only, no folders
]) # Unique, sorted stems (one per source observed)

sources_data = [] # Append (source.distances.npz, source.mask.npz)
sources = [] # Source name for each entry of sources_data, in the same order
for file in files:
    distances = load_npz(os.path.join(path, file + distances_suffix))
    mask = load_npz(os.path.join(path, file + mask_suffix))
    sources_data.append((distances, mask))
    sources.append(file.split('_')[0]) # The source name is everything before the first underscore

sources_data = np.array(sources_data, dtype='object')

## Coherent Filters

In [48]:
# Apply filter one
def filter1(coherent):
    """Extend COSMIC's ability to detect zero drift rate RFI.

    Removes hits from a single target which are at exactly the same frequency
    as other hits from that target and where all the hits detected at that
    frequency have a zero drift rate. A hit therefore passes when either
    - no other hit from the same source has exactly the same frequency, or
    - at least one hit sharing its (source, frequency) has a non-zero drift rate.

    Parameters:
    - coherent: The df of coherent data. The columns source_name,
      signal_frequency, signal_drift_rate and id are read.

    Returns a numpy array with the ids of the hits which pass the filter,
    grouped by source and then by frequency (pandas groupby order). The ids
    keep the dtype of the id column instead of being converted to floats.
    """
    # Collect one array of ids per passing frequency group and join them once
    # at the end. Repeatedly concatenating onto an empty float array is
    # quadratic and silently turns integer ids into floats.
    passing_ids = []

    # Filter by each source
    for source_name, source_hits in coherent.groupby("source_name"):
        print("On source", source_name)

        # Put things into groups of frequencies which are *exactly* the same
        for _, group in source_hits.groupby("signal_frequency"):
            drift_rates = group["signal_drift_rate"].to_numpy()
            # A lone hit has no exact collision in frequency, so it passes.
            # A group of colliding hits is only rejected when every one of
            # them has zero drift; any non-zero drift lets the group through.
            if len(group) == 1 or (drift_rates != 0).any():
                passing_ids.append(group["id"].to_numpy())

    if not passing_ids:
        # Nothing passed (or the table was empty): empty result, same dtype as the ids
        return coherent["id"].to_numpy()[:0]
    return np.concatenate(passing_ids)

# Run filter one on the coherent hits (the returned ids are not assigned to a variable)
filter1(coherent)

On source 2535280716217508992
On source 2536546185381558272
On source 2542485953354555264


In [43]:
# Scratch check: show the ids of the hits that share one exact frequency within one source
# NOTE(review): `a` is not used anywhere in this cell
a = []
# NOTE(review): this rebinds the notebook-level name `sources` (a list of source names built
# in the data-loading cell) to a groupby object — confirm nothing later relies on the list
sources = coherent.groupby("source_name")
# Hits belonging to the source of the first row of `coherent`
first_source = sources.get_group(coherent.source_name.iloc[0])
groups = first_source.groupby("signal_frequency")
# Looks up the frequency of the second row of `coherent`; this assumes that row belongs to
# the same source as the first row, otherwise get_group has no such key
group = groups.get_group(coherent.signal_frequency.iloc[1])
# Last expression of the cell, so the ids are displayed as the cell output
group.id.values

900273191

## All data filters