# Cross-match ICECAT and 10 year dataset 

In this notebook I cross-match the ICECAT catalogue with the 10-year (2008 - 2018) data set released by the IceCube collaboration. This is done because ICECAT doesn't include the muon energy.

In [1]:
import pandas as pd
import glob
import os
from scipy.spatial import KDTree
import numpy as np

## Preliminaries

In [2]:
# Read the ICECAT table from its CSV export
icecat_csv = "IceCube_Gold_Bronze_Tracks.csv"
icecat = pd.read_csv(icecat_csv)
icecat

Unnamed: 0,NAME,RUNID,EVENTID,START,EVENTMJD,I3TYPE,RA,DEC,RA_ERR_PLUS,RA_ERR_MINUS,...,ENERGY,FAR,SIGNAL,CASCADE_SCR,SKIMMING_SCR,START_SCR,STOP_SCR,THRGOING_SCR,CR_VETO,OTHER_I3TYPES
0,IC110514A,118178,17334444,2011-05-14 01:32:22.654109,55695.064151,gfu-gold,138.47,-1.94,6.68,3.78,...,187.0,1.30,0.508,3.460000e-07,4.480000e-07,8.110000e-02,4.650000e-05,0.919000,False,
1,IC110610A,118309,46569873,2011-06-10 10:13:33.147086,55722.426078,gfu-gold,272.55,35.64,1.67,2.42,...,294.0,0.25,0.750,3.370000e-10,3.990000e-11,1.200000e-06,1.290000e-04,1.000000,False,gfu-bronze
2,IC110616A,118342,24578488,2011-06-16 17:30:53.939961,55728.729791,gfu-bronze,71.15,5.38,1.41,2.07,...,109.0,4.34,0.257,7.820000e-09,1.320000e-07,5.160000e-05,1.830000e-05,1.000000,False,
3,IC110714A,118435,58198553,2011-07-14 02:42:41.855488,55756.112984,hese-gold,68.20,40.67,0.31,1.10,...,72.0,0.11,0.778,4.330000e-05,1.100000e-05,8.760000e-01,1.810000e-05,0.124000,False,hese-bronze
4,IC110726A,118475,52691508,2011-07-26 12:15:33.258167,55768.510802,gfu-bronze,151.08,6.99,1.19,1.71,...,160.0,1.92,0.396,5.000000e-02,1.390000e-06,9.460000e-01,2.020000e-04,0.003820,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
343,IC230725A,138193,21103478,2023-07-25 21:30:51.063687,60150.896424,gfu-bronze,327.04,12.33,2.20,2.02,...,145.0,1.68,0.394,2.270000e-07,2.180000e-10,1.000000e+00,3.760000e-09,0.000132,False,
344,IC230727A,138198,44334860,2023-07-27 16:05:39.630288,60152.670598,gfu-bronze,33.66,7.63,1.14,0.70,...,113.0,3.38,0.294,4.880000e-06,9.160000e-10,1.000000e+00,1.080000e-07,0.000053,False,
345,IC230914A,138354,45413430,2023-09-14 05:21:03.717216,60201.222960,gfu-bronze,163.83,31.83,2.55,2.02,...,168.0,0.88,0.544,2.760000e-13,8.770000e-08,3.220000e-08,2.960000e-07,1.000000,False,
346,IC231004A,138415,56188508,2023-10-04 14:39:41.180329,60221.610893,gfu-gold,143.79,25.04,1.05,0.97,...,442.0,0.45,0.842,8.750000e-11,1.440000e-08,1.440000e-04,6.870000e-05,1.000000,False,gfu-bronze


In [3]:
# Restrict ICECAT to older alerts by cutting a fixed number of rows off the
# end of the table (the rows appear to be in chronological order).
# NOTE(review): with this count the last alert kept is IC180612A (June 2018),
# so the cut is not "before 2018"; the match table further down also lists
# IC180613A, which this cut removes — confirm the intended row count.
n_recent_rows = 146
icecat_dropped = icecat.iloc[:-n_recent_rows].copy()

# Show the trimmed table
icecat_dropped

Unnamed: 0,NAME,RUNID,EVENTID,START,EVENTMJD,I3TYPE,RA,DEC,RA_ERR_PLUS,RA_ERR_MINUS,...,ENERGY,FAR,SIGNAL,CASCADE_SCR,SKIMMING_SCR,START_SCR,STOP_SCR,THRGOING_SCR,CR_VETO,OTHER_I3TYPES
0,IC110514A,118178,17334444,2011-05-14 01:32:22.654109,55695.064151,gfu-gold,138.47,-1.94,6.68,3.78,...,187.0,1.30,0.508,3.460000e-07,4.480000e-07,8.110000e-02,0.000046,0.91900,False,
1,IC110610A,118309,46569873,2011-06-10 10:13:33.147086,55722.426078,gfu-gold,272.55,35.64,1.67,2.42,...,294.0,0.25,0.750,3.370000e-10,3.990000e-11,1.200000e-06,0.000129,1.00000,False,gfu-bronze
2,IC110616A,118342,24578488,2011-06-16 17:30:53.939961,55728.729791,gfu-bronze,71.15,5.38,1.41,2.07,...,109.0,4.34,0.257,7.820000e-09,1.320000e-07,5.160000e-05,0.000018,1.00000,False,
3,IC110714A,118435,58198553,2011-07-14 02:42:41.855488,55756.112984,hese-gold,68.20,40.67,0.31,1.10,...,72.0,0.11,0.778,4.330000e-05,1.100000e-05,8.760000e-01,0.000018,0.12400,False,hese-bronze
4,IC110726A,118475,52691508,2011-07-26 12:15:33.258167,55768.510802,gfu-bronze,151.08,6.99,1.19,1.71,...,160.0,1.92,0.396,5.000000e-02,1.390000e-06,9.460000e-01,0.000202,0.00382,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,IC180417A,130932,35022693,2018-04-17 06:41:03.494152,58225.278513,gfu-gold,305.73,-4.41,3.60,1.58,...,202.0,0.85,0.577,1.810000e-05,1.100000e-06,1.500000e-01,0.000107,0.85000,False,gfu-bronze
198,IC180528A,131096,32665194,2018-05-28 12:09:00.393219,58266.506255,gfu-bronze,312.14,0.30,1.41,2.02,...,110.0,4.20,0.278,1.290000e-06,2.710000e-06,2.700000e-02,0.000016,0.97300,False,
199,IC180608A,131134,60192271,2018-06-08 14:19:31.856250,58277.596896,gfu-bronze,69.08,-1.08,1.63,1.41,...,158.0,2.07,0.396,1.580000e-05,8.900000e-07,1.250000e-03,0.001010,0.99800,False,
200,IC180612A,131145,43542963,2018-06-12 04:34:10.301490,58281.190397,gfu-bronze,338.69,3.73,5.10,5.71,...,107.0,4.66,0.250,2.910000e-07,2.040000e-06,3.930000e-03,0.000044,0.99600,False,


In [4]:
# Glob pattern for the event files of the 10 year sample
csv_files_path = "IC86_*.csv"  # Update this path to your actual files

# Every file matching the pattern.
# NOTE(review): glob returns paths in arbitrary (filesystem) order, so the row
# order of `ic86` follows that order.
csv_files = glob.glob(csv_files_path)

# Read each file. The file name is stored next to its DataFrame so that later
# messages can name the file that actually caused the problem.
loaded_files = []
dataframes = []
for csv_file in csv_files:
    try:
        # Raw string: "\s" is not a valid escape sequence in a normal string.
        df = pd.read_csv(csv_file, sep=r"\s+")
    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest.
        print(f"Error reading {csv_file}: {e}")
        continue
    loaded_files.append(csv_file)
    dataframes.append(df)

# Stop with a clear message instead of an IndexError on dataframes[0].
if not dataframes:
    raise FileNotFoundError(f"No readable files matched {csv_files_path!r}")

# Check that every DataFrame has the same columns as the first one
columns = dataframes[0].columns
for csv_file, df in zip(loaded_files, dataframes):
    if not df.columns.equals(columns):
        print(f"Column mismatch found in {csv_file}. Expected columns: {columns}, but found: {df.columns}")

# Stack all files into one table with a fresh 0..n-1 index
ic86 = pd.concat(dataframes, ignore_index=True)

# Display the combined DataFrame
ic86

Unnamed: 0,MJD[days],log10(E/GeV),AngErr[deg],RA[deg],Dec[deg],Azimuth[deg],Zenith[deg]
0,56400.507095,4.92,0.20,314.246,-26.519,164.885,63.530
1,56400.529671,4.51,0.43,66.127,-16.144,61.200,73.882
2,56400.529858,4.96,0.79,145.736,-20.735,341.649,69.204
3,56400.540443,5.11,0.20,333.504,-28.945,157.675,61.118
4,56400.548719,5.04,0.27,312.225,-33.462,181.923,56.587
...,...,...,...,...,...,...,...
897401,58307.961110,5.14,0.20,221.246,-59.959,141.239,29.956
897402,58307.962666,2.94,1.23,191.547,4.415,171.627,94.312
897403,58307.963556,2.88,0.30,308.127,42.543,55.437,132.606
897404,58307.963972,2.98,0.21,332.199,44.720,31.489,134.811


In [5]:
# Sanity check: do any event times agree exactly between the two tables?
mjd1 = icecat_dropped['EVENTMJD']
mjd2 = ic86['MJD[days]']

# Exact floating point equality only — values that differ in the last digit
# do not count as common.
shared_mjd_values = set(mjd1) & set(mjd2)
common_mjd = pd.Series(list(shared_mjd_values))

num_matching_mjd = len(common_mjd)
print(f"Number of matching MJD values: {num_matching_mjd}")

Number of matching MJD values: 0


In [6]:
# Same exact-equality check, this time on the sky positions
ra_dec1 = icecat_dropped[['RA', 'DEC']]
ra_dec2 = ic86[['RA[deg]', 'Dec[deg]']]

# One (ra, dec) tuple per row of each table
ra_dec1_pairs = list(zip(ra_dec1['RA'], ra_dec1['DEC']))
ra_dec2_pairs = list(zip(ra_dec2['RA[deg]'], ra_dec2['Dec[deg]']))

# Pairs that occur, value for value, in both tables
common_ra_dec_pairs = set(ra_dec1_pairs) & set(ra_dec2_pairs)

# Tabulate the result for display
common_ra_dec_df = pd.DataFrame(common_ra_dec_pairs, columns=['Ra', 'Dec'])
common_ra_dec_df

Unnamed: 0,Ra,Dec


In [7]:
# ICECAT side: event name, position, time and the asymmetric position errors
icecat_columns = ['NAME', 'RA', 'DEC', 'EVENTMJD', 'RA_ERR_MINUS', 'RA_ERR_PLUS', 'DEC_ERR_MINUS', 'DEC_ERR_PLUS']
ra_dec_mjd1 = icecat_dropped[icecat_columns].to_numpy()

# 10 year sample side: position and time
ra_dec_mjd2 = ic86[['RA[deg]', 'Dec[deg]', 'MJD[days]']].to_numpy()

# Spatial index over the (RA, Dec) columns of the 10 year sample.
# NOTE(review): the tree compares plain Euclidean distances between the raw
# degree values — there is no cos(Dec) scaling and no RA wrap-around at 0/360.
tree = KDTree(ra_dec_mjd2[:, :2])

# Largest allowed time difference, in the units of the MJD columns
mjd_tolerance = 1  # Set your MJD tolerance; adjust as needed

# One tuple per (ICECAT event, 10 year sample event) pair that passes both cuts
matches = []
for event_id1, ra1, dec1, mjd1, ra_err_minus, ra_err_plus, dec_err_minus, dec_err_plus in ra_dec_mjd1:
    # Search radius: the largest of the four position errors
    tolerance_ra = max(ra_err_minus, ra_err_plus)
    tolerance_dec = max(dec_err_minus, dec_err_plus)
    tolerance = max(tolerance_ra, tolerance_dec)

    # Row numbers of all 10 year sample events inside the search radius
    idx = tree.query_ball_point([ra1, dec1], tolerance)

    # Of those, keep the ones that are also close in time
    matches.extend(
        (event_id1, ra1, dec1, mjd1, ra2, dec2, mjd2)
        for ra2, dec2, mjd2 in (ra_dec_mjd2[i] for i in idx)
        if abs(mjd1 - mjd2) <= mjd_tolerance
    )

# Columns ending in 1 come from ICECAT, those ending in 2 from the 10 year sample
match_columns = ['EventID1', 'Ra1', 'Dec1', 'MJD1', 'Ra2', 'Dec2', 'MJD2']
matches_df = pd.DataFrame(matches, columns=match_columns)

matches_df

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2
0,IC110514A,138.47,-1.94,55695.064151,136.502,-2.056,55695.776587
1,IC110514A,138.47,-1.94,55695.064151,137.799,-2.048,55695.064144
2,IC110514A,138.47,-1.94,55695.064151,141.108,-0.213,55695.508692
3,IC110514A,138.47,-1.94,55695.064151,137.467,0.258,55695.278542
4,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077
...,...,...,...,...,...,...,...
588,IC180613A,38.06,11.53,58282.982360,37.166,11.374,58282.982360
589,IC180613A,38.06,11.53,58282.982360,36.434,13.946,58283.090002
590,IC180613A,38.06,11.53,58282.982360,34.257,15.405,58283.272185
591,IC180613A,38.06,11.53,58282.982360,39.920,8.603,58282.483454


In [8]:
# Define a function to print matches for a specific EventID1
def print_matches_for_event(event_id1, matches=None):
    """Print every row of a match table that belongs to one event.

    Parameters
    ----------
    event_id1 : str
        Value to look for in the ``EventID1`` column.
    matches : pandas.DataFrame, optional
        Table to search; it must have an ``EventID1`` column. When omitted,
        the notebook-level ``matches_df`` is used.

    Returns
    -------
    None
        The selected rows are written to standard output.
    """
    if matches is None:
        # Default kept for existing one-argument calls
        matches = matches_df
    filtered_matches = matches[matches['EventID1'] == event_id1]
    print(filtered_matches)

# Example: print all rows of matches_df found for one ICECAT event
specific_event_id1 = 'IC110514A'  # Replace with the specific EventID1 you want to check
print_matches_for_event(specific_event_id1)

    EventID1     Ra1  Dec1          MJD1      Ra2   Dec2          MJD2
0  IC110514A  138.47 -1.94  55695.064151  136.502 -2.056  55695.776587
1  IC110514A  138.47 -1.94  55695.064151  137.799 -2.048  55695.064144
2  IC110514A  138.47 -1.94  55695.064151  141.108 -0.213  55695.508692
3  IC110514A  138.47 -1.94  55695.064151  137.467  0.258  55695.278542


## Table with ICECAT events that have only one correspondence in the 10 year dataset, are gold events and have a possible source

In [9]:
# How many 10 year sample candidates were found for each ICECAT event
match_counts = (
    matches_df
    .groupby('EventID1')
    .size()
    .reset_index(name='count')
)

# Names of the events with exactly one candidate
has_one_match = match_counts['count'].eq(1)
single_matches = match_counts.loc[has_one_match, 'EventID1']

# Rows of the match table that belong to those events
in_single_matches = matches_df['EventID1'].isin(single_matches)
single_match_events = matches_df.loc[in_single_matches]
single_match_events

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2
4,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077
7,IC110714A,68.20,40.67,55756.112984,68.354,40.753,55756.112976
8,IC110726A,151.08,6.99,55768.510802,152.440,6.570,55768.510799
11,IC110818A,332.45,-2.09,55791.688643,332.773,-2.668,55791.688635
15,IC110907A,196.08,9.40,55811.794616,196.574,9.592,55811.794607
...,...,...,...,...,...,...,...
571,IC180228A,294.79,26.40,58177.572404,295.342,26.528,58177.572404
572,IC180313A,287.18,5.53,58190.678580,287.006,5.828,58190.678580
577,IC180410A,218.50,0.56,58218.776795,218.445,0.403,58218.776795
581,IC180528A,312.14,0.30,58266.506255,311.928,0.222,58266.506255


In [10]:
# ICECAT rows that matched exactly once and whose I3TYPE contains "gold"
# (case-insensitive)
matched_once = icecat_dropped['NAME'].isin(single_matches)
is_gold = icecat_dropped['I3TYPE'].str.contains("gold", case=False)
gold_events = icecat_dropped[matched_once & is_gold]

# Inner join on the event name: keeps only the gold events and attaches
# their I3TYPE to the match rows
gold_match_events = single_match_events.merge(
    gold_events[['NAME', 'I3TYPE']],
    left_on='EventID1',
    right_on='NAME',
)

# NAME repeats EventID1 after the join, so it is dropped again
gold_match_events = gold_match_events.drop(columns=['NAME'])

gold_match_events

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE
0,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold
1,IC110714A,68.2,40.67,55756.112984,68.354,40.753,55756.112976,hese-gold
2,IC110907A,196.08,9.4,55811.794616,196.574,9.592,55811.794607,gfu-gold
3,IC111216A,36.74,18.88,55911.276851,36.554,18.407,55911.276841,gfu-gold
4,IC120301A,237.96,18.76,55987.806913,238.125,18.896,55987.80691,gfu-gold
5,IC120501A,165.37,-71.51,56048.570421,164.532,-69.749,56049.4622,hese-gold
6,IC120523A,171.08,26.44,56070.574282,171.113,26.433,56070.574282,ehe-gold
7,IC120807A,330.07,1.42,56146.207147,330.02,1.624,56146.207158,gfu-gold
8,IC120916A,182.24,3.88,56186.305305,182.541,3.805,56186.305316,gfu-gold
9,IC120922A,70.62,19.79,56192.549332,70.852,20.443,56192.549332,ehe-gold


In [11]:
# Hand-maintained lookup: ICECAT event name -> possible source.
# Each value is the source name followed by the distance to that source in
# parentheses.
possible_source = {
    'IC110610A': '4FGL J1808.8+3522 (0.37)',
    'IC110907A': '4FGL J1301.6+0834 (1.06)',
    'IC111216A': 'SWIFT J0225.0+18 (0.46)',
    'IC120916A': '4FGL J1204.8+0407 (1.06)',
    'IC130127A': '4FGL J2333.4-0133 (0.58)',
    'IC130408A': 'SWIFT J1114.3+20 (0.71)',
    'IC131204A': '4FGL J1916.7-1516 (1.08)',
    'IC140101A': '4FGL J1251.3-0201 (0.88)',
    'IC140109A': 'SWIFT J1933.9+32 (0.31)',
    'IC140705A': '4FGL J0138.5+0300 (1.33)',
    'IC140721A': '4FGL J0649.5-3139 (1.32)',
    'IC150904A': '3FHL J0854.1+2752 (0.29)',
    'IC150919A': '4FGL J1836.4+3137 (1.32)',
    'IC150926A': '4FGL J1258.7-0452 (0.34)',
    'IC160104A': '4FGL J0515.9+0537 (0.75)',
    'IC160814A': 'SWIFT J1325.2-32 (1.25)',
    'IC161001A': '4FGL J1249.8+3707 (0.09)',
    'IC170922A': '3FHL J0509.4+0542 (0.11)',
    # Add more mappings as necessary
}

# Attach the lookup result as a PossibleSource column; events that are not
# in the dictionary get NaN
gold_match_events = gold_match_events.assign(
    PossibleSource=gold_match_events['EventID1'].map(possible_source)
)

gold_match_events

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource
0,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold,4FGL J1808.8+3522 (0.37)
1,IC110714A,68.2,40.67,55756.112984,68.354,40.753,55756.112976,hese-gold,
2,IC110907A,196.08,9.4,55811.794616,196.574,9.592,55811.794607,gfu-gold,4FGL J1301.6+0834 (1.06)
3,IC111216A,36.74,18.88,55911.276851,36.554,18.407,55911.276841,gfu-gold,SWIFT J0225.0+18 (0.46)
4,IC120301A,237.96,18.76,55987.806913,238.125,18.896,55987.80691,gfu-gold,
5,IC120501A,165.37,-71.51,56048.570421,164.532,-69.749,56049.4622,hese-gold,
6,IC120523A,171.08,26.44,56070.574282,171.113,26.433,56070.574282,ehe-gold,
7,IC120807A,330.07,1.42,56146.207147,330.02,1.624,56146.207158,gfu-gold,
8,IC120916A,182.24,3.88,56186.305305,182.541,3.805,56186.305316,gfu-gold,4FGL J1204.8+0407 (1.06)
9,IC120922A,70.62,19.79,56192.549332,70.852,20.443,56192.549332,ehe-gold,


In [12]:
# Keep only the events for which a possible source was filled in, and
# renumber the rows from 0
events_with_sources = (
    gold_match_events
    .dropna(subset=['PossibleSource'])
    .reset_index(drop=True)
)

events_with_sources

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource
0,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold,4FGL J1808.8+3522 (0.37)
1,IC110907A,196.08,9.4,55811.794616,196.574,9.592,55811.794607,gfu-gold,4FGL J1301.6+0834 (1.06)
2,IC111216A,36.74,18.88,55911.276851,36.554,18.407,55911.276841,gfu-gold,SWIFT J0225.0+18 (0.46)
3,IC120916A,182.24,3.88,56186.305305,182.541,3.805,56186.305316,gfu-gold,4FGL J1204.8+0407 (1.06)
4,IC130127A,352.97,-1.98,56319.279989,353.439,-1.874,56319.279989,gfu-gold,4FGL J2333.4-0133 (0.58)
5,IC130408A,167.83,20.66,56390.188774,167.059,20.296,56390.188774,hese-gold,SWIFT J1114.3+20 (0.71)
6,IC131204A,288.98,-14.21,56630.470072,289.43,-13.832,56630.470072,ehe-gold,4FGL J1916.7-1516 (1.08)
7,IC140101A,192.26,-2.69,56658.403864,191.475,-2.766,56658.403864,gfu-gold,4FGL J1251.3-0201 (0.88)
8,IC140109A,293.12,33.02,56666.502986,292.785,33.293,56666.502986,gfu-gold,SWIFT J1933.9+32 (0.31)
9,IC140705A,25.88,2.54,56843.668693,25.933,2.744,56843.668693,gfu-gold,4FGL J0138.5+0300 (1.33)


## Table with ICECAT events that have multiple correspondences in the 10 year dataset, are gold events and have a possible source

In [13]:
# How many 10 year sample candidates were found for each ICECAT event
match_counts = (
    matches_df
    .groupby('EventID1')
    .size()
    .reset_index(name='count')
)

# Names of the events with two or more candidates
has_several_matches = match_counts['count'].gt(1)
multiple_matches = match_counts.loc[has_several_matches, 'EventID1']

# Rows of the match table that belong to those events
in_multiple_matches = matches_df['EventID1'].isin(multiple_matches)
multiple_match_events = matches_df.loc[in_multiple_matches]

multiple_match_events

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2
0,IC110514A,138.47,-1.94,55695.064151,136.502,-2.056,55695.776587
1,IC110514A,138.47,-1.94,55695.064151,137.799,-2.048,55695.064144
2,IC110514A,138.47,-1.94,55695.064151,141.108,-0.213,55695.508692
3,IC110514A,138.47,-1.94,55695.064151,137.467,0.258,55695.278542
5,IC110616A,71.15,5.38,55728.729791,71.276,5.311,55728.729781
...,...,...,...,...,...,...,...
588,IC180613A,38.06,11.53,58282.982360,37.166,11.374,58282.982360
589,IC180613A,38.06,11.53,58282.982360,36.434,13.946,58283.090002
590,IC180613A,38.06,11.53,58282.982360,34.257,15.405,58283.272185
591,IC180613A,38.06,11.53,58282.982360,39.920,8.603,58282.483454


In [14]:
# Recompute the per-event candidate counts and the multi-candidate rows
# (same steps as in the previous cell)
match_counts = (
    matches_df
    .groupby('EventID1')
    .size()
    .reset_index(name='count')
)
has_several_matches = match_counts['count'].gt(1)
multiple_matches = match_counts.loc[has_several_matches, 'EventID1']
in_multiple_matches = matches_df['EventID1'].isin(multiple_matches)
multiple_match_events = matches_df.loc[in_multiple_matches]

# All ICECAT rows whose I3TYPE contains "gold" (case-insensitive); this
# replaces the earlier gold_events, which was limited to single matches
is_gold = icecat_dropped['I3TYPE'].str.contains("gold", case=False)
gold_events = icecat_dropped[is_gold]

# Inner join on the event name: keeps only the gold events and attaches
# their I3TYPE to the match rows
gold_multiple_match_events = multiple_match_events.merge(
    gold_events[['NAME', 'I3TYPE']],
    left_on='EventID1',
    right_on='NAME',
)

# NAME repeats EventID1 after the join, so it is dropped again
gold_multiple_match_events = gold_multiple_match_events.drop(columns=['NAME'])

gold_multiple_match_events

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE
0,IC110514A,138.47,-1.94,55695.064151,136.502,-2.056,55695.776587,gfu-gold
1,IC110514A,138.47,-1.94,55695.064151,137.799,-2.048,55695.064144,gfu-gold
2,IC110514A,138.47,-1.94,55695.064151,141.108,-0.213,55695.508692,gfu-gold
3,IC110514A,138.47,-1.94,55695.064151,137.467,0.258,55695.278542,gfu-gold
4,IC110902A,9.76,7.59,55806.092203,9.631,7.734,55806.092199,gfu-gold
...,...,...,...,...,...,...,...,...
271,IC170923A,173.45,-2.54,58019.021300,173.088,-2.699,58019.021300,gfu-gold
272,IC170923A,173.45,-2.54,58019.021300,171.428,-1.379,58019.914668,gfu-gold
273,IC180417A,305.73,-4.41,58225.278513,302.870,-5.168,58224.845583,gfu-gold
274,IC180417A,305.73,-4.41,58225.278513,304.911,-5.351,58224.488815,gfu-gold


In [15]:
# Distinct event names in gold_multiple_match_events, in order of appearance
event_id_column = gold_multiple_match_events['EventID1']
unique_event_ids = event_id_column.unique()

# Heading and array on separate lines
print(
    "Unique EventID1 values with gold classification and multiple matches:",
    unique_event_ids,
    sep="\n",
)

Unique EventID1 values with gold classification and multiple matches:
['IC110514A' 'IC110902A' 'IC121011A' 'IC140203A' 'IC140410A' 'IC140927A'
 'IC150127A' 'IC151017A' 'IC160225A' 'IC160924A' 'IC161210A' 'IC170105A'
 'IC170626A' 'IC170803A' 'IC170824A' 'IC170923A' 'IC180417A']


In [16]:
# Events to leave out because they don't have a possible source
event_ids_to_remove = ['IC110902A', 'IC121011A', 'IC140203A','IC150127A','IC160225A','IC161210A', 'IC170803A','IC170923A','IC180417A']  # Replace with the actual EventID1 values you want to remove

# True for every row whose event is not in the list above
keep_row = ~gold_multiple_match_events['EventID1'].isin(event_ids_to_remove)

# Drop the unwanted events and renumber the remaining rows from 0
filtered_gold_multiple_match_events = (
    gold_multiple_match_events
    .loc[keep_row]
    .reset_index(drop=True)
)

filtered_gold_multiple_match_events


Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE
0,IC110514A,138.47,-1.94,55695.064151,136.502,-2.056,55695.776587,gfu-gold
1,IC110514A,138.47,-1.94,55695.064151,137.799,-2.048,55695.064144,gfu-gold
2,IC110514A,138.47,-1.94,55695.064151,141.108,-0.213,55695.508692,gfu-gold
3,IC110514A,138.47,-1.94,55695.064151,137.467,0.258,55695.278542,gfu-gold
4,IC140410A,2.11,81.22,56757.098757,17.827,-66.263,56757.304954,gfu-gold
...,...,...,...,...,...,...,...,...
251,IC170626A,280.99,8.80,57930.519316,281.301,9.708,57930.365375,gfu-gold
252,IC170824A,41.92,12.37,57989.553751,41.944,11.957,57989.553751,gfu-gold
253,IC170824A,41.92,12.37,57989.553751,42.871,13.627,57989.406463,gfu-gold
254,IC170824A,41.92,12.37,57989.553751,43.370,13.593,57990.225278,gfu-gold


In [17]:
# Distinct event names left in filtered_gold_multiple_match_events, in order
# of appearance
event_id_column = filtered_gold_multiple_match_events['EventID1']
unique_event_ids = event_id_column.unique()

# Heading and array on separate lines
print(
    "Unique EventID1 values with gold classification and multiple matches:",
    unique_event_ids,
    sep="\n",
)

Unique EventID1 values with gold classification and multiple matches:
['IC110514A' 'IC140410A' 'IC140927A' 'IC151017A' 'IC160924A' 'IC170105A'
 'IC170626A' 'IC170824A']


In [18]:
# Define a function to print matches for a specific EventID1
def print_matches_for_event(event_id1, matches=None):
    """Print every row of a match table that belongs to one event.

    Parameters
    ----------
    event_id1 : str
        Value to look for in the ``EventID1`` column.
    matches : pandas.DataFrame, optional
        Table to search; it must have an ``EventID1`` column. When omitted,
        the notebook-level ``filtered_gold_multiple_match_events`` is used.

    Returns
    -------
    None
        The selected rows are written to standard output.
    """
    if matches is None:
        # Default kept for existing one-argument calls
        matches = filtered_gold_multiple_match_events
    filtered_matches = matches[matches['EventID1'] == event_id1]
    print(filtered_matches)

# Example: print all candidates kept in filtered_gold_multiple_match_events
# for one event, so that the right candidate can be chosen
specific_event_id1 = 'IC170824A'  # Replace with the specific EventID1 you want to check

print_matches_for_event(specific_event_id1)

      EventID1    Ra1   Dec1          MJD1     Ra2    Dec2          MJD2  \
252  IC170824A  41.92  12.37  57989.553751  41.944  11.957  57989.553751   
253  IC170824A  41.92  12.37  57989.553751  42.871  13.627  57989.406463   
254  IC170824A  41.92  12.37  57989.553751  43.370  13.593  57990.225278   
255  IC170824A  41.92  12.37  57989.553751  44.201  13.849  57990.482387   

       I3TYPE  
252  gfu-gold  
253  gfu-gold  
254  gfu-gold  
255  gfu-gold  


In [19]:
# For every event keep the single candidate that is closest in time, i.e. the
# row with the smallest |MJD1 - MJD2|. Choosing the row by this rule instead
# of by hand-typed row positions keeps the selection valid when the number or
# the order of rows in filtered_gold_multiple_match_events changes.
candidates = filtered_gold_multiple_match_events
time_offset = (candidates['MJD1'] - candidates['MJD2']).abs()

# Index label of the closest candidate of each event; sort=False keeps the
# events in the order in which they appear in the table.
indices_to_keep = (
    time_offset
    .groupby(candidates['EventID1'], sort=False)
    .idxmin()
    .tolist()
)

# Pick those rows by label and renumber them from 0
filtered_events_to_keep = candidates.loc[indices_to_keep]
filtered_events_to_keep = filtered_events_to_keep.reset_index(drop=True)

filtered_events_to_keep

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE
0,IC110514A,138.47,-1.94,55695.064151,137.799,-2.048,55695.064144,gfu-gold
1,IC140410A,2.11,81.22,56757.098757,54.891,86.783,56757.098757,gfu-gold
2,IC140927A,50.89,-0.63,56927.160846,51.105,-0.252,56927.160846,gfu-gold
3,IC151017A,197.53,19.95,57312.67573,197.245,21.151,57312.67573,gfu-gold
4,IC160924A,241.13,1.34,57655.741067,240.479,1.743,57655.741067,gfu-gold
5,IC170105A,309.95,8.16,57758.141923,311.684,7.995,57758.141923,gfu-gold
6,IC170626A,280.99,8.8,57930.519316,281.048,8.894,57930.519316,gfu-gold
7,IC170824A,41.92,12.37,57989.553751,41.944,11.957,57989.553751,gfu-gold


In [20]:
# Hand-maintained lookup: ICECAT event name -> possible source.
# Each value is the source name followed by the distance to that source in
# parentheses.
possible_sources = {
    'IC110514A': '4FGL J0914.1-0202 (0.12)',
    'IC140410A': 'SWIFT J0017.1+81 (0.5)',
    'IC140927A': '3FHL J0323.6-0109 (0.54)',
    'IC151017A': '4FGL J1311.8+2057 (1.09)',
    'IC160924A': '4FGL J1608.4+0055 (1.07)',
    'IC170105A': 'SWIFT J2033.1+09 (2.41)',
    'IC170626A': '4FGL J1846.3+0919 (0.8)',
    'IC170824A': 'SWIFT J0248.3+12 (0.38)',
    # Add more mappings as necessary
}

# Attach the lookup result as a PossibleSource column; events that are not
# in the dictionary get NaN
filtered_events_to_keep = filtered_events_to_keep.assign(
    PossibleSource=filtered_events_to_keep['EventID1'].map(possible_sources)
)

filtered_events_to_keep

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource
0,IC110514A,138.47,-1.94,55695.064151,137.799,-2.048,55695.064144,gfu-gold,4FGL J0914.1-0202 (0.12)
1,IC140410A,2.11,81.22,56757.098757,54.891,86.783,56757.098757,gfu-gold,SWIFT J0017.1+81 (0.5)
2,IC140927A,50.89,-0.63,56927.160846,51.105,-0.252,56927.160846,gfu-gold,3FHL J0323.6-0109 (0.54)
3,IC151017A,197.53,19.95,57312.67573,197.245,21.151,57312.67573,gfu-gold,4FGL J1311.8+2057 (1.09)
4,IC160924A,241.13,1.34,57655.741067,240.479,1.743,57655.741067,gfu-gold,4FGL J1608.4+0055 (1.07)
5,IC170105A,309.95,8.16,57758.141923,311.684,7.995,57758.141923,gfu-gold,SWIFT J2033.1+09 (2.41)
6,IC170626A,280.99,8.8,57930.519316,281.048,8.894,57930.519316,gfu-gold,4FGL J1846.3+0919 (0.8)
7,IC170824A,41.92,12.37,57989.553751,41.944,11.957,57989.553751,gfu-gold,SWIFT J0248.3+12 (0.38)


## Final table

In [21]:
# Stack the single-match table on top of the multiple-match table
tables_to_combine = [events_with_sources, filtered_events_to_keep]
combined_df = pd.concat(tables_to_combine)

# Order the rows by event name and renumber them from 0
sorted_combined_df = (
    combined_df
    .sort_values(by='EventID1', ascending=True)
    .reset_index(drop=True)
)

sorted_combined_df

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource
0,IC110514A,138.47,-1.94,55695.064151,137.799,-2.048,55695.064144,gfu-gold,4FGL J0914.1-0202 (0.12)
1,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold,4FGL J1808.8+3522 (0.37)
2,IC110907A,196.08,9.4,55811.794616,196.574,9.592,55811.794607,gfu-gold,4FGL J1301.6+0834 (1.06)
3,IC111216A,36.74,18.88,55911.276851,36.554,18.407,55911.276841,gfu-gold,SWIFT J0225.0+18 (0.46)
4,IC120916A,182.24,3.88,56186.305305,182.541,3.805,56186.305316,gfu-gold,4FGL J1204.8+0407 (1.06)
5,IC130127A,352.97,-1.98,56319.279989,353.439,-1.874,56319.279989,gfu-gold,4FGL J2333.4-0133 (0.58)
6,IC130408A,167.83,20.66,56390.188774,167.059,20.296,56390.188774,hese-gold,SWIFT J1114.3+20 (0.71)
7,IC131204A,288.98,-14.21,56630.470072,289.43,-13.832,56630.470072,ehe-gold,4FGL J1916.7-1516 (1.08)
8,IC140101A,192.26,-2.69,56658.403864,191.475,-2.766,56658.403864,gfu-gold,4FGL J1251.3-0201 (0.88)
9,IC140109A,293.12,33.02,56666.502986,292.785,33.293,56666.502986,gfu-gold,SWIFT J1933.9+32 (0.31)


## Table with coincidence with Rodrigues et al.

In [22]:
# Define a function to print matches for a specific EventID1
def print_matches_for_event(event_id1, matches=None):
    """Print every row of a match table that belongs to one event.

    Parameters
    ----------
    event_id1 : str
        Value to look for in the ``EventID1`` column.
    matches : pandas.DataFrame, optional
        Table to search; it must have an ``EventID1`` column. When omitted,
        the notebook-level ``sorted_combined_df`` is used.

    Returns
    -------
    None
        The selected rows are written to standard output.
    """
    if matches is None:
        # Default kept for existing one-argument calls
        matches = sorted_combined_df
    filtered_matches = matches[matches['EventID1'] == event_id1]
    print(filtered_matches)

# Example: look one event up in sorted_combined_df; an empty table is printed
# when the event is not part of it
specific_event_id1 = 'IC160510A'  # Replace with the specific EventID1 you want to check
print_matches_for_event(specific_event_id1)

Empty DataFrame
Columns: [EventID1, Ra1, Dec1, MJD1, Ra2, Dec2, MJD2, I3TYPE, PossibleSource]
Index: []


In [23]:
# Events to keep for the comparison, in the order in which they should appear.
# They are selected by name rather than by row position, so the selection
# stays correct when rows are added to or removed from sorted_combined_df;
# a missing event raises a KeyError instead of silently picking another row.
rodrigues_event_ids = [
    'IC111216A',
    'IC170922A',
    'IC140721A',
    'IC150904A',
    'IC130408A',
    'IC151017A',
    'IC150926A',
    'IC110610A',
]

# Look the events up by name; reset_index turns EventID1 back into the first
# column and renumbers the rows from 0
events_Rodrigues = (
    sorted_combined_df
    .set_index('EventID1')
    .loc[rodrigues_event_ids]
    .reset_index()
)

events_Rodrigues

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource
0,IC111216A,36.74,18.88,55911.276851,36.554,18.407,55911.276841,gfu-gold,SWIFT J0225.0+18 (0.46)
1,IC170922A,77.43,5.79,58018.871186,77.331,5.673,58018.871186,gfu-gold,3FHL J0509.4+0542 (0.11)
2,IC140721A,101.82,-32.89,56859.758833,94.097,-32.77,56860.650259,hese-gold,4FGL J0649.5-3139 (1.32)
3,IC150904A,133.77,28.08,57269.759661,133.863,27.766,57269.759661,gfu-gold,3FHL J0854.1+2752 (0.29)
4,IC130408A,167.83,20.66,56390.188774,167.059,20.296,56390.188774,hese-gold,SWIFT J1114.3+20 (0.71)
5,IC151017A,197.53,19.95,57312.67573,197.245,21.151,57312.67573,gfu-gold,4FGL J1311.8+2057 (1.09)
6,IC150926A,194.55,-4.56,57291.90119,194.419,-4.498,57291.90119,ehe-gold,4FGL J1258.7-0452 (0.34)
7,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold,4FGL J1808.8+3522 (0.37)


In [24]:
# Subset of events_Rodrigues to keep, in the order in which they should
# appear. They are selected by name rather than by row position, so the
# selection stays correct when the rows of events_Rodrigues change; a missing
# event raises a KeyError instead of silently picking another row.
rodrigues_source_event_ids = [
    'IC170922A',
    'IC140721A',
    'IC150904A',
    'IC150926A',
    'IC110610A',
]

# Look the events up by name; reset_index turns EventID1 back into the first
# column and renumbers the rows from 0
sources_Rodrigues = (
    events_Rodrigues
    .set_index('EventID1')
    .loc[rodrigues_source_event_ids]
    .reset_index()
)

sources_Rodrigues

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource
0,IC170922A,77.43,5.79,58018.871186,77.331,5.673,58018.871186,gfu-gold,3FHL J0509.4+0542 (0.11)
1,IC140721A,101.82,-32.89,56859.758833,94.097,-32.77,56860.650259,hese-gold,4FGL J0649.5-3139 (1.32)
2,IC150904A,133.77,28.08,57269.759661,133.863,27.766,57269.759661,gfu-gold,3FHL J0854.1+2752 (0.29)
3,IC150926A,194.55,-4.56,57291.90119,194.419,-4.498,57291.90119,ehe-gold,4FGL J1258.7-0452 (0.34)
4,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold,4FGL J1808.8+3522 (0.37)


In [25]:
# Hand-maintained lookup: ICECAT event name -> source name shown in the
# SourceRodrigues column
sources_Ro = {
    'IC170922A': 'TXS 0506+056',
    'IC140721A': '3HSP J064933.6-31392',
    'IC150904A': '3HSP J085410.1+27542',
    'IC150926A': '3HSP J125848.0-04474',
    'IC110610A': '3HSP J180849.7+35204',
    # Add more mappings as necessary
}

# Attach the lookup result as a SourceRodrigues column; events that are not
# in the dictionary get NaN
sources_Rodrigues = sources_Rodrigues.assign(
    SourceRodrigues=sources_Rodrigues['EventID1'].map(sources_Ro)
)

# Order the rows by event name and renumber them from 0
sources_Rodrigues_sorted = (
    sources_Rodrigues
    .sort_values(by='EventID1', ascending=True)
    .reset_index(drop=True)
)

sources_Rodrigues_sorted

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource,SourceRodrigues
0,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold,4FGL J1808.8+3522 (0.37),3HSP J180849.7+35204
1,IC140721A,101.82,-32.89,56859.758833,94.097,-32.77,56860.650259,hese-gold,4FGL J0649.5-3139 (1.32),3HSP J064933.6-31392
2,IC150904A,133.77,28.08,57269.759661,133.863,27.766,57269.759661,gfu-gold,3FHL J0854.1+2752 (0.29),3HSP J085410.1+27542
3,IC150926A,194.55,-4.56,57291.90119,194.419,-4.498,57291.90119,ehe-gold,4FGL J1258.7-0452 (0.34),3HSP J125848.0-04474
4,IC170922A,77.43,5.79,58018.871186,77.331,5.673,58018.871186,gfu-gold,3FHL J0509.4+0542 (0.11),TXS 0506+056


In [26]:
# Hand-maintained lookup: ICECAT event name -> value for the log10(E/GeV)
# column.
# NOTE(review): these numbers are typed in by hand rather than read from the
# log10(E/GeV) column of ic86 — confirm them against the matched rows.
log10 = {
    'IC110610A': 4.62,
    'IC140721A': 5.13,
    'IC150904A': 4.90,
    'IC150926A': 4.78,
    'IC170922A': 4.72,
    # Add more mappings as necessary
}

# Add the energy column to the table of the previous step; events that are
# not in the dictionary get NaN
energy_by_event = sources_Rodrigues_sorted['EventID1'].map(log10)
sources_Rodrigues_sorted['log10(E/GeV)'] = energy_by_event

# Order the rows by event name and renumber them from 0
sources_Rodrigues_energy = (
    sources_Rodrigues_sorted
    .sort_values(by='EventID1', ascending=True)
    .reset_index(drop=True)
)

sources_Rodrigues_energy

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource,SourceRodrigues,log10(E/GeV)
0,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold,4FGL J1808.8+3522 (0.37),3HSP J180849.7+35204,4.62
1,IC140721A,101.82,-32.89,56859.758833,94.097,-32.77,56860.650259,hese-gold,4FGL J0649.5-3139 (1.32),3HSP J064933.6-31392,5.13
2,IC150904A,133.77,28.08,57269.759661,133.863,27.766,57269.759661,gfu-gold,3FHL J0854.1+2752 (0.29),3HSP J085410.1+27542,4.9
3,IC150926A,194.55,-4.56,57291.90119,194.419,-4.498,57291.90119,ehe-gold,4FGL J1258.7-0452 (0.34),3HSP J125848.0-04474,4.78
4,IC170922A,77.43,5.79,58018.871186,77.331,5.673,58018.871186,gfu-gold,3FHL J0509.4+0542 (0.11),TXS 0506+056,4.72


In [28]:
# Hand-maintained lookup: ICECAT event name -> value for the AngErr[deg]
# column.
# NOTE(review): only IC170922A has a value, so the other four events end up
# as NaN. The original cell carried commented-out entries for them whose
# numbers were identical to the energies of the previous cell — presumably
# placeholders; fill in the real angular errors.
AngErr = {
    'IC170922A': 0.2,
    # Add more mappings as necessary
}

# Add the angular error column to the table of the previous step
angular_error_by_event = sources_Rodrigues_energy['EventID1'].map(AngErr)
sources_Rodrigues_energy['AngErr[deg]'] = angular_error_by_event

# Order the rows by event name and renumber them from 0
sources_Rodrigues_ang = (
    sources_Rodrigues_energy
    .sort_values(by='EventID1', ascending=True)
    .reset_index(drop=True)
)

sources_Rodrigues_ang

Unnamed: 0,EventID1,Ra1,Dec1,MJD1,Ra2,Dec2,MJD2,I3TYPE,PossibleSource,SourceRodrigues,log10(E/GeV),AngErr[deg]
0,IC110610A,272.55,35.64,55722.426078,272.503,35.699,55722.426077,gfu-gold,4FGL J1808.8+3522 (0.37),3HSP J180849.7+35204,4.62,
1,IC140721A,101.82,-32.89,56859.758833,94.097,-32.77,56860.650259,hese-gold,4FGL J0649.5-3139 (1.32),3HSP J064933.6-31392,5.13,
2,IC150904A,133.77,28.08,57269.759661,133.863,27.766,57269.759661,gfu-gold,3FHL J0854.1+2752 (0.29),3HSP J085410.1+27542,4.9,
3,IC150926A,194.55,-4.56,57291.90119,194.419,-4.498,57291.90119,ehe-gold,4FGL J1258.7-0452 (0.34),3HSP J125848.0-04474,4.78,
4,IC170922A,77.43,5.79,58018.871186,77.331,5.673,58018.871186,gfu-gold,3FHL J0509.4+0542 (0.11),TXS 0506+056,4.72,0.2
