# northern_tracks Dataset

The dataset consists only of neutrino events. <br>
For information on how to use sqlite3, see the documentation: https://docs.python.org/3/library/sqlite3.html 
1) Fetching table and column names:

In [1]:
import os

# Path of the SQLite database read by every cell below.
# Set the NORTHERN_TRACKS_DB environment variable to point the notebook at a
# different copy of the file without editing this cell; when the variable is
# unset or empty, the original hard-coded location is used.
db_path = os.environ.get('NORTHERN_TRACKS_DB') or (
    '/net/big-tank/POOL/projects/icecube/graphnet/workshop-2023/data/icecube/northern_tracks.db'
)

In [2]:
import sqlite3
import pandas as pd
import numpy as np

# Open the database; `con` is reused by the plotting cells further down.
con = sqlite3.connect(db_path)

# Getting table and column names.
# SQL string literals take single quotes. Double quotes denote identifiers and
# are only accepted as strings through a SQLite compatibility quirk that can be
# switched off, in which case "table" would be parsed as a column name.
query = "SELECT name FROM sqlite_master WHERE type = 'table'"
table_names = pd.read_sql(query, con)

for table_name in table_names['name']:
    print("Table name:", table_name)
    # Getting column names of table: one row is enough, only the header is used.
    # The table name is quoted as an identifier (embedded double quotes are
    # doubled) so names that are keywords or contain odd characters still parse.
    quoted_table_name = '"' + table_name.replace('"', '""') + '"'
    query = f"SELECT * FROM {quoted_table_name} LIMIT 1"
    column_names = pd.read_sql(query, con).columns
    print("Column names:", np.array(column_names))

Table name: truth
Column names: ['CascadeFilter_13' 'DeepCoreFilter_13' 'EventID' 'L3_oscNext_bool'
 'L4_oscNext_bool' 'L5_oscNext_bool' 'L6_oscNext_bool' 'L7_oscNext_bool'
 'MuonFilter_13' 'OnlineL2Filter_17' 'RunID' 'SubEventID' 'SubrunID'
 'azimuth' 'dbang_decay_length' 'elasticity' 'energy' 'energy_track'
 'event_no' 'event_time' 'inelasticity' 'interaction_type' 'pid'
 'position_x' 'position_y' 'position_z' 'sim_type' 'stopped_muon'
 'track_length' 'zenith']
Table name: HVInIcePulses
Column names: ['awtd' 'charge' 'dom_time' 'dom_x' 'dom_y' 'dom_z' 'event_no'
 'event_time' 'fadc' 'hlc' 'is_bad_dom' 'is_bright_dom' 'is_errata_dom'
 'is_saturated_dom' 'pmt_area' 'rde' 'width']
Table name: tum_dnn
Column names: ['event_no' 'tum_bdt_sigma' 'tum_dnn_energy_dst' 'tum_dnn_energy_hive']
Table name: northeren_tracks_muon_labels
Column names: ['classification' 'classification_emuon_cascade_energy'
 'classification_emuon_deposited' 'classification_emuon_entry'
 'classification_emuon_track_en

2) Plotting distribution of energies <br>

The 'energy' truth value describes the original energy of the neutrino. <br>
The 'classification_emuon_entry' truth value describes the energy of the muon when it enters the detector. 

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

bins = 50
colors = ['tab:blue', 'tab:orange']
tables = ['truth', 'northeren_tracks_muon_labels']
energy_columns = ['energy', 'classification_emuon_entry']

# Overlay one step histogram of log10(value) for each (column, table) pair.
for series_index, (column, table, colour) in enumerate(zip(energy_columns, tables, colors)):
    column_data = pd.read_sql(f"SELECT {column} FROM {table}", con)
    print(series_index)
    log_values = np.log10(column_data)
    plt.hist(log_values, histtype='step', label=column, bins=bins, color=colour)

# Title, legend and axis label for the shared axes, then render the figure.
plt.title("event energy")
plt.legend()
plt.xlabel('energy [log10 GeV]')
plt.show()

 

0
1


  plt.show()


3) Plotting distribution of xyz-coordinates of sensors

In [4]:
# WARNING: This cell can take up to 10 minutes to run!

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

bins = 50
colors = ['tab:blue', 'tab:orange', 'tab:green']
coordinate_columns = ['dom_x', 'dom_y', 'dom_z']

# One panel per coordinate column of the HVInIcePulses table.
fig, ax = plt.subplots(3)
for panel_index, (panel, coordinate, colour) in enumerate(zip(ax, coordinate_columns, colors)):
    column_data = pd.read_sql(f"SELECT {coordinate} FROM HVInIcePulses", con)
    print(panel_index)
    panel.hist(column_data, histtype='step', label=coordinate, bins=bins, color=colour)
    panel.legend()
    panel.set_yscale('log')

# Figure-level title and spacing; plt.xlabel acts on the current axes.
fig.suptitle("xyz-coordinates of sensors")
plt.subplots_adjust(hspace=0.4)
plt.xlabel('positions [m]')
plt.show()
 

0
1
2


  plt.show()


4) Plotting distributions of azimuth and zenith angle

In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

bins = [50, 15]
angle_columns = ['azimuth', 'zenith']

# Overlay the two angle histograms, each with its own bin count from `bins`.
for angle, n_bins in zip(angle_columns, bins):
    column_data = pd.read_sql(f"SELECT {angle} FROM truth", con)
    plt.hist(column_data, histtype='step', label=angle, bins=n_bins)

# Title, legend and axis label, then render the figure.
plt.title("azimuth zenith distribution")
plt.legend()
plt.xlabel('angles [radians]')
plt.show()


  plt.show()
