## Importing libraries

In [1]:
import pandas as pd
import numpy as np

## Cleaning data

In [3]:
def remove_volume_ids(data_H_T, volumn_list):
    """Keep only the rows whose ``volume_id`` appears in ``volumn_list``.

    Note that, despite the name, the listed volume ids are the ones that
    are retained; every other row is dropped.

    Args:
        data_H_T: DataFrame with a ``volume_id`` column.
        volumn_list: Collection of volume ids to retain.

    Returns:
        The selected rows of ``data_H_T`` (original index labels preserved).
    """
    in_selected_volume = data_H_T["volume_id"].isin(volumn_list)
    return data_H_T.loc[in_selected_volume]


def remove_weight_0(data_H_T):
    """Drop every row whose ``weight`` is not strictly positive.

    Args:
        data_H_T: DataFrame with a numeric ``weight`` column.

    Returns:
        The rows of ``data_H_T`` with ``weight > 0`` (index labels preserved).
    """
    has_positive_weight = data_H_T["weight"].gt(0)
    return data_H_T.loc[has_positive_weight]


def remove_particle_id_0(data_H_T):
    """Drop every row whose ``particle_id`` is not strictly positive.

    Args:
        data_H_T: DataFrame with a numeric ``particle_id`` column.

    Returns:
        The rows of ``data_H_T`` with ``particle_id > 0`` (index labels
        preserved).
    """
    has_particle = data_H_T["particle_id"].gt(0)
    return data_H_T.loc[has_particle]


def remove_nhits_less_3(data_P):
    """Keep only the rows whose ``nhits`` is strictly greater than 3.

    Rows with ``nhits`` equal to 3 are dropped as well as those below it.

    Args:
        data_P: DataFrame with a numeric ``nhits`` column.

    Returns:
        The rows of ``data_P`` with ``nhits > 3`` (index labels preserved).
    """
    enough_hits = data_P["nhits"].gt(3)
    return data_P.loc[enough_hits]


def same_layer_filter(data_H_T):
    """Collapse repeated hits per layer, then drop sparsely-hit particles.

    First, for every ``(particle_id, volume_id, layer_id)`` combination only
    the first row is kept. Afterwards, particles that are left with 3 or
    fewer rows are removed entirely.

    Args:
        data_H_T: DataFrame with ``particle_id``, ``volume_id`` and
            ``layer_id`` columns.

    Returns:
        The filtered rows (index labels preserved, not reset).
    """
    one_per_layer = data_H_T.drop_duplicates(
        subset=["particle_id", "volume_id", "layer_id"]
    )
    # Count the surviving rows of each particle and keep ids with more than 3.
    rows_per_particle = one_per_layer["particle_id"].value_counts()
    kept_ids = rows_per_particle[rows_per_particle > 3].index
    keep_row = one_per_layer["particle_id"].isin(kept_ids)
    return one_per_layer[keep_row]


def pt_cutter(data_P, pt_cut_start, pt_cut_end):
    """Add a ``Pt`` column and keep rows whose ``Pt`` lies in a closed range.

    ``Pt`` is computed as ``sqrt(px**2 + py**2)`` (presumably the transverse
    momentum -- confirm against the data description).

    The input frame is left untouched: the ``Pt`` column is added to a copy,
    so the caller's DataFrame is not mutated and no chained-assignment
    warning is triggered when ``data_P`` is itself a slice of another frame.

    Args:
        data_P: DataFrame with numeric ``px`` and ``py`` columns.
        pt_cut_start: Inclusive lower bound on ``Pt``.
        pt_cut_end: Inclusive upper bound on ``Pt``.

    Returns:
        A new DataFrame containing the original columns plus ``Pt``,
        restricted to rows with ``pt_cut_start <= Pt <= pt_cut_end``
        (index labels preserved).
    """
    # ``assign`` returns a new frame instead of writing into the argument.
    with_pt = data_P.assign(Pt=(data_P["px"] ** 2 + data_P["py"] ** 2) ** (1 / 2))
    in_range = (with_pt["Pt"] >= pt_cut_start) & (with_pt["Pt"] <= pt_cut_end)
    return with_pt.loc[in_range]


def full_data_clean(data_H_T, data_P, volumn_list, pt_cut_start, pt_cut_end):
    """Run the full cleaning pipeline on both tables and align them.

    ``data_H_T`` is passed through ``remove_volume_ids``, ``remove_weight_0``,
    ``remove_particle_id_0`` and ``same_layer_filter`` (in that order);
    ``data_P`` is passed through ``pt_cutter`` and ``remove_nhits_less_3``.
    Finally both tables are restricted to the ``particle_id`` values that
    survive in both of them.

    Args:
        data_H_T: DataFrame with at least the ``volume_id``, ``weight``,
            ``particle_id`` and ``layer_id`` columns.
        data_P: DataFrame with at least the ``particle_id``, ``px``, ``py``
            and ``nhits`` columns.
        volumn_list: Volume ids handed to ``remove_volume_ids``.
        pt_cut_start: Lower ``Pt`` bound handed to ``pt_cutter``.
        pt_cut_end: Upper ``Pt`` bound handed to ``pt_cutter``.

    Returns:
        A ``(data_H_T, data_P)`` tuple of the cleaned frames, each with a
        fresh 0..n-1 index.
    """
    # Hit/truth side: apply the row filters one after another.
    hits = remove_volume_ids(data_H_T, volumn_list)
    for hit_filter in (remove_weight_0, remove_particle_id_0, same_layer_filter):
        hits = hit_filter(hits)

    # Particle side: Pt window first, then the hit-count cut.
    particles = remove_nhits_less_3(pt_cutter(data_P, pt_cut_start, pt_cut_end))

    # Dropping from each table the ids missing in the other leaves exactly
    # the ids common to both.
    shared_ids = list(set(hits.particle_id) & set(particles.particle_id))
    hits = hits[hits["particle_id"].isin(shared_ids)]
    particles = particles[particles["particle_id"].isin(shared_ids)]

    return hits.reset_index(drop=True), particles.reset_index(drop=True)