In [1]:
import pandas as pd 
import numpy as np 
import os

In [None]:
def data_generator(directories_number,list_len):
    """Build one fixed-length training row per event from the training batches.

    For each selected batch file, the `list_len` highest-charge pulses of every
    event are kept. The pulses are joined with the event's azimuth/zenith
    labels and with the sensor coordinates, then collapsed into per-event
    lists that are right-padded with zeros up to `list_len`.

    The batch files are expected to carry the event id as their index: the
    index is what gets matched against `event_id` in train_meta.

    Parameters
    ----------
    directories_number : int or None
        Number of batch files to read, taken in sorted file-name order.
        ``None`` reads every file in the training directory.
    list_len : int
        Maximum number of pulses kept per event, and the length every
        returned list is padded to.

    Returns
    -------
    pandas.DataFrame
        One row per event, with list-valued columns ``time``, ``charge``,
        ``x``, ``y``, ``z`` and scalar columns ``azimuth`` and ``zenith``.

    Raises
    ------
    ValueError
        If no batch file is selected.
    """
    root = '/kaggle/input/icecube-neutrinos-in-deep-ice/'
    train_dir = root + 'train/'
    list_cols = ['time', 'charge', 'x', 'y', 'z']

    # os.listdir returns entries in arbitrary order; sorting makes the
    # selected batches the same on every run.
    batch_files = sorted(os.listdir(train_dir))[:directories_number]
    if not batch_files:
        raise ValueError('no training batch files selected from ' + train_dir)

    # Trim every batch first and concatenate once: growing a frame with
    # pd.concat inside the loop re-copies all previous rows on each pass.
    trimmed = []
    for name in batch_files:
        pulses = pd.read_parquet(train_dir + name)
        trimmed.append(
            pulses.sort_values('charge', ascending=False)
            .groupby('event_id')
            .head(list_len)
        )
    data = pd.concat(trimmed, axis=0)

    # Turn the index into a regular column so it can be used as a join key.
    data = data.assign(event=data.index).reset_index(drop=True)

    coords = pd.read_csv(root + 'sensor_geometry.csv')
    # Only the label columns are needed; skip the rest of the metadata file.
    labels = pd.read_parquet(
        root + 'train_meta.parquet',
        columns=['event_id', 'azimuth', 'zenith'],
    ).rename(columns={'event_id': 'event'})

    data = data.merge(labels, on='event')

    # NOTE(review): how='outer' also emits a row for every sensor without a
    # hit. Those rows have no event and are dropped by the groupby below.
    # pandas documents that an outer join sorts by key, so the per-event lists
    # are presumably in sensor_id order rather than charge order. data_test
    # joins the same way, so change both together or the feature slots of
    # train and test stop lining up.
    data = pd.merge(data, coords, on='sensor_id', how='outer')

    # One pass over the groups: lists for the pulse columns, a single value
    # for the labels (they are constant within an event).
    agg_spec = {col: list for col in list_cols}
    agg_spec['azimuth'] = 'min'
    agg_spec['zenith'] = 'min'
    data = data.groupby('event').agg(agg_spec)

    # Right-pad short events with zeros so every list has exactly list_len items.
    for col in list_cols:
        data[col] = data[col].map(lambda seq: seq + [0] * (list_len - len(seq)))

    return data[list_cols + ['azimuth', 'zenith']]



In [None]:
# Training table: first 3 batch files, at most 15 pulses kept per event.
data = data_generator(directories_number=3, list_len=15)

In [None]:
import tensorflow as tf 
import keras

In [None]:
size = 0.25   # fraction of events held out for validation
epoch = 15    # number of training epochs
seed = 42     # fixes the shuffle so the split is reproducible across runs

# DataFrame.sample returns a new frame, so the result has to be kept.
# Calling it without assigning leaves `data` in event order, and the
# hold-out below would simply be the first rows.
shuffled = data.sample(frac=1, random_state=seed)

test_size = int(round(len(shuffled) * size))


X = shuffled[['time','charge','x','y','z']]
y_az = shuffled['azimuth']
y_ze = shuffled['zenith']


num_rows, num_cols = X.shape
# Every cell holds a list (data_generator pads them to a common length), so
# nesting the rows gives a (rows, columns, list length) array directly.
res = np.array(X.to_numpy().tolist(), dtype=float).reshape(num_rows, num_cols, -1)
X = res


# The first `test_size` shuffled rows are the hold-out set, the rest is training.
X_train = X[test_size:]
X_test = X[:test_size]

y_az_train = y_az.iloc[test_size:]
y_az_test = y_az.iloc[:test_size]

y_ze_train = y_ze.iloc[test_size:]
y_ze_test = y_ze.iloc[:test_size]


In [None]:
# Two-headed regressor: a shared flatten + batch-norm stem feeds two small
# dense towers, one predicting azimuth and one predicting zenith.

# The per-sample shape is read from the training array instead of being
# hard-coded, so it follows the list length used to build the data.
# The tensor is named `inputs` so the builtin `input` is not shadowed.
inputs = keras.Input(shape=X_train.shape[1:])

x = keras.layers.Flatten()(inputs)
x = keras.layers.BatchNormalization()(x)

# Azimuth tower.
xa = keras.layers.Dense(16, activation='relu')(x)
xa = keras.layers.Dense(16, activation='relu')(xa)
xa = keras.layers.Dense(8, activation='relu')(xa)

# Zenith tower.
xz = keras.layers.Dense(16, activation='relu')(x)
xz = keras.layers.Dense(16, activation='relu')(xz)
xz = keras.layers.Dense(8, activation='relu')(xz)

out_az = keras.layers.Dense(1, activation='linear', name='az-out')(xa)
out_ze = keras.layers.Dense(1, activation='linear', name='ze-out')(xz)


model = keras.Model(inputs=inputs, outputs=[out_az, out_ze])


# Losses and optimizer come from the same `keras` package as the layers,
# rather than mixing in `tf.keras`.
model.compile(
    loss={
        'az-out': keras.losses.MeanAbsoluteError(),
        'ze-out': keras.losses.MeanAbsoluteError(),
    },
    optimizer=keras.optimizers.Adam(learning_rate=0.0004),
)


# Targets are passed by output-layer name; the hold-out split is used for validation.
history = model.fit(
    x=X_train,
    y={
        'az-out': y_az_train,
        'ze-out': y_ze_train,
    },
    validation_data=(
        X_test,
        {
            'az-out': y_az_test,
            'ze-out': y_ze_test,
        },
    ),
    epochs=epoch,
    verbose=1,
)
    

In [None]:
# az-out_loss: 1.5651 - ze-out_loss: 0.5711
# az-out_loss: 1.5721 - ze-out_loss: 0.5747
# az-out_loss: 3.1406 - ze-out_loss: 1.5342

In [None]:
def data_test(list_len, directories_number=2):
    """Build one fixed-length feature row per event from the test batches.

    For each selected batch file, the `list_len` highest-charge pulses of every
    event are kept. The pulses are joined with the sensor coordinates, then
    collapsed into per-event lists that are right-padded with zeros up to
    `list_len`.

    The batch files are expected to carry the event id as their index: the
    index is copied into the `event_id` column.

    Parameters
    ----------
    list_len : int
        Maximum number of pulses kept per event, and the length every
        returned list is padded to.
    directories_number : int or None, default 2
        Number of batch files to read, taken in sorted file-name order.
        ``None`` reads every file in the test directory. The default keeps
        the previous behaviour of reading two files.

    Returns
    -------
    pandas.DataFrame
        One row per event, with the scalar column ``event_id`` and the
        list-valued columns ``time``, ``charge``, ``x``, ``y``, ``z``.

    Raises
    ------
    ValueError
        If no batch file is selected.
    """
    root = '/kaggle/input/icecube-neutrinos-in-deep-ice/'
    test_dir = root + 'test/'
    list_cols = ['time', 'charge', 'x', 'y', 'z']

    # os.listdir returns entries in arbitrary order; sorting makes the
    # selected batches the same on every run.
    batch_files = sorted(os.listdir(test_dir))[:directories_number]
    if not batch_files:
        raise ValueError('no test batch files selected from ' + test_dir)

    # Trim every batch first and concatenate once: growing a frame with
    # pd.concat inside the loop re-copies all previous rows on each pass.
    trimmed = []
    for name in batch_files:
        pulses = pd.read_parquet(test_dir + name)
        trimmed.append(
            pulses.sort_values('charge', ascending=False)
            .groupby('event_id')
            .head(list_len)
        )
    data = pd.concat(trimmed, axis=0)

    # Turn the index into a regular column so it survives the merge below.
    data = data.assign(event_id=data.index).reset_index(drop=True)

    coords = pd.read_csv(root + 'sensor_geometry.csv')

    # NOTE(review): how='outer' also emits a row for every sensor without a
    # hit. Those rows have no event_id and are dropped by the groupby below.
    # pandas documents that an outer join sorts by key, so the per-event lists
    # are presumably in sensor_id order rather than charge order.
    # data_generator joins the same way, so change both together or the
    # feature slots of train and test stop lining up.
    data = pd.merge(data, coords, on='sensor_id', how='outer')

    # One pass over the groups instead of one groupby per column.
    data = data.groupby('event_id').agg({col: list for col in list_cols})

    # Keep the id available as a column as well as the index.
    data.insert(0, 'event_id', data.index)

    # Right-pad short events with zeros so every list has exactly list_len items.
    for col in list_cols:
        data[col] = data[col].map(lambda seq: seq + [0] * (list_len - len(seq)))

    return data[['event_id'] + list_cols]

In [None]:
# Test table built with the same 15-pulse window that was used for training.
test_data = data_test(list_len=15)

In [None]:
# Feature columns only; event_id is left out of the model input.
not_event = test_data[['time','charge','x','y','z']]

num_rows, num_cols = not_event.shape
# Each cell holds a list, so nesting the rows and fixing the first two axes
# gives the (rows, columns, list length) array the model is fed.
res = np.array(not_event.to_numpy().tolist()).reshape(num_rows, num_cols, -1)


In [None]:
# Predict on the stacked numeric array `res`, not on the DataFrame of lists.
# The model was built with outputs=[out_az, out_ze], so the two returned
# arrays are unpacked in that order.
pred_az, pred_ze = model.predict(res)

In [None]:
# Each prediction head returns a column vector; flatten to one value per event.
pred_az = pred_az.flatten()
pred_ze = pred_ze.flatten()

# Assemble the submission in one constructor call. The event_id Series
# supplies the row index and the prediction lists are matched to it by
# position; the index is then replaced by a plain 0..n-1 range.
submit = pd.DataFrame(
    {
        'event_id': test_data['event_id'].astype('int'),
        'azimuth': pred_az.tolist(),
        'zenith': pred_ze.tolist(),
    }
).reset_index(drop=True)

In [None]:
# Write only event_id, azimuth and zenith. The frame's index is a plain
# 0..n-1 range, so writing it would just add a meaningless unnamed column.
submit.to_csv('submission.csv', index=False)