### モジュール読み込み

In [None]:
import os

import numpy as np
import pandas as pd

from trackml.dataset import load_event,  load_dataset
from trackml.randomize import shuffle_hits
from trackml.score import score_event

%matplotlib inline

import sys
sys.path.append("../lib/")

import hough_clusterer as hc

### データ読み込み

In [6]:
# Location of the training data and the prefix of the one event inspected here.
path_to_train = '../path_to_train'
event_prefix = 'event000001000'
hits, cells, particles, truth = load_event(
    os.path.join(path_to_train, event_prefix)
)

# Combined memory footprint (index included) of the four loaded objects.
mem_bytes = sum(
    loaded.memory_usage(index=True).sum()
    for loaded in (hits, cells, particles, truth)
)
print('{} memory usage {:.2f} MB'.format(event_prefix, mem_bytes / 2**20))

event000001000 memory usage 18.46 MB


### モデルの定義

In [7]:
# Instantiate the clusterer from the local hough_clusterer module (imported
# as `hc`). NOTE(review): the meaning of each hyperparameter is defined in
# that module, not here — consult it before tuning.
model = hc.Clusterer(
    N_bins_r0inv=200,
    N_bins_gamma=500,
    N_theta=500,
    min_hits=9,
)

### モデルによる予測

In [8]:
# Run the clusterer on the hits of the loaded event; the result is later
# used as the track_id column of the submission.
labels = model.predict(hits)

In [15]:
# Echo the predicted labels (the cell's last expression is displayed).
labels

array([   0., 5539.,    0., ..., 2084., 2124., 2124.])

### スコアの確認

In [10]:
def create_one_event_submission(event_id, hits, labels):
    """Assemble the per-hit submission table for one event.

    Args:
        event_id: Identifier written into every row of the result.
        hits: Object exposing a ``hit_id`` column (read through
            ``hits.hit_id.values``) and supporting ``len()``.
        labels: One track label per hit, in the same order as ``hits``.

    Returns:
        A DataFrame with the integer columns ``event_id``, ``hit_id`` and
        ``track_id``, holding one row per hit.
    """
    column_names = ["event_id", "hit_id", "track_id"]
    event_ids = [event_id] * len(hits)
    # Place the three 1-D sequences side by side as columns; the final cast
    # turns labels that arrive as floats into integers.
    rows = np.column_stack((event_ids, hits.hit_id.values, labels))
    return pd.DataFrame(data=rows, columns=column_names).astype(int)

- 1イベント

In [11]:
# Build the submission table for the single loaded event and score it against
# that event's ground truth. Note that the event id passed here is the
# literal 0, not the number embedded in event_prefix.
submission = create_one_event_submission(0, hits, labels)
score = score_event(truth, submission)

In [12]:
# Report the score computed for the single event.
print("Your score: ", score)

Your score:  0.1403401478160794


- 複数イベント

In [None]:
# Score the clusterer on several training events and report the mean score.
# Note: the loop rebinds the notebook-level names hits, cells, particles,
# truth, model, labels and score, so after this cell they refer to the last
# event processed rather than to the single event loaded earlier.
dataset_submissions = []
dataset_scores = []

for event_id, hits, cells, particles, truth in load_dataset(path_to_train, skip=0, nevents=5):

    # Track pattern recognition.
    # The clusterer is only reachable through the `hc` module alias
    # (`import hough_clusterer as hc`); a bare `Clusterer` is undefined and
    # raises NameError on a fresh kernel.
    model = hc.Clusterer(N_bins_r0inv=200, N_bins_gamma=500, N_theta=500, min_hits=9)
    labels = model.predict(hits)

    # Prepare submission for an event
    one_submission = create_one_event_submission(event_id, hits, labels)
    dataset_submissions.append(one_submission)

    # Score for the event
    score = score_event(truth, one_submission)
    dataset_scores.append(score)

    print("Score for event %d: %.3f" % (event_id, score))

print('Mean score: %.3f' % (np.mean(dataset_scores)))

### テストデータに適用して提出用ファイルの作成

In [None]:
# Directory holding the test events, and the accumulator for their
# per-event submission tables.
path_to_test = "../path_to_test"
test_dataset_submissions = []

create_submission = True # True for submission 

if create_submission:
    # Only the 'hits' and 'cells' parts are requested for each test event.
    for event_id, hits, cells in load_dataset(
        path_to_test,
        parts=['hits', 'cells'],
    ):
        # Track pattern recognition: a new clusterer is built for every event.
        model = hc.Clusterer(
            N_bins_r0inv=200,
            N_bins_gamma=500,
            N_theta=500,
            min_hits=9,
        )
        labels = model.predict(hits)

        # Collect this event's submission rows.
        one_submission = create_one_event_submission(event_id, hits, labels)
        test_dataset_submissions.append(one_submission)

        print('Event ID: ', event_id)

    # Stack all per-event tables and write them as one gzip-compressed CSV,
    # leaving the DataFrame index out of the file.
    submission = pd.concat(test_dataset_submissions, axis=0)
    submission.to_csv(
        'submission.csv.gz',
        index=False,
        compression='gzip',
    )