In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.metrics import ConfusionMatrixDisplay, classification_report


In [None]:
import pandas.api.types
from typing import Dict, List, Tuple


# Keyword arguments that tell `score` which columns of the solution and
# submission frames hold the series id, the time step, the event label and
# the confidence score.
column_names = dict(
    series_id_column_name='series_id',
    time_column_name='step',
    event_column_name='event',
    score_column_name='score',
)

# Matching tolerances per event class, expressed in the same unit as the
# time column. The comprehension builds an independent list for each class.
tolerances = {
    event_class: [12, 36, 60, 90, 120, 150, 180, 240, 300, 360]
    for event_class in ('onset', 'wakeup')
}


class ParticipantVisibleError(Exception):
    """Raised by `score` when the submission itself is malformed."""


def score(
        solution: pd.DataFrame,
        submission: pd.DataFrame,
        tolerances: Dict[str, List[float]],
        series_id_column_name: str,
        time_column_name: str,
        event_column_name: str,
        score_column_name: str,
        use_scoring_intervals: bool = False,
) -> float:
    """Event Detection Average Precision, an AUCPR metric for event detection in
    time series and video.

    This metric is similar to IOU-threshold average precision metrics commonly
    used in object detection. For events occurring in time series, we replace the
    IOU threshold with a time tolerance.

    Submissions are evaluated on the average precision of detected events,
    averaged over timestamp error tolerance thresholds, averaged over event
    classes.

    Detections are matched to ground-truth events within error tolerances, with
    ambiguities resolved in order of decreasing confidence.

    Detailed Description
    --------------------
    Evaluation proceeds in four steps:

    1. Selection - (optional) Predictions not within a series' scoring
    intervals are dropped.
    2. Assignment - Predicted events are matched with ground-truth events.
    3. Scoring - Each group of predictions is scored against its corresponding
    group of ground-truth events via Average Precision.
    4. Reduction - The multiple AP scores are averaged to produce a single
    overall score.

    Selection

    With each series there may be a defined set of scoring intervals giving the
    intervals of time over which zero or more ground-truth events might be
    annotated in that series. A prediction will be evaluated only if it falls
    within a scoring interval. These scoring intervals can be chosen to improve
    the fairness of evaluation by, for instance, ignoring edge-cases or
    ambiguous events.

    It is recommended that, if used, scoring intervals be provided for training
    data but not test data.

    Assignment

    For each set of predictions and ground-truths within the same `event x
    tolerance x series_id` group, we match each ground-truth to the
    highest-confidence unmatched prediction occurring within the allowed
    tolerance.

    Some ground-truths may not be matched to a prediction and some predictions
    may not be matched to a ground-truth. They will still be accounted for in
    the scoring, however.

    Scoring

    Collecting the events within each `series_id`, we compute an Average
    Precision score for each `event x tolerance` group. The average precision
    score is the area under the (step-wise) precision-recall curve generated by
    decreasing confidence score thresholds over the predictions. In this
    calculation, matched predictions over the threshold are scored as TP and
    unmatched predictions as FP. Unmatched ground-truths are scored as FN.

    Reduction

    The final score is the average of the above AP scores, first averaged over
    tolerance, then over event.

    Parameters
    ----------
    solution : pd.DataFrame, with columns:

        `series_id_column_name` identifier for each time series

        `time_column_name` the time of occurrence for each event as a numeric type

        `event_column_name` class label for each event

        The solution contains the time of occurrence of one or more types of
        event within one or more time series. The metric expects the solution to
        contain the same event types as those given in `tolerances`.

        When `use_scoring_intervals == True`, you may include `start` and `end`
        events to delimit intervals within which detections will be scored.
        Detected events (from the user submission) outside of these events will
        be ignored.

    submission : pd.DataFrame, with columns as above and in addition:

        `score_column_name` the predicted confidence score for the detected event

    tolerances : Dict[str, List[float]]

        Maps each event class to a list of timestamp tolerances used
        for matching detections to ground-truth events.

    series_id_column_name, time_column_name, event_column_name, score_column_name : str

        Names of the columns described above. They are published as
        module-level globals so that the helper functions can read them.

    use_scoring_intervals: bool, default False

        Whether to ignore predicted events outside intervals delimited
        by `'start'` and `'end'` events in the solution. When `False`,
        the solution should not include `'start'` and `'end'` events.
        See the examples for illustration.

    Returns
    -------
    event_detection_ap : float
        The mean average precision of the detected events.

    Raises
    ------
    AssertionError
        If `tolerances` is empty, does not list exactly the event classes
        found in the solution, or the solution time column is not numeric.
    ParticipantVisibleError
        If the submission lacks a required column or its time or score
        column is not numeric.

    Examples
    --------
    Detecting `'pass'` events in football:
    >>> column_names = {
    ...     'series_id_column_name': 'video_id',
    ...     'time_column_name': 'time',
    ...     'event_column_name': 'event',
    ...     'score_column_name': 'score',
    ... }
    >>> tolerances = {'pass': [1.0]}
    >>> solution = pd.DataFrame({
    ...     'video_id': ['a', 'a'],
    ...     'event': ['pass', 'pass'],
    ...     'time': [0, 15],
    ... })
    >>> submission = pd.DataFrame({
    ...     'video_id': ['a', 'a', 'a'],
    ...     'event': ['pass', 'pass', 'pass'],
    ...     'score': [1.0, 0.5, 1.0],
    ...     'time': [0, 10, 14.5],
    ... })
    >>> score(solution, submission, tolerances, **column_names)
    1.0

    Increasing the confidence score of the false detection above the true
    detections decreases the AP.
    >>> submission.loc[1, 'score'] = 1.5
    >>> score(solution, submission, tolerances, **column_names)
    0.6666666666666666...

    Likewise, decreasing the confidence score of a true detection below the
    false detection also decreases the AP.
    >>> submission.loc[1, 'score'] = 0.5  # reset
    >>> submission.loc[0, 'score'] = 0.0
    >>> score(solution, submission, tolerances, **column_names)
    0.8333333333333333...

    We average AP scores over tolerances. Previously, the detection at 14.5
    would match, but adding smaller tolerances gives AP scores where it does
    not match. This results in both a FN, since the ground-truth wasn't
    detected, and a FP, since the detected event matches no ground-truth.
    >>> tolerances = {'pass': [0.1, 0.2, 1.0]}
    >>> score(solution, submission, tolerances, **column_names)
    0.3888888888888888...

    We also average over time series and over event classes.
    >>> tolerances = {'pass': [0.5, 1.0], 'challenge': [0.25, 0.50]}
    >>> solution = pd.DataFrame({
    ...     'video_id': ['a', 'a', 'b'],
    ...     'event': ['pass', 'challenge', 'pass'],
    ...     'time': [0, 15, 0],  # restart time for new time series b
    ... })
    >>> submission = pd.DataFrame({
    ...     'video_id': ['a', 'a', 'b'],
    ...     'event': ['pass', 'challenge', 'pass'],
    ...     'score': [1.0, 0.5, 1.0],
    ...     'time': [0, 15, 0],
    ... })
    >>> score(solution, submission, tolerances, **column_names)
    1.0

    By adding scoring intervals to the solution, we may choose to ignore
    detections outside of those intervals.
    >>> tolerances = {'pass': [1.0]}
    >>> solution = pd.DataFrame({
    ...     'video_id': ['a', 'a', 'a', 'a'],
    ...     'event': ['start', 'pass', 'pass', 'end'],
    ...     'time': [0, 10, 20, 30],
    ... })
    >>> submission = pd.DataFrame({
    ...     'video_id': ['a', 'a', 'a'],
    ...     'event': ['pass', 'pass', 'pass'],
    ...     'score': [1.0, 1.0, 1.0],
    ...     'time': [10, 20, 40],
    ... })
    >>> score(solution, submission, tolerances, **column_names, use_scoring_intervals=True)
    1.0

    """
    # Validate metric parameters
    assert len(tolerances) > 0, "Events must have defined tolerances."
    assert set(tolerances.keys()) == set(solution[event_column_name]).difference({'start', 'end'}), (
        f"Solution column {event_column_name} must contain the same events "
        "as defined in tolerances."
    )
    assert pd.api.types.is_numeric_dtype(solution[time_column_name]), \
        f"Solution column {time_column_name} must be of numeric type."

    # Validate submission format
    for column_name in [
        series_id_column_name,
        time_column_name,
        event_column_name,
        score_column_name,
    ]:
        if column_name not in submission.columns:
            # Report the column that is actually missing (the loop variable).
            raise ParticipantVisibleError(f"Submission must have column '{column_name}'.")

    if not pd.api.types.is_numeric_dtype(submission[time_column_name]):
        raise ParticipantVisibleError(
            f"Submission column '{time_column_name}' must be of numeric type."
        )
    if not pd.api.types.is_numeric_dtype(submission[score_column_name]):
        raise ParticipantVisibleError(
            f"Submission column '{score_column_name}' must be of numeric type."
        )

    # Set these globally to avoid passing around a bunch of arguments.
    # The helper functions below read these names from module scope.
    globals()['series_id_column_name'] = series_id_column_name
    globals()['time_column_name'] = time_column_name
    globals()['event_column_name'] = event_column_name
    globals()['score_column_name'] = score_column_name
    globals()['use_scoring_intervals'] = use_scoring_intervals

    return event_detection_ap(solution, submission, tolerances)
 
def filter_detections(
        detections: pd.DataFrame, intervals: pd.DataFrame
) -> pd.DataFrame:
    """Drop detections not inside a scoring interval.

    Parameters
    ----------
    detections : pd.DataFrame
        Detections of one series; the module-level `time_column_name`
        names the column holding their time.
    intervals : pandas object of `pd.Interval` values
        Scoring intervals of the same series. The sweep below assumes they
        are ordered by time and do not overlap.

    Returns
    -------
    pd.DataFrame
        The detections that fall inside an interval, ordered by time, with a
        fresh 0..n-1 index.
    """
    # Sort the rows themselves (stably) so that the boolean mask built from
    # the sorted times lines up with the rows it is applied to, even when the
    # caller passes detections that are not already ordered by time.
    detections = detections.sort_values(time_column_name, kind='stable')
    detection_time = detections[time_column_name].to_numpy()
    intervals = intervals.to_numpy()
    is_scored = np.zeros(len(detection_time), dtype=bool)

    # Two-pointer sweep over time-ordered detections and intervals.
    i, j = 0, 0
    while i < len(detection_time) and j < len(intervals):
        time = detection_time[i]
        int_ = intervals[j]

        # If the detection is prior in time to the interval, go to the next detection.
        if time < int_.left:
            i += 1
        # If the detection is inside the interval, keep it and go to the next detection.
        elif time in int_:
            is_scored[i] = True
            i += 1
        # If the detection is later in time, go to the next interval.
        else:
            j += 1

    return detections.loc[is_scored].reset_index(drop=True)
 
def match_detections(
        tolerance: float, ground_truths: pd.DataFrame, detections: pd.DataFrame
) -> pd.DataFrame:
    """Greedily pair detections with ground-truth events.

    Both frames come from one `event x tolerance x series_id` evaluation
    group. Detections are visited from the highest score to the lowest; each
    one claims the closest still-unclaimed ground truth whose time error is
    strictly below `tolerance`. Column names are read from the module-level
    `score_column_name`, `event_column_name` and `time_column_name`.

    Returns the score-ordered detections (rows containing NaN removed) with
    a boolean `matched` column.
    """
    ranked = detections.sort_values(score_column_name, ascending=False).dropna()
    matched_flags = np.zeros(len(ranked[event_column_name]), dtype=bool)
    claimed = set()

    for position, detection in enumerate(ranked.itertuples(index=False)):
        closest_truth, closest_error = None, tolerance

        for truth in ground_truths.itertuples(index=False):
            gap = abs(getattr(detection, time_column_name) - getattr(truth, time_column_name))
            # `not (gap < ...)` rather than `gap >= ...` so that a NaN gap is skipped.
            if not (gap < closest_error) or truth in claimed:
                continue
            closest_truth, closest_error = truth, gap

        if closest_truth is None:
            continue
        matched_flags[position] = True
        claimed.add(closest_truth)

    return ranked.assign(matched=matched_flags)
 

def precision_recall_curve(
        matches: np.ndarray, scores: np.ndarray, p: int
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Precision/recall pairs at each distinct confidence threshold.

    `matches` flags each detection as matched or not, `scores` holds the
    confidences and `p` is the number of ground-truth events. The returned
    precision and recall sequences are ordered so that recall is
    non-increasing and end with the conventional point (precision 1,
    recall 0); the third value holds the corresponding thresholds. With no
    detections the result is `([1], [0], [])`.
    """
    if len(matches) == 0:
        return [1], [0], []

    # Rank detections from the most to the least confident.
    order = np.argsort(scores, kind='stable')[::-1]
    ranked_scores = scores[order]
    ranked_matches = matches[order]

    # Last position of every run of equal scores, plus the final position.
    change_points = np.flatnonzero(np.diff(ranked_scores))
    cut_idxs = np.append(change_points, ranked_matches.size - 1)
    thresholds = ranked_scores[cut_idxs]

    # Running counts of matched (TP) and unmatched (FP) detections at each cut.
    tps = np.cumsum(ranked_matches)[cut_idxs]
    fps = np.cumsum(~ranked_matches)[cut_idxs]

    raw_precision = tps / (tps + fps)
    precision = np.where(np.isnan(raw_precision), 0, raw_precision)
    # p can differ from the number of matches, so recall need not reach 1.
    recall = tps / p

    # Keep cuts up to the first one reaching the final TP count, reversed.
    first_full = tps.searchsorted(tps[-1])
    keep = slice(first_full, None, -1)

    return (
        np.concatenate([precision[keep], [1]]),
        np.concatenate([recall[keep], [0]]),
        thresholds[keep],
    )
 

def average_precision_score(matches: np.ndarray, scores: np.ndarray, p: int) -> float:
    """Area under the step-wise precision-recall curve.

    The curve comes from `precision_recall_curve(matches, scores, p)`; each
    recall step is weighted by the precision at the start of that step.
    """
    precision, recall, _thresholds = precision_recall_curve(matches, scores, p)
    recall_steps = np.diff(recall)
    step_heights = np.array(precision)[:-1]
    # Recall is non-increasing along the curve, hence the sign flip.
    return -np.sum(recall_steps * step_heights)
 

def event_detection_ap(
        solution: pd.DataFrame,
        submission: pd.DataFrame,
        tolerances: Dict[str, List[float]],
) -> float:
    """Mean average precision of the submitted detections.

    Column names and the `use_scoring_intervals` switch are read from
    module-level globals, which `score` sets before calling this function.

    Parameters
    ----------
    solution : pd.DataFrame
        Ground-truth events, optionally with `'start'`/`'end'` rows that
        delimit scoring intervals.
    submission : pd.DataFrame
        Detected events with a confidence score.
    tolerances : Dict[str, List[float]]
        Time tolerances to evaluate for each event class.

    Returns
    -------
    float
        AP averaged over tolerances, then over event classes.
    """
    # Ensure solution and submission are sorted properly
    solution = solution.sort_values([series_id_column_name, time_column_name])
    submission = submission.sort_values([series_id_column_name, time_column_name])

    # Rows that delimit scoring intervals rather than describe events. The
    # configured event column is used instead of a hard-coded "event" name.
    is_interval_marker = solution[event_column_name].isin(['start', 'end'])

    # Extract scoring intervals.
    if use_scoring_intervals:
        intervals = (
            solution
            .loc[is_interval_marker]
            # Number the start/end rows within each series so they pair up.
            .assign(interval=lambda x: x.groupby([series_id_column_name, event_column_name]).cumcount())
            .pivot(
                index='interval',
                columns=[series_id_column_name, event_column_name],
                values=time_column_name,
            )
            .stack(series_id_column_name)
            .swaplevel()
            .sort_index()
            .loc[:, ['start', 'end']]
            .apply(lambda x: pd.Interval(*x, closed='both'), axis=1)
        )

    # Extract ground-truth events.
    ground_truths = solution.loc[~is_interval_marker].reset_index(drop=True)

    # Map each event class to its prevalence (needed for recall calculation)
    class_counts = ground_truths.value_counts(event_column_name).to_dict()

    # Create table for detections with a column indicating a match to a ground-truth event
    detections = submission.assign(matched=False)

    # Remove detections outside of scoring intervals
    if use_scoring_intervals:
        detections_filtered = []
        # NOTE(review): the pairing relies on both groupbys yielding the same
        # series in the same order; the assert below fails otherwise.
        for (det_group, dets), (int_group, ints) in zip(
            detections.groupby(series_id_column_name), intervals.groupby(series_id_column_name)
        ):
            assert det_group == int_group
            detections_filtered.append(filter_detections(dets, ints))
        detections_filtered = pd.concat(detections_filtered, ignore_index=True)
    else:
        detections_filtered = detections

    # Create table of event-class x tolerance x series_id values
    aggregation_keys = pd.DataFrame(
        [(ev, tol, vid)
         for ev in tolerances.keys()
         for tol in tolerances[ev]
         for vid in ground_truths[series_id_column_name].unique()],
        columns=[event_column_name, 'tolerance', series_id_column_name],
    )

    # Create match evaluation groups: event-class x tolerance x series_id
    detections_grouped = (
        aggregation_keys
        .merge(detections_filtered, on=[event_column_name, series_id_column_name], how='left')
        .groupby([event_column_name, 'tolerance', series_id_column_name])
    )
    ground_truths_grouped = (
        aggregation_keys
        .merge(ground_truths, on=[event_column_name, series_id_column_name], how='left')
        .groupby([event_column_name, 'tolerance', series_id_column_name])
    )

    # Match detections to ground truth events by evaluation group
    detections_matched = []
    for key in aggregation_keys.itertuples(index=False):
        dets = detections_grouped.get_group(key)
        gts = ground_truths_grouped.get_group(key)
        detections_matched.append(
            match_detections(dets['tolerance'].iloc[0], gts, dets)
        )
    detections_matched = pd.concat(detections_matched)

    # Compute AP per event x tolerance group
    event_classes = ground_truths[event_column_name].unique()
    ap_table = (
        detections_matched
        .loc[lambda matched: matched[event_column_name].isin(event_classes)]
        .groupby([event_column_name, 'tolerance']).apply(
            lambda group: average_precision_score(
                group['matched'].to_numpy(),
                group[score_column_name].to_numpy(),
                class_counts[group[event_column_name].iat[0]],
            )
        )
    )

    # Average over tolerances, then over event classes. Dividing by the
    # number of ground-truth classes counts a class without any scored
    # detection as zero.
    mean_ap = ap_table.groupby(event_column_name).mean().sum() / len(event_classes)

    return mean_ap

**Read data**

In [None]:
# Read the training split from the Kaggle input dataset into a DataFrame.
file='/kaggle/input/gammaa-train-test-validation-series/train_set_with_variables.parquet'
train  = pd.read_parquet(file)


In [None]:
# Read the test split from the Kaggle input dataset into a DataFrame.
file = '/kaggle/input/gammaa-train-test-validation-series/test_set_with_variables.parquet'
test  = pd.read_parquet(file)

In [None]:
# Read the validation split from the Kaggle input dataset into a DataFrame.
file = '/kaggle/input/gammaa-train-test-validation-series/validation_set_with_variables.parquet'
val  = pd.read_parquet(file)

In [None]:
# True if the validation frame holds at least one missing value (shown as
# the cell output). Only the validation split is checked here.
val.isnull().values.any()

In [None]:
# Order every split by series and, within a series, by timestamp.
_sort_keys = ['series_id', 'timestamp']
train, val, test = (
    split.sort_values(by=_sort_keys) for split in (train, val, test)
)

In [None]:
# Encode the event labels in place in every split: 'onset' -> 0, 'wakeup' -> 1.
for split in (train, val, test):
    for label, code in (('onset', 0), ('wakeup', 1)):
        split.loc[split['event'] == label, 'event'] = code

In [None]:
# normalize data
#
# Max-abs scaling of the two input features. The scale is taken from the
# training split only and then applied to every split, so a given raw value
# maps to the same model input in train, validation and test. Validation and
# test values may therefore fall slightly outside [-1, 1].
for feature in ('anglez', 'enmo'):
    # Vectorised equivalent of max(|max|, |min|); computed before `train`
    # itself is rescaled, because `train` is the first split in the loop.
    scale = train[feature].abs().max()
    for split in (train, val, test):
        split[feature] = split[feature] / scale



In [None]:
# Distinct series identifiers present in each split.
series_id_train, series_id_val, series_id_test = (
    split['series_id'].unique() for split in (train, val, test)
)

for message, ids in (
    ('number of series in train:', series_id_train),
    ('number of series in validation:', series_id_val),
    ('number of series in test:', series_id_test),
):
    print(message, len(ids))

In [None]:
# Randomly select the series used for training (up to 100) and validation
# (up to 20). random.sample draws without replacement, so the subsets hold
# distinct series; random.choices could return the same series several times
# and silently shrink the subset. k is capped at the number of available
# series because sample() rejects a larger k.
idx_train = random.sample(list(series_id_train), k=min(100, len(series_id_train)))
idx_val = random.sample(list(series_id_val), k=min(20, len(series_id_val)))

In [None]:
# Columns kept for the per-series inference frame.
cols = ['series_id', 'timestamp', 'step', 'anglez', 'enmo', 'event']

_feature_cols = ['anglez', 'enmo']
_target_cols = ['event']

# Rows belonging to the sampled training / validation series.
_in_train = train['series_id'].isin(idx_train)
_in_val = val['series_id'].isin(idx_val)

# Split the data into model inputs (x) and labels (y).
x_train = train.loc[_in_train, _feature_cols]
y_train = train.loc[_in_train, _target_cols]

x_val = val.loc[_in_val, _feature_cols]
y_val = val.loc[_in_val, _target_cols]

# Every test series is kept, together with the bookkeeping columns.
x_test_graph = test.loc[test['series_id'].isin(series_id_test), cols]

In [None]:
def _frame_to_tensor(frame):
    """Convert a DataFrame to a tensor by way of nested Python lists."""
    return torch.tensor(frame.values.tolist())


# convert to torch
x_train = _frame_to_tensor(x_train)
y_train = _frame_to_tensor(y_train)

x_val = _frame_to_tensor(x_val)
y_val = _frame_to_tensor(y_val)

# Report the resulting shapes.
for _tensor in (x_train, y_train, x_val, y_val):
    print(_tensor.shape)

In [None]:
from torch.utils.data import Dataset, DataLoader

class dataset(Dataset):
    """Pairs an input container with a label container, index by index."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # The length is taken from the inputs only.
        return len(self.x)

    def __getitem__(self, index):
        features = self.x[index]
        label = self.y[index]
        return features, label


In [None]:
# Pair each split's feature tensor with its label tensor.
train_dataset, val_dataset = dataset(x_train, y_train), dataset(x_val, y_val)

In [None]:
# Batches of 100 rows taken in stored order (no shuffling is requested).
train_loader, val_loader = (
    DataLoader(split_dataset, batch_size=100)
    for split_dataset in (train_dataset, val_dataset)
)

In [None]:
# Number of batches produced by each loader.
for _loader in (train_loader, val_loader):
    print(len(_loader))

In [None]:

class RNN_Cell(nn.Module):
    """Single recurrent step: h = tanh(x U + h_prev W + b)."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size, self.hidden_size = input_size, hidden_size

        # Trainable parameters, drawn from a standard normal distribution.
        # U maps the input, W the previous hidden state, b is the bias.
        self.U = nn.Parameter(torch.randn(input_size, hidden_size))
        self.W = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.b = nn.Parameter(torch.randn(hidden_size))

    def forward(self, x, state):
        """Return the next hidden state for 2-D input `x` and 2-D `state`."""
        pre_activation = x.mm(self.U) + state.mm(self.W) + self.b
        return torch.tanh(pre_activation)

In [None]:

class RNN(nn.Module):
    """Recurrent binary classifier built from `RNN_Cell` and a linear read-out.

    Each element of the input sequence updates the hidden state; a linear
    layer followed by a sigmoid turns that state into one probability.
    """

    def __init__(self, input_dim, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.rnn_cell = RNN_Cell(input_dim, hidden_size)
        # Feed-forward read-out from the hidden state to a single logit.
        self.linear = nn.Linear(hidden_size, 1)

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (1, hidden_size)."""
        return torch.zeros(1, self.hidden_size)

    def forward(self, X, h):
        """Run the sequence `X` starting from hidden state `h`.

        Iterates over the first dimension of `X` and returns the stacked
        per-step probabilities together with the final hidden state, which
        is also kept on the module as `self.h`.
        """
        self.h = h
        step_outputs = []

        for X_t in X:
            self.h = self.rnn_cell(X_t, self.h)
            step_outputs.append(torch.sigmoid(self.linear(self.h)))

        return torch.stack(step_outputs), self.h

In [None]:
# Validation sweep to find the best number of hidden units.

device = 'cuda' if torch.cuda.is_available() else 'cpu'

lr = 0.01
input_size = 2
hidden_sizes = [10, 20, 30, 40]
output_size = 1

# Number of passes over the training data for every candidate size.
num_epochs = 10

hs = []
accuracies = []

for hidden_size in hidden_sizes:

    # Fresh model per candidate, moved to the target device once, before
    # the optimizer is built.
    model = RNN(input_size, hidden_size).to(device)

    # Define loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # The hidden state must live on the same device as the model.
    h = model.init_hidden().to(device)
    accuracy = float('nan')

    for epoch in range(num_epochs):

        model.train()
        for bx, by in train_loader:
            bx, by = bx.to(device), by.to(device)
            optimizer.zero_grad()
            # Each batch is treated as a sequence of single-row steps.
            output, h = model(bx.float().view(-1, 1, 2), h)

            loss = criterion(output.view(-1), by.float().view(-1))
            loss.backward()
            optimizer.step()
            # Carry the state into the next batch without its gradient history.
            h.detach_()

        # Validation accuracy of this epoch only: the counters are reset
        # every epoch and a separate hidden state is used, so validation
        # neither mixes in earlier epochs nor feeds back into training.
        model.eval()
        correct = 0
        total = 0
        h_val = model.init_hidden().to(device)
        with torch.no_grad():
            for bx, by in val_loader:
                bx, by = bx.to(device), by.to(device)
                y_prob, h_val = model(bx.float().view(-1, 1, 2), h_val)
                y_pred = (y_prob > 0.5).long()
                correct += (y_pred.reshape(-1) == by.reshape(-1)).sum().item()
                total += len(by)

        accuracy = correct / total if total else float('nan')

    # Record the accuracy reached after the last epoch as a plain float.
    hs.append(hidden_size)
    accuracies.append(accuracy)
    
        
           

    


In [None]:
# Candidate hidden sizes and the validation accuracy recorded for each,
# in the same order.
print(hs)
print(accuracies)

**Train the model with the best hidden size**

In [None]:
# Train the final RNN model with hidden_size = 20
lr = 0.01
input_size = 2
hidden_size = 20
output_size = 1
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Move the model to the target device once, before building the optimizer.
model = RNN(input_size, hidden_size).to(device)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.RAdam(model.parameters(), lr=lr)

# The hidden state must live on the same device as the model.
h = model.init_hidden().to(device)

# Set number of epochs
num_epochs = 100

for epoch in range(num_epochs):

    losses = []
    model.train()
    for bx, by in train_loader:
        bx, by = bx.to(device), by.to(device)
        optimizer.zero_grad()
        # Each batch is treated as a sequence of single-row steps.
        output, h = model(bx.float().view(-1, 1, 2), h)
        loss = criterion(output.view(-1), by.float().view(-1))
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        # Carry the state into the next batch without its gradient history.
        h.detach_()

    # Report the mean training loss every tenth epoch.
    if epoch % 10 == 0:
        print('loss:', np.mean(losses))


In [None]:
tests = []

# Run inference on whatever device the trained model lives on.
model_device = next(model.parameters()).device

model.eval()
with torch.no_grad():

    for name, group in x_test_graph.groupby('series_id'):

        # Work on a copy so the columns added below do not write into a
        # slice of x_test_graph.
        group = group.copy()
        group['timestamp'] = pd.to_datetime(group['timestamp'])

        x = torch.as_tensor(
            group[['anglez', 'enmo']].to_numpy(dtype='float32'), device=model_device
        )

        # Every series starts from a zero hidden state, so nothing leaks in
        # from training or from the previously processed series.
        h = model.init_hidden().to(model_device)
        y_prob, h = model(x.view(-1, 1, 2), h)

        # Bring the results back to the CPU as flat arrays before storing
        # them in the DataFrame.
        y_prob = y_prob.reshape(-1).cpu()
        y_pred = (y_prob > 0.5).long()

        group['prediction'] = y_pred.numpy()
        group['prob'] = y_prob.numpy()

        # Confidence of an event: centred rolling mean of the probability
        # over 60*12*5 = 3600 steps (presumably 5 hours of 5-second steps —
        # TODO confirm the sampling rate).
        group['score'] = group['prob'].rolling(60*12*5, center=True, min_periods=10).mean().bfill().ffill()

        # Smooth the binary prediction with a centred rolling median over 361 steps.
        group['prediction'] = group['prediction'].rolling(360+1, center=True).median()

        group.loc[group['prediction']==0, 'prob'] = 1-group.loc[group['prediction']==0, 'prob']

        # A change of the smoothed prediction marks an event:
        # 0 -> 1 is a wakeup, 1 -> 0 is an onset.
        group['pred_diff'] = group['prediction'].diff()

        group = group.drop(columns='event')

        group['event'] = group['pred_diff'].replace({1:'wakeup', -1:'onset', 0:np.nan})

        # Keep at most one event of each kind per calendar day: the first
        # wakeup and the last onset.
        test_wakeup = group[group['event']=='wakeup'].groupby(group['timestamp'].dt.date).agg('first')
        test_onset = group[group['event']=='onset'].groupby(group['timestamp'].dt.date).agg('last')

        group = pd.concat([test_wakeup, test_onset], ignore_index=True).sort_values('timestamp')

        tests.append(group)

In [None]:
# Stack the per-series event tables and expose the new 0..n-1 index as a
# `row_id` column.
events_submission = pd.concat(tests, ignore_index=True).reset_index(names='row_id')
# Number of predicted events (shown as the cell output).
len(events_submission)

In [None]:
%%time
import polars as pl
# The timestamp-parsing expression is built once and reused for the parsed
# column and for each calendar part derived from it.
_event_time = pl.col("timestamp").str.strptime(pl.Datetime, "%Y-%m-%dT%H:%M:%S%Z")

# Lazily scan the annotated events, add the parsed timestamp and its
# year/month/day/hour, then materialise the result as a pandas DataFrame.
train_events = (
    pl.scan_csv('/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv')
    .with_columns(
        (
            _event_time,
            _event_time.dt.year().alias("year"),
            _event_time.dt.month().alias("month"),
            _event_time.dt.day().alias("day"),
            _event_time.dt.hour().alias("hour"),
        )
    )
    .collect()
    .to_pandas()
)

In [None]:
# Ground-truth events of the test series: keep only rows with a recorded
# step and number them from 0 in a `row_id` column.
_solution_cols = ['series_id', 'event', 'step']
val_solution = (
    train_events
    .loc[train_events['series_id'].isin(series_id_test), _solution_cols]
    .loc[lambda events: events['step'].notna()]
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={'index': 'row_id'})
)

# Overall event-detection AP across all test series.
print(f"Model score: {score(val_solution, events_submission,tolerances, **column_names)}")



In [None]:
# Score every test series on its own.
ser_ids = []
score_ids = []

for i in series_id_test:
    # Compute the score once and reuse it for the table and the printout.
    series_score = score(
        val_solution[(val_solution['series_id']==i)],
        events_submission[(events_submission['series_id']==i)],
        tolerances,
        **column_names,
    )
    ser_ids.append(i)
    score_ids.append(series_score)
    print(f"Model score - {i} : {series_score}")

In [None]:
# Collect the per-series scores into a table, one row per series.
score_df = pd.DataFrame({'series_id': ser_ids,
            'score': score_ids})

In [None]:
# Write the per-series scores to score.csv in the working directory.
score_df.to_csv('score.csv')