In [1]:
import itertools
import pickle
from typing import List

import numpy as np
import pandas as pd
from scipy import stats
from sklearn.feature_extraction import image

from time_utils import Timebar
import multiprocessing


In [None]:
def compute_features(file: str, nr_splits: int = 24, skip_weekends: bool = True,
                     slide_minutes: int = 1) -> np.ndarray:
    """
    Load one pickled list of day objects and collect the feature vector of each usable day.

    A day is skipped when ``day.is_empty()`` is true, or when ``day.is_weekend()``
    is true and ``skip_weekends`` is set. Every remaining day contributes the
    result of ``day.unroll_to_np()``.

    :param file: path to a file holding a pickled list of day objects (Timebar)
    :param nr_splits: number of blocks per day; currently only echoed in the
        progress output, no splitting is performed here
    :param skip_weekends: when true, weekend days are skipped
    :param slide_minutes: currently unused; kept so existing callers keep working
    :return: ``np.array`` built from the per-day ``unroll_to_np()`` results, in
        the order the days appear in the file (empty array if no day is kept)
    """

    print(f"Loading {file}...", end=" ")
    # SECURITY: pickle.load can execute arbitrary code; only use on trusted files.
    # The context manager guarantees the handle is closed even if unpickling fails.
    with open(file, 'rb') as raw_handle:
        days = pickle.load(raw_handle)  # type: List[Timebar]
    print("done.", end="\t")

    print(f"Split size: {nr_splits}")

    features = []
    nr_skip_days = 0

    for day in days:
        # Empty days are always dropped; weekends only when requested.
        if day.is_empty() or (day.is_weekend() and skip_weekends):
            nr_skip_days += 1
            continue
        features.append(day.unroll_to_np())

    nr_days = len(features)
    # No per-day block splitting happens in this function, so the block count
    # reported below is always 0 (message kept unchanged for log compatibility).
    nr_blocks = 0

    print(f"{nr_days} days splitted in {nr_blocks} blocks and skipped {nr_skip_days} days.")

    return np.array(features)

In [2]:
# Pickled Timebar dumps to convert, one per monitored machine.
raw_files = [
    "counters_ubuntu.raw",
    "counters_freebsd.raw",
    "counters_toshiba.raw",
    "counters_windows10x86.raw",
    "counters_x58pc.raw",
    "counters_nuc.raw"
]

# NOTE: the cell defining compute_features must be executed before this one.
for file in raw_files:

    features = compute_features(file, skip_weekends=True)

    filename = f"numpy_{file}.raw"
    print(f"Dumping numpy {features.shape} table to {filename}...", end=" ")

    # compute_features already returns an ndarray, so no extra np.array() copy
    # is needed; the context manager flushes and closes the output file.
    with open(filename, "wb") as dump_handle:
        pickle.dump(features, dump_handle)

    print("Done.", end="\n\n")

# df_backup = df.copy()

NameError: name 'compute_features' is not defined