In [1]:
import os
import pandas as pd
import glob
import json
data_path = "../data/interim/v3"
dataset_target_path = "../data/processed"


In [2]:
from lib import read_dataset
data_on, data_off = read_dataset(data_path, classes = 2)

0 milenkosKitchenNotNearDevices1
1 milenkosRoomNearDevices1
2 milenkosRoomNearDevices


In [11]:
from scipy.fft import fft, fftfreq
import numpy as np
from matplotlib import pyplot as plt

def spectral_image(df: pd.DataFrame, column = 'Intensity', duration = 60):
    N = len(df)
    sampling_rate = N // duration
    T = 1 / sampling_rate

    yf = fft(df[column].to_numpy())

    # just right side
    yff = 2.0/N * np.abs(yf[0:N//2])
    xf = fftfreq(N, T)[:N//2]

    bins = np.array(range(round(xf[-1])))
    inds = np.digitize(xf, bins)
    df = pd.DataFrame({"bins": inds, "fourier": yff})
    groupped = df.groupby("bins").sum()
    return groupped

In [12]:
def specter_to_data_row(name, label, df, vec_len, column = 'Intensity', duration = 60,): 
    specter_intensity = spectral_image(df, column, duration)['fourier'].to_numpy()[:vec_len]
    padded = np.zeros((vec_len))
    padded[:specter_intensity.shape[0]] = specter_intensity
    data_row = [name, label] + list(padded)
    return data_row

vec_len = 50
duration=60
columns = ["Name", "Label"] + list(range(1,vec_len+1))

data_on = pd.DataFrame([specter_to_data_row(name, 6, df, vec_len=50, column='Intensity') for name, df in data_on.items()], columns=columns)
data_off = pd.DataFrame([specter_to_data_row(name, 5, df, vec_len=50, column='Intensity') for name, df in data_off.items()], columns=columns)

In [13]:
data_fourier = data_on.append(data_off, ignore_index=True)
data_fourier = data_fourier.sort_values(by=['Label', 'Name'], ignore_index = True)
data_fourier

  data_fourier = data_on.append(data_off, ignore_index=True)


Unnamed: 0,Name,Label,1,2,3,4,5,6,7,8,...,41,42,43,44,45,46,47,48,49,50
0,milenkosKitchenNotNearDevices1,5,166.914382,0.990365,0.822009,0.876246,0.956058,0.903413,0.828731,0.771207,...,0.842836,0.676475,0.738185,0.869702,0.912243,0.744218,0.660308,0.75253,1.11843,0.0
1,milenkosRoomNearDevices,5,192.406056,1.602865,1.191045,1.248905,1.110698,0.911207,0.8189,0.876682,...,0.762566,0.798861,0.783117,0.837648,0.798915,0.847812,0.69877,0.823476,1.290208,0.0
2,milenkosRoomNearDevices1,5,192.406056,1.602865,1.191045,1.248905,1.110698,0.911207,0.8189,0.876682,...,0.762566,0.798861,0.783117,0.837648,0.798915,0.847812,0.69877,0.823476,1.290208,0.0
3,milenkosKitchenNotNearDevices1,6,168.2267,1.896104,1.649988,1.315992,1.094898,1.220926,1.087022,0.972637,...,0.824714,0.896729,0.916614,0.732622,0.875792,0.949551,0.88961,0.918843,1.559893,0.0
4,milenkosRoomNearDevices,6,192.985566,1.637898,1.230603,1.162957,1.082392,1.234399,1.056224,0.991033,...,0.787571,0.640567,0.740041,0.800987,0.797628,0.774023,0.817842,0.774084,1.267119,0.0
5,milenkosRoomNearDevices1,6,191.855142,1.319011,1.091971,1.258759,1.045074,1.03754,0.971833,1.027048,...,0.815539,0.837696,0.883702,0.720174,0.844878,0.746106,0.95927,0.782418,1.231886,0.0


In [14]:
# save dataset
if not os.path.isdir(dataset_target_path):
    os.makedirs(dataset_target_path)

import time
# metadata
metadata = {
    "title": "Fourier dataset, 50 frequencies",
    "version": "v3",
    "description": "Dataset from v3 data, single device, without traffic, statistical features",
    "author": "Mihael",
    "places": [
        "room",
        "kitchen"
    ],
    "stages": 2,
    "traffic": False,
    "format": "processed",
    "created": int(time.time())
}
file_name = f"{metadata['version']}-{metadata['stages']}_stages-fourier"
data_fourier.to_csv(f"{dataset_target_path}/{file_name}.csv",index=True)
# Writing metadata
with open(f"{dataset_target_path}/{file_name}.json", "w") as outfile:
    outfile.write(json.dumps(metadata, indent=4))