Packages

In [1]:
import os 
import cv2 
import matplotlib.pyplot as plt 
from matplotlib.font_manager import FontProperties
import numpy as np 
import pandas as pd 
import scipy.spatial.distance as dist 
from scipy.stats import kurtosis, skew 
from pylab import mpl 
import pymannkendall as mk 
import statsmodels.api as sm 
from fastdtw import fastdtw 
from sklearn.decomposition import PCA 

# set font 
mpl.rcParams['font.sans-serif'] = ["SimHei"]
# solve the problem of negative sign display 
plt.rcParams['axes.unicode_minus'] = False 
# set the width of the coordinate axis 
plt.rcParams['axes.linewidth'] = 2

In [2]:
# variables
# get natural data file name 
file_names = os.listdir("../../event_csv/split_data/class2/")
# get artificial data file name 
files = os.listdir('../../event_csv/split_data/artificial/')

# true data of the original data 
nature_real_count = np.load('../npy_file/nature_data_real_count.npy')
# artificial data real count tag 
artificial_real_count = np.load('../npy_file/artificial_data_real_count.npy')

# repnet network's predicitve label on original data 
repnet_nature_pred_count = np.load('../npy_file/repnet_nature_data_real_count.npy')
# repnet network's prediction label on artificial syntheic data 
repnet_artificial_pred_count = np.load('../npy_file/repnet_artificial_data_real_count.npy')


In [3]:
import sys 
import os 
# os.getcwd()
sys.path.append('../') # going a step back so the importer can find the following module
from time_series_scripts import artificial_data_compose 

# 0 - Artificial synthesis data

## 0 - 1 - Synthetic synthesis

In [4]:
# A action synthesis repeat number 
# a_repeat_times = [3, 7, 12, 17]
# for i in a_repeat_times:
#     artificial_data_compose.get_same(file_name='a', repeat_times=i)

# # B action synthesis repeat number 
# b_repeat_times = [3, 7, 13, 18]
# for i in b_repeat_times:
#     artificial_data_compose.get_same(file_name='b', repeat_times=i)

## 0 - 2 - front and rear action stitching 

In [5]:
# A action stitching 3 times and then stitching B action 7 times 
# artificial_data_compose.get_front_or_tail(repeat_times_A=3, repeat_times_B=7)

# A action stitching 3 times and then stitching B action 7 times 
# artificial_data_compose.get_front_or_tail(repeat_times_A=3, repeat_times_B=13)

# A action stitching 3 times and then stitching B action 7 times 
# artificial_data_compose.get_front_or_tail(repeat_times_A=3, repeat_times_B=18)

## 0 - 3 - before the middle and rear action stitching

In [6]:
# # on both sides are A action, the middle is B action
# artificial_data_compose.get_mid(repeat_times_A=3, repeat_times_B=13, flag=True)
# artificial_data_compose.get_mid(repeat_times_A=3, repeat_times_B=18, flag=True)
# artificial_data_compose.get_mid(repeat_times_A=1, repeat_times_B=7, flag=True)

# Flag is false on behalf of A action to be sandwiched in the middle
# artificial_data_compose.get_mid(repeat_times_A=3, repeat_times_B=13, flag=False)
# artificial_data_compose.get_mid(repeat_times_A=3, repeat_times_B=18, flag=False)
# artificial_data_compose.get_mid(repeat_times_A=1, repeat_times_B=7, flag=False)

## 0 - 4 - Artificial data synthesis video 

In [7]:
# # store with 2,000 incident points as a picture 
# from time_series_scripts import csv_to_video
# for name in files:
#     csv_to_video.event_to_pic(name[:-4])
# # switch the picture into a video with a frame rate of 15
# for name in files:
    # csv_to_video.pic_to_video(f'../../video/artificial/{name[:-4]}/',f'../../video/artificial_video/{name[:-4]}.mp4')

# 1 - Event data processing

In [8]:
from time_series_scripts import data_compress
## 1 - 1 - natural data processing
for i in range(2, 8):
    if i == 3:
        continue 
    for name in file_names:
        data_compress.distance_mean_meanline(name, class_num=i)

  d1,d2 = compute_distance(row[1],row[2],df['x'][index+1],df['y'][index+1])
  baseline_x = row[1]
  baseline_y = row[2]
  x_dis, y_dis = compute_distance(baseline_x, baseline_y, row[1], row[2])
  temp_x = temp_x + row[1]
  temp_y = temp_y + row[2]
  baseline_x = row[1]
  baseline_y = row[2]
  temp_x = row[1]
  temp_y = row[2]
  x_dis,y_dis = compute_distance(row[1],row[2],next_event[1],next_event[2])
  d1,d2 = compute_distance(row[1],row[2],df['x'][index+1],df['y'][index+1])
  baseline_x = row[1]
  baseline_y = row[2]
  x_dis, y_dis = compute_distance(baseline_x, baseline_y, row[1], row[2])
  temp_x = temp_x + row[1]
  temp_y = temp_y + row[2]
  baseline_x = row[1]
  baseline_y = row[2]
  temp_x = row[1]
  temp_y = row[2]
  x_dis,y_dis = compute_distance(row[1],row[2],next_event[1],next_event[2])


KeyboardInterrupt: 

In [9]:
# manual data processing 
# for name in files:
#     data_compress.distance_mean_meanline(name, class_num=-1, nature_flag=False)

# 2 - Get Statistical counting results

In [14]:
# get the number of natural data cycle prediction values 
from time_series_scripts import cycle_count

pred_count = cycle_count.get_all_count(file_names)

Help on function fastdtw in module fastdtw.fastdtw:

fastdtw(x, y, radius=1, dist=None)
    return the approximate distance between 2 time series with O(N)
    time and memory complexity
    
    Parameters
    ----------
    x : array_like
        input array 1
    y : array_like
        input array 2
    radius : int
        size of neighborhood when expanding the path. A higher value will
        increase the accuracy of the calculation but also increase time
        and memory consumption. A radius equal to the size of x and y will
        yield an exact dynamic time warping calculation.
    dist : function or int
        The method for calculating the distance between x[i] and y[j]. If
        dist is an int of value p > 0, then the p-norm will be used. If
        dist is a function then dist(x[i], y[j]) will be used. If dist is
        None then abs(x[i] - y[j]) will be used.
    
    Returns
    -------
    distance : float
        the approximate distance between the 2 time ser

In [11]:
# obtain artificial synthetic data cycle prediction value 
pred_count_artificial = cycle_count.get_all_count(files, nature_flag=False)

The file name is a3_b7.csv the number of repetitions of the action is:7
The file name is b18.csv the number of repetitions of the action is:18
The file name is a.csv the number of repetitions of the action is:1
The file name is a17.csv the number of repetitions of the action is:16
The file name is b7.csv the number of repetitions of the action is:7
The file name is b3_a17_b3.csv the number of repetitions of the action is:22
The file name is b13_a3.csv the number of repetitions of the action is:13
The file name is b.csv the number of repetitions of the action is:1
The file name is b3_a12_b3.csv the number of repetitions of the action is:12
The file name is b18_a3.csv the number of repetitions of the action is:18
The file name is a3_b18.csv the number of repetitions of the action is:21
The file name is a_b7_a.csv the number of repetitions of the action is:6
The file name is b7_a3.csv the number of repetitions of the action is:7
The file name is a3_b13.csv the number of repetitions of the

In [22]:
cycle_count.print_artificial_MAE_OBO(files, artificial_real_count, pred_count_artificial, repnet_artificial_pred_count)

AttributeError: module 'time_series_scripts.cycle_count' has no attribute 'print_artificial_MAE_OBO'

In [16]:
from sklearn.decomposition import PCA, IncrementalPCA
def intermediate_PCA_method(data: pd.DataFrame):
    data = data.values 
    ipca = IncrementalPCA(n_components=1, batch_size=10)
    ipca_data = ipca.fit_transform(data)
    ipca_data = np.reshape(ipca_data, -1) 
    ...

In [17]:
# PCA 
def dimensionality_reduction_IPCA(file_name,class_num,nature_flag=True):
    df = None 
    to_file_path = None 
    os.makedirs(f'../../event_csv/compress_event_manhattan/articicial/smooth_by_ipca/', exist_ok=True) 

    if nature_flag:
        # Data after time and space filtration
        df = pd.read_csv(f'../../event_csv/compress_event_manhattan/class{class_num}/{file_name}')
        to_file_path = f'../../event_csv/compress_event_manhattan/class{class_num}/smooth_by_ipca/{file_name}'
    else:
        df = pd.read_csv(f'../../event_csv/compress_event_manhattan/articicial/{file_name}')
        to_file_path = f'../../event_csv/compress_event_manhattan/articicial/smooth_by_ipca/{file_name}'
    # PCA main component analysis, as long as the first dimension 
    data = intermediate_PCA_method(df)
    pd.DataFrame(data, columns=['value']).to_csv(
        to_file_path, mode='w', header=True, index=False)

In [18]:
file_names = os.listdir("../../event_csv/split_data/class2/")
for i in range(2, 8):
    if i == 3:
        continue 
    for name in file_names:
        dimensionality_reduction_IPCA(name, class_num=i)

OSError: Cannot save file into a non-existent directory: '../../event_csv/compress_event_manhattan/class4/smooth_by_ipca'