Load required packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Read in the data

In [None]:
# Load the labelled measurements from CSV.
raw_data_file = '90deg_50x50_labelled_old.csv'
raw_data = pd.read_csv(raw_data_file)

# Quick sanity check: column names, (rows, columns), then the first ten rows.
print(raw_data.columns, raw_data.shape, raw_data.head(10), sep="\n")

Set up parameters

In [None]:
# Windowing configuration used by the cells below.
window, step = 100, 50  # rows per window; rows between consecutive windows
in_threshold = 0.5      # a window whose mean `inside_pol` is not below this gets Target = 1

Get summaries of each window

In [None]:

# Mean of every window, laid out so that the k-th summary row describes rows
# [k*step, k*step + window) -- exactly the rows held by the k-th strided
# window of GDD_2P67 values built further down.
# A centred rolling mean sampled every `step` rows only coincides with those
# windows when window // 2 is a multiple of step, so use a trailing window
# instead: the first complete one ends at position window - 1, and every
# `step`-th one after that starts `step` rows later.
summary_columns = ["X", "Y", "GDD_2P67", "inside_pol"]
df_summary = (
    raw_data[summary_columns]
    .rolling(window=window)
    .mean()
    .iloc[window - 1::step]
    .copy()
)

# Threshold the label: 0 while the mean of inside_pol is below in_threshold,
# otherwise 1.
df_summary["Target"] = np.where(df_summary["inside_pol"] < in_threshold, 0, 1)
print(df_summary)

In [None]:
# Scatter of the per-window mean positions, coloured by the Target label.
fig, ax = plt.subplots(figsize=(10, 15))
ax.set_aspect(1)  # equal scaling on both axes
ax.scatter(df_summary["X"], df_summary["Y"], c=df_summary["Target"], alpha=0.5)
plt.show()

Get the window values themselves:

In [None]:
def rolling_window(array, window_size, freq):
    """Return every `freq`-th length-`window_size` window of a 1-D array.

    Row ``k`` of the result equals ``array[k*freq : k*freq + window_size]``.

    Parameters
    ----------
    array : array-like, 1-D
        Values to cut into overlapping windows.
    window_size : int
        Number of consecutive elements in each window.
    freq : int
        Distance, in elements, between the starts of consecutive windows.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, window_size)``. It is an independent,
        writable copy of the data. When `array` has fewer than `window_size`
        elements the result has shape ``(0, window_size)``.
    """
    array = np.asarray(array)
    if array.shape[0] < window_size:
        # No complete window fits; return an empty result instead of asking
        # numpy for a negative-length dimension.
        return np.empty((0, window_size), dtype=array.dtype)

    # sliding_window_view performs the bounds checking that a hand-built
    # as_strided call lacks.
    windows = np.lib.stride_tricks.sliding_window_view(array, window_size)
    # The view is read-only and shares memory with `array`; copy the kept
    # rows so callers receive ordinary, independent data.
    return windows[::freq].copy()

# One row per window of raw GDD_2P67 readings; window k starts at row k * step.
windowed_data = rolling_window(raw_data["GDD_2P67"].to_numpy(), window, step)

# Shown as the cell output: (number of windows, window length).
windowed_data.shape


Join it all together

In [None]:
# Summary rows: discard any window whose mean X is NaN (a window that was not
# fully populated) and renumber the remaining rows from 0.
df_summary = df_summary.dropna(subset=["X"]).reset_index(drop=True)
print(df_summary.shape)

# Raw window values: one row per window, columns 0 .. window - 1.
df_windowed = pd.DataFrame(windowed_data)
print(df_windowed.shape)

# Put the summary columns and the raw values side by side (matched on the
# 0-based row number) and save the result.
df_training = df_summary.join(df_windowed)
df_training.to_csv("training_data_{}_{}_{}.csv".format(window, step, in_threshold))



## Turn it into a function

In [None]:
def rolling_window(array, window_size, freq):
    """Return every `freq`-th length-`window_size` window of a 1-D array.

    Row ``k`` of the result equals ``array[k*freq : k*freq + window_size]``.

    Parameters
    ----------
    array : array-like, 1-D
        Values to cut into overlapping windows.
    window_size : int
        Number of consecutive elements in each window.
    freq : int
        Distance, in elements, between the starts of consecutive windows.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, window_size)``. It is an independent,
        writable copy of the data. When `array` has fewer than `window_size`
        elements the result has shape ``(0, window_size)``.
    """
    array = np.asarray(array)
    if array.shape[0] < window_size:
        # No complete window fits; return an empty result instead of asking
        # numpy for a negative-length dimension.
        return np.empty((0, window_size), dtype=array.dtype)

    # sliding_window_view performs the bounds checking that a hand-built
    # as_strided call lacks.
    windows = np.lib.stride_tricks.sliding_window_view(array, window_size)
    # The view is read-only and shares memory with `array`; copy the kept
    # rows so callers receive ordinary, independent data.
    return windows[::freq].copy()


def get_training_data(raw_data, window, step, in_threshold):
    """Build one training row per window of `raw_data`.

    Window ``k`` covers rows ``[k*step, k*step + window)`` by position. Each
    output row holds that window's mean ``X``, ``Y``, ``GDD_2P67`` and
    ``inside_pol``, a 0/1 ``Target`` label, and the window's raw
    ``GDD_2P67`` values in columns ``0 .. window - 1``.

    Parameters
    ----------
    raw_data : pandas.DataFrame
        Must contain the columns ``X``, ``Y``, ``GDD_2P67`` and
        ``inside_pol``.
    window : int
        Number of rows per window.
    step : int
        Number of rows between the starts of consecutive windows.
    in_threshold : float
        ``Target`` is 0 while the window's mean ``inside_pol`` is below this
        value and 1 otherwise.

    Returns
    -------
    pandas.DataFrame or None
        The training rows, indexed from 0, or ``None`` when `raw_data` has
        fewer than `window` rows.
    """
    if len(raw_data) < window:
        # Not enough rows for even one window.
        return None

    # Trailing rolling mean: the value at position p summarises rows
    # [p - window + 1, p]. Starting at the first complete window
    # (p = window - 1) and taking every `step`-th row yields windows that
    # start at 0, step, 2*step, ... -- the same rows rolling_window returns.
    # A centred mean sampled every `step` rows only lines up with those
    # windows when window // 2 is a multiple of step.
    summary_columns = ["X", "Y", "GDD_2P67", "inside_pol"]
    df_summary = (
        raw_data[summary_columns]
        .rolling(window=window)
        .mean()
        .iloc[window - 1::step]
        .reset_index(drop=True)
    )

    # Threshold the label: 0 below in_threshold, 1 otherwise.
    df_summary["Target"] = np.where(df_summary["inside_pol"] < in_threshold, 0, 1)

    # Raw GDD_2P67 values of each window, one row per window.
    df_windowed = pd.DataFrame(
        rolling_window(raw_data["GDD_2P67"].to_numpy(), window, step)
    )

    # Drop windows whose mean X is NaN. The same mask is applied to both
    # frames so that the remaining rows still describe the same windows.
    keep = df_summary["X"].notna()
    df_summary = df_summary[keep].reset_index(drop=True)
    df_windowed = df_windowed[keep].reset_index(drop=True)

    # Join on the shared 0-based row number.
    return df_summary.join(df_windowed)


In [None]:
# Run the function version over the whole table.
training_data = get_training_data(raw_data, window=100, step=50, in_threshold=0.5)
# head must be *called*: the bare attribute only displays the bound method.
training_data.head()

In [None]:
# Scatter of the per-window mean positions from get_training_data, coloured
# by the Target label.
fig, ax = plt.subplots(figsize=(10, 15))
ax.set_aspect(1)  # equal scaling on both axes
ax.scatter(training_data["X"], training_data["Y"], c=training_data["Target"], alpha=0.5)
plt.show()

## Apply that to each line separately

In [None]:
# Window each 'Line' group on its own so that no window spans two lines.
# groupby.apply forwards the keyword arguments to get_training_data.
line_data = raw_data.groupby('Line').apply(
    get_training_data, window=100, step=50, in_threshold=0.5
)

In [None]:
# Scatter of the per-line window mean positions, coloured by the Target label.
fig, ax = plt.subplots(figsize=(10, 15))
ax.set_aspect(1)  # equal scaling on both axes
ax.scatter(line_data["X"], line_data["Y"], c=line_data["Target"], alpha=0.5)
plt.show()

## Everything in one function

In [None]:

def make_data(raw_data_file, window, step, in_threshold):
    """Read a labelled CSV, window it per line, then save and plot the result.

    Parameters
    ----------
    raw_data_file : str
        Path of the CSV to read. It must have a ``Line`` column plus the
        columns that get_training_data uses.
    window, step, in_threshold
        Passed unchanged to get_training_data for every ``Line`` group.

    Side effects
    ------------
    Writes ``"<raw_data_file>_<window>_<step>_<in_threshold>.csv"``. The
    input name is embedded as given, extension included. Also shows a scatter
    plot of the window means coloured by ``Target``. Nothing is returned.
    """
    raw_data = pd.read_csv(raw_data_file)

    # Window every line independently; groupby.apply forwards the keywords.
    per_line = raw_data.groupby('Line').apply(
        get_training_data, window=window, step=step, in_threshold=in_threshold
    )

    output_name = "{}_{}_{}_{}.csv".format(raw_data_file, window, step, in_threshold)
    per_line.to_csv(output_name)

    fig, ax = plt.subplots(figsize=(10, 15))
    ax.set_aspect(1)  # equal scaling on both axes
    ax.scatter(per_line["X"], per_line["Y"], c=per_line["Target"], alpha=0.5)
    plt.show()



In [None]:
# Full pipeline on the labelled file, with a denser window spacing (step 10).
make_data('90deg_50x50_labelled_old.csv', window=100, step=10, in_threshold=0.5)