In [1]:
#note: temp readings are 20s apart

import pandas as pd
import os
import matplotlib.pyplot as plt
import random
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Normalization
import numpy as np

In [3]:
def allocate_data(training_files):
    dfs = {}
    for file in training_files:
        df = pd.read_excel(file, sheet_name="DATA", usecols='C,D', skiprows = 1)
        df.columns = ['temp', 'label']
        df.loc[df.shape[0]] = [None, None]# to ensure that there are breaks between each recorded shower
        dfs[file] = df
    data = pd.concat(list(dfs.values()), ignore_index = True)# concatenating all our data into one big dataframe
    return data

In [10]:
def train_model(data, model_type):
    if model_type == 'Start':
        window_size = 19
    elif model_type == 'End':
        window_size = 25
    #list where the first item is 1 for model_type and 0 for not model_type:
    pts = []

    for i, row in data.iterrows():
        if not pd.isnull(row['label']):
            if row['label'].startswith(model_type):
                pts.append([1] + data.iloc[i-window_size//2:i+window_size//2+1]['temp'].tolist())

    initial_length = len(pts)

    #taking a random sample of indices (without replacement) of size k
    random_indices = random.sample(range(len(data) - window_size), k = len(data) - window_size)

    i = 0
    for _ in range(initial_length):
        # none of the temp data can be null, and the midpoint can't be a pointed labeled with the model type.
        while any(pd.isnull(data.iloc[i:i+window_size]['temp'])) or (
            not pd.isnull(data.iloc[i+window_size//2]['label']) and data.iloc[i+window_size//2]['label'].startswith(model_type)
        ):
            i += 1
        pts.append([0] + data.iloc[i:i+window_size]['temp'].tolist())
        i += 1

    #tensors holding the label, followed by a list of 10 temperatures:
    data = np.asarray(pts)
    np.random.shuffle(data)

    model = Sequential(
        [
            BatchNormalization(),
            Dense(2**4, activation = 'relu', input_shape=(window_size,)),
            Dense(1, activation = 'sigmoid')
        ]
    )

    model.compile(
        loss='mse',
        metrics=['mae'],
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    )

    model.fit(
        data[:,1:],
        data[:,0],
        epochs = int(4*1e5),
        verbose = 0
    )
    return model

In [5]:
def simulate(test_file, training_files):
    data = allocate_data(training_files)    
    start_model = train_model(data, 'Start')
    end_model = train_model(data, 'End')
    test_data = pd.read_excel(test_file, sheet_name="DATA", usecols='C,D', skiprows = 1)
    test_data.columns = ['temp', 'label']
    start_indices = []
    end_indices = []
    total_duration = sim_total_duration = shower_count = sim_shower_count = 0
    sim_start_indices = []
    sin_end_indices = []
    for i in range(test_data.shape[0]):
        if not pd.isnull(test_data['label'][i]):
            if test_data['label'][i].startswith('Start'):
                start_indices.append(i)
            elif test_data['label'][i].startswith('End'):
                end_indices.append(i)
                total_duration += i - start_indices[-1]
                shower_count += 1
    window_sizes = (19, 25)
    models = (start_model, end_model)
    index_lists = (sim_start_indices, sim_end_indices)
    Start = 0
    End = 1
    curr_test = Start
    #last point detected should be an end point, so we must allow enough space to run the end model at all points.
    for i in range(test_data.shape[0] - window_sizes[1] + 1):
        if models[curr_test] == 1
            

SyntaxError: invalid syntax (1698133245.py, line 12)