### Development of Code for Machine Learning Approach to Feedback Loop

In [1]:
import pandas as pd
import random
from datetime import datetime
import json
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import CountVectorizer
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, Dense, concatenate
from itertools import product
import ml_utils

#### Data Processing

In [5]:
# Watering data

watering_df = pd.read_csv("../database/watering.csv")
watering_df.head()

# We'll get sunrise time from the forecasts eventually

Unnamed: 0,time,zone,amount (L),sunrise_time
0,2023-11-01 07:00:00,A,174.591886,2023-11-01 07:18:00
1,2023-11-01 07:00:00,B,373.512957,2023-11-01 07:18:00
2,2023-11-02 09:00:00,A,91.95759,2023-11-02 07:19:00
3,2023-11-02 09:00:00,B,314.513729,2023-11-02 07:19:00
4,2023-11-03 09:00:00,A,24.671447,2023-11-03 07:20:00


In [6]:
# Weather 

weather_df = pd.read_csv("../database/weather.csv")
weather_df = pd.concat([weather_df, pd.DataFrame({"zone":["all"]*len(weather_df)})], axis=1)
weather_df.head()

Unnamed: 0,time,temperature (F),humidity (percent),clouds (percent coverage),wind (mph),weather,zone
0,2023-11-01 00:00:00,78.964427,55.503182,6.880237,22.053473,light thunderstorm,all
1,2023-11-01 03:00:00,83.11409,13.687578,12.209624,39.532128,thunderstorm with light drizzle,all
2,2023-11-01 06:00:00,61.864413,84.838494,1.548351,49.83519,shower snow,all
3,2023-11-01 09:00:00,68.893392,29.481363,48.603347,21.712115,squalls,all
4,2023-11-01 12:00:00,82.019913,6.730281,3.760608,20.158317,volcanic ash,all


In [7]:
# Sensor JSON data

sensor_df = pd.read_csv("../database/sensors.csv")
sensor_df.head()

Unnamed: 0,time,zone,moisture,humidity,temperature,ir,vis,uv,ph
0,2023-11-01 00:00:00,A,59.744445,76.773547,79.912747,60.009712,1.126177,27.026376,5.714852
1,2023-11-01 00:00:00,B,80.714948,68.191679,89.990233,51.489772,16.590905,25.349669,5.870008
2,2023-11-01 00:20:00,A,45.375371,55.658841,64.959041,28.768476,40.927938,76.297926,6.982068
3,2023-11-01 00:20:00,B,77.872448,94.707283,76.96679,32.427918,74.772111,69.587675,5.936124
4,2023-11-01 00:40:00,A,96.299986,43.761356,72.922938,78.547684,28.089171,45.714027,6.847


Really ought to check whether the data is valid

In [8]:
# Get "watering" features that occur once per day

def get_watering_feats(watering_df, start_row, end_row):

    # Should check for valid start and end rows

    watering_feats = {}
    for zone in list(sensor_df["zone"].unique()):
        sensor_df_zone = sensor_df[sensor_df["zone"] == zone]
        watering_df_zone = watering_df[watering_df["zone"] == zone]

        # Get amount watered
        watering_dict = {"amount (L)" : list(watering_df_zone["amount (L)"].iloc[start_row:end_row])}

        # Find how many minutes after sunrise the lawn was watered each day
        watering_dict["min_after_sunrise"] = []
        for i in range(start_row, end_row):
            n_seconds = (datetime.strptime(watering_df_zone["time"].iat[i], '%Y-%m-%d %H:%M:%S') - datetime.strptime(watering_df_zone["sunrise_time"].iat[i], '%Y-%m-%d %H:%M:%S')).seconds

            if n_seconds > 6*60*60:
                n_minutes = -1*round((24*60*60 - n_seconds)/60)
            else:
                n_minutes = round(n_seconds/60)
            watering_dict["min_after_sunrise"].append(n_minutes)

        # Find initial soil moisture before watering
        #watering_dict["initial_moisture"]= []
        #sensor_datetimes = {datetime.strptime(sensor_df_zone["time"].iat[i], '%Y-%m-%d %H:%M:%S'):i for i in range(len(sensor_df_zone))}
        #for i in range(start_row, end_row):
        #    water_stamp = datetime.strptime(watering_df_zone["time"].iat[i], '%Y-%m-%d %H:%M:%S')
        #    closest_datetime = min((dt for dt in sensor_datetimes.keys() if dt < water_stamp), key=lambda x: abs(x - water_stamp))
        #    watering_dict["initial_moisture"].append(sensor_df_zone["moisture"].iat[sensor_datetimes[closest_datetime]])
                
        watering_feats_zone = np.array(pd.DataFrame(watering_dict))

        # Make into sequences of 5 days
        watering_feats_zone = np.lib.stride_tricks.sliding_window_view(watering_feats_zone, window_shape=(5,2)).reshape(watering_feats_zone.shape[0]-4, 5, 2)

        watering_feats[zone] = watering_feats_zone

    return watering_feats

watering_feats = get_watering_feats(watering_df, 0, len(watering_df[watering_df["zone"]=="A"]))

print(watering_feats["A"].shape)
watering_feats["A"][0]

(32, 5, 2)


array([[174.59188554, -18.        ],
       [ 91.95758964, 101.        ],
       [ 24.67144692, 100.        ],
       [446.95302756, -81.        ],
       [342.73624852,  98.        ]])

In [208]:
# Transform textual weather descriptions to vectors

def vectorize_weather(weather_df, start_row, end_row, weather_descriptions):

    weather_data = list(weather_df["weather"].iloc[start_row:end_row])
    
    vectorizer = CountVectorizer()
    vectorizer.fit(weather_descriptions)
    weather_vec = vectorizer.transform(weather_data).toarray()

    return weather_vec

In [209]:
# Get weather features

def get_weather_feats(weather_df, start_row, end_row, zones, weather_descriptions_path):

    # First, vectorize weather descriptions:
    
    with open(weather_descriptions_path, 'r') as json_file:
        data = json.load(json_file)
    weather_descriptions = data["weather descriptions"]

    weather_vec = vectorize_weather(weather_df, start_row, end_row, weather_descriptions)
    weather_df_vec = pd.concat((weather_df.iloc[start_row:end_row], pd.DataFrame(weather_vec)), axis=1)

    # Then add a column with dates so we can seperate by day:
    date_dict = {"date": [datetime.strptime(weather_df["time"].iat[irow], '%Y-%m-%d %H:%M:%S').date() for irow in range(start_row, end_row)]}
    weather_df_date = pd.concat((weather_df_vec, pd.DataFrame(date_dict)), axis=1)

    # get relevant columns:
    weather_cols = list(weather_df_date.columns)
    weather_cols.remove("time")
    weather_cols.remove("zone")
    weather_cols.remove("weather")
    weather_cols.remove("date")

    # make sequential by 5 days
    weather_seqs = np.empty((len(weather_df_date["date"].unique())-4, 5, 8, len(weather_cols))) # 8 is 24/3 (how many weather reports per day)
    last_start_date = list(weather_df_date["date"].unique())[-1] - pd.Timedelta(days=4)
    for istartdate, start_date in enumerate(weather_df_date["date"].unique()):
        if start_date <= last_start_date:
            for iday in range(5):
                date = start_date + pd.Timedelta(days=iday)
                weather_seq = np.array(weather_df_date[(weather_df_date["date"] == date)][weather_cols])
                weather_seqs[istartdate][iday] = (weather_seq)

    weather_seqs = weather_seqs.reshape((len(weather_df_date["date"].unique())-4), 40, len(weather_cols))
    weather_feats = {zone:weather_seqs for zone in zones}
    return weather_feats

weather_descriptions_path = '../configs/weather_descriptions.json'
weather_feats = get_weather_feats(weather_df, 0, len(weather_df), list(sensor_df["zone"].unique()), weather_descriptions_path)
print(weather_feats["A"].shape)
weather_feats["A"][0]

(32, 40, 45)


array([[78.96442721, 55.50318163,  6.88023691, ...,  0.        ,
         0.        ,  0.        ],
       [83.11409002, 13.6875778 , 12.20962367, ...,  0.        ,
         0.        ,  1.        ],
       [61.86441271, 84.83849409,  1.54835111, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [83.60584314, 93.16127315, 77.49225317, ...,  0.        ,
         1.        ,  0.        ],
       [51.47884603, 74.65213528, 73.29906988, ...,  0.        ,
         0.        ,  0.        ],
       [77.60345162, 76.89226588,  7.46263843, ...,  0.        ,
         0.        ,  1.        ]])

In [11]:
# Get target feature -- soil moisture

def get_target_feat(sensor_df, start_row, end_row):

    target_feat = {}

    for zone in sensor_df["zone"].unique():
        sensor_df_zone = sensor_df[sensor_df["zone"] == zone]
        date_dict = {"date": [datetime.strptime(sensor_df_zone["time"].iat[irow], '%Y-%m-%d %H:%M:%S').date() for irow in range(start_row, end_row)]}
        sensor_df_date = pd.concat((pd.DataFrame({"moisture":list(sensor_df_zone["moisture"].iloc[start_row:end_row])}), pd.DataFrame(date_dict)), axis=1)

        # make sequential by 5 days
        moisture_seqs = np.empty((len(sensor_df_date["date"].unique()) - 4, 5, 72)) # 72 is 24*60/20 (how many sensor reports per day)
        last_start_date = list(sensor_df_date["date"].unique())[-1] - pd.Timedelta(days=4)
        for istartdate, start_date in enumerate(sensor_df_date["date"].unique()):
            if start_date <= last_start_date:
                for iday in range(5):
                    date = start_date + pd.Timedelta(days=iday)
                    moisture_seq = np.array(sensor_df_date[(sensor_df_date["date"] == date)]["moisture"])
                    moisture_seqs[istartdate][iday] = moisture_seq

        moisture_seqs = moisture_seqs.reshape((len(sensor_df_date["date"].unique())-4), 360)

        target_feat[zone] = moisture_seqs
    
    return target_feat

target_feat = get_target_feat(sensor_df, 0, len(sensor_df[sensor_df["zone"] == "A"]))

print(target_feat["A"].shape)
target_feat["A"][0]

(32, 360)


array([59.74444546, 45.37537122, 96.29998644, 94.26070161, 88.06739556,
        6.67655026, 38.99165025, 76.89681743, 28.5022862 , 32.67999246,
       73.08445305, 79.55484739, 52.4435267 , 95.95660406, 42.13285175,
       10.21828929, 10.88436812, 70.16525878, 50.24482006, 13.32089197,
       16.1029102 , 69.90439197, 97.20954186, 60.42518701, 44.50924841,
        4.0117354 , 91.2832576 , 81.18504314, 77.52855655, 65.06799267,
       70.07933182,  7.32973486, 93.37538541, 98.75153204,  8.78972615,
       30.43430039,  9.88886082, 92.44762177,  6.62840641, 57.80983389,
       70.90995985,  0.12491934, 42.67396443, 82.11908671, 22.69330372,
       66.00956304, 90.45798008, 74.55901811, 95.57601382, 89.13682687,
        0.83978187, 28.04090653, 81.95357256,  1.7185371 , 27.81083271,
        4.36077628, 22.02189262, 87.47337889, 50.92913202, 68.5450755 ,
       36.18009515, 99.21583296,  1.128964  , 39.81808607, 84.81689614,
       40.70301436, 42.86917691, 91.10921115, 42.68163209, 64.44

### Create and Train LSTM model

In [12]:
def create_training_data(watering_feats, weather_feats, target_feat, zone):

    watering_feats_normalized = MinMaxScaler().fit_transform(watering_feats[zone].reshape(-1, 5*2)).reshape(watering_feats[zone].shape)
    weather_feats_normalized = MinMaxScaler().fit_transform(weather_feats[zone].reshape(-1, 40*45)).reshape(weather_feats[zone].shape)
    target_feat_normalized = MinMaxScaler().fit_transform(target_feat[zone])

    # Split data into training and testing sets
    watering_train, watering_test, weather_train, weather_test, target_train, target_test = train_test_split(
        watering_feats_normalized, weather_feats_normalized,
        target_feat_normalized, test_size=0.2, random_state=42
    )

    return watering_train, watering_test, weather_train, weather_test, target_train, target_test

watering_train, watering_test, weather_train, weather_test, target_train, target_test = create_training_data(watering_feats, weather_feats, target_feat, "A")

In [13]:
def create_model(watering_shape, weather_shape, output_shape):
    watering_input = Input(shape=watering_shape)
    weather_input = Input(shape=weather_shape)

    # LSTM layers for processing sequential watering features
    watering_lstm = LSTM(16, return_sequences=True)(watering_input)

    # LSTM layers for processing sequential weather features
    weather_lstm = LSTM(32, return_sequences=True)(weather_input)
    weather_lstm = LSTM(16, return_sequences=True)(weather_lstm)

    # Flatten and concatenate sequential inputs
    flat_watering_lstm = tf.keras.layers.Flatten()(watering_lstm)
    flat_weather_lstm = tf.keras.layers.Flatten()(weather_lstm)
    concatenated_inputs = concatenate([flat_watering_lstm, flat_weather_lstm])

    # Dense layers for final predictions
    dense1 = Dense(64, activation='relu')(concatenated_inputs)
    output = Dense(output_shape, activation='relu')(dense1)

    # Create the model
    model = Model(inputs=[watering_input, weather_input], outputs=output)

    return model

In [14]:
zone = "A" # arbitrary, since this is supposed to represent original training data

watering_train, watering_test, weather_train, weather_test, target_train, target_test = create_training_data(watering_feats, weather_feats, target_feat, zone)

model = create_model((5,2), (40,45), (360))

model.compile(optimizer='adam', loss='mean_squared_error')

# Train Model
model.fit([watering_train, weather_train], target_train, epochs=10, batch_size=32, validation_data=([watering_test, weather_test], target_test))

# Save Model
model.save(f'../models/base_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Fine-tuning

When the user sticks it in his or her own lawn, we need to finetune to that lawns behavior.

Suppose now we have another 5 day's worth of data or more:

In [15]:
# Get last input to csvs:

# if this happens at midnight, we want to access the previous day's data
#yesterday_date = pd.to_datetime(pd.Timestamp.now().date()) - pd.Timedelta(days = 1)
# except we can't do that haha bc I don't have time for rn
#yesterday_date = pd.to_datetime(datetime.strptime(watering_df["time"].iat[-1], '%Y-%m-%d %H:%M:%S').date()) - pd.Timedelta(days = 1)

user_watering_feats = get_watering_feats(watering_df, 0, len(watering_df[watering_df["zone"]=="A"]))
user_weather_feats = get_weather_feats(weather_df, 0, len(weather_df), list(sensor_df["zone"].unique()))
user_target_feat = get_target_feat(sensor_df, 0, len(sensor_df[sensor_df["zone"] == "A"]))

# Get Pretrained model

from keras.models import load_model

pre_trained_model = load_model('../models/base_model.h5')
pre_trained_model.compile(optimizer='adam', loss='mean_squared_error')  # Same as above

# Finetune for each zone

for zone in sensor_df["zone"].unique():

    watering_train, watering_test, weather_train, weather_test, target_train, target_test = create_training_data(watering_feats, weather_feats, target_feat, zone)

    # Train Model
    model.fit([watering_train, weather_train], target_train, epochs=10, batch_size=32, validation_data=([watering_test, weather_test], target_test))

    # Save Model
    model.save(f'../models/{zone}_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Prediction

In [17]:
A_model = load_model('../models/A_model.h5')
A_model.compile(optimizer='adam', loss='mean_squared_error')  # Same as above

In [192]:
# Get weather forecast

with open('../database/forecast.json', 'r') as json_file:
        data = json.load(json_file)["list"]

forecast_dict = {column:[] for column in weather_df.columns}

for i in range(len(data)):

        forecast_dict["time"].append(datetime.utcfromtimestamp(data[i]["dt"]).strftime('%Y-%m-%d %H:%M:%S'))
        forecast_dict["temperature (F)"].append(data[i]["main"]["temp"])
        forecast_dict["humidity (percent)"].append(data[i]["main"]["humidity"])
        forecast_dict["clouds (percent coverage)"].append(data[i]["clouds"]["all"])
        forecast_dict["wind (mph)"].append(data[i]["wind"]["speed"])
        forecast_dict["weather"].append(data[i]["weather"][0]["description"])
        forecast_dict["zone"].append("all")

weather_forecast = pd.DataFrame(forecast_dict)
        

In [6]:
weather_forecast = weather_df

weather_forecast_feats = get_weather_feats(weather_forecast, 0, len(weather_forecast), list(sensor_df["zone"].unique()), weather_descriptions_path)["A"][0]

NameError: name 'weather_df' is not defined

In [7]:
# Get optimal moisture sequence
# from a file I guess. but for now

optimal_sequence = np.random.uniform(0, 100, 360)

# guesses for good starts to these:
initial_amount_watered = [0, 200, 0, 200, 0]
initial_time_of_watering = [0, 0, 0, 0, 0]

In [13]:
# Get range of possible amounts and times to test

# Define the range of values for water_amount and water_time
amount_range = np.linspace(100, 300, 20)
time_range = np.linspace(-120, 180, 20)

# Create a grid of all possible combinations of water_amount and water_time
water_amount, water_time = np.meshgrid(amount_range, time_range)

# Flatten the grids to get 1D arrays
water_amount_combos = water_amount.flatten()
water_time_combos= water_time.flatten()

# Those are all the possible combinations of amounts and times, but now we need to account for sequences of 5 days where any day could either have a watering event or not

# Generate all possible combinations
combinations = list(product([0,1], repeat=5))

# Convert each combination to a string
binary_arrays = np.array([[combo[x] for x in range(len(combo))] for combo in combinations])

N_val_combos = len(water_amount_combos)
N_event_combos = binary_arrays.shape[0]
N_days = binary_arrays.shape[1]

water_amount = np.empty((N_val_combos, N_event_combos, N_days))
water_time = np.empty((N_val_combos, N_event_combos, N_days))
for i in range(N_val_combos):
    water_amount[i] = binary_arrays*water_amount_combos[i]
    water_time[i] = binary_arrays*water_time_combos[i]

Nsamples = N_val_combos*N_event_combos
water_amount = water_amount.reshape(Nsamples, N_days)
water_time = water_time.reshape(Nsamples, N_days)

NameError: name 'product' is not defined

In [162]:
from scipy import spatial

def calculate_cosine_similarity(x, y):
    
    # Ensure length of x and y are the same
    if len(x) != len(y) :
        return None
    
    return 1 - spatial.distance.cosine(x, y)

In [170]:
scaler_water_amount = MinMaxScaler()
scaler_water_time = MinMaxScaler()
scaler_weather = MinMaxScaler()
scaler_target = MinMaxScaler()

# Normalize data
water_amount_normalized = scaler_water_amount.fit_transform(water_amount.reshape(-1,N_days)).reshape(water_amount.shape)
water_time_normalized = scaler_water_time.fit_transform(water_time.reshape(-1,N_days)).reshape(water_time.shape)
watering_feats_normalized = np.array([[[water_amount_normalized[x][y],water_time_normalized[x][y]] for y in range(N_days)] for x in range(Nsamples)])
weather_feats_normalized = scaler_weather.fit_transform(weather_forecast_feats.reshape(-1, 40*45)).reshape(weather_forecast_feats.shape)

# Define the optimization objective function
def objective_function(params, model, scaler_water_amount, scaler_water_time):

    # Transform back into matrix shape
    params = params.reshape(N_days, 2)
    
    # Inverse transform parameters
    amount_watered = scaler_water_amount.inverse_transform([[params[iday][0] for iday in range(N_days)]])
    time_of_watering = scaler_water_time.inverse_transform([[params[iday][1] for iday in range(N_days)]])

    # Create input for the model
    watering_input = np.array([[[amount_watered[0][iday], time_of_watering[0][iday]] for iday in range(N_days)]])
    weather_input = np.array([weather_feats_normalized])

    # Use the base model to predict soil moisture
    input_data = [watering_input, weather_input]
    predicted_sequence = model.predict(input_data)
    predicted_sequence = predicted_sequence.reshape(predicted_sequence.shape[1])

    # Calculate cosine similarity
    similarity = calculate_cosine_similarity(predicted_sequence, optimal_sequence)

    return -similarity  # Minimize the negative similarity to maximize actual similarity

# Initial parameter values
initial_params = np.array([[scaler_water_amount.transform(np.array([initial_amount_watered]))[0][i],
           scaler_water_time.transform(np.array([initial_time_of_watering]))[0][i]] for i in range(N_days)]).reshape(N_days*2)


# Perform optimization
result = minimize(objective_function, initial_params, args=(A_model, scaler_water_amount, scaler_water_time), method='Nelder-Mead')

# Extract optimal parameters
optimal_params = result.x.reshape(5,2)

# Extract optimal values
optimal_amount_watered = scaler_water_amount.inverse_transform([[params[iday][0] for iday in range(N_days)]])
optimal_time_of_watering = scaler_water_time.inverse_transform([[params[iday][1] for iday in range(N_days)]])

print(f"Optimal Amount Watered: {optimal_amount_watered}, Optimal Time of Watering: {optimal_time_of_watering}")

Optimal Amount Watered: [[-7.33076063e+00  3.30787088e+02 -2.02859538e+01  3.41478754e+03
  -2.05803272e+00]], Optimal Time of Watering: [[  22.47822968  -25.03892653   50.31389998 -230.58649922    5.75857782]]


### Set watering schedule for the next day

In [203]:
# First, need sunrise time

with open('../database/forecast.json', 'r') as json_file:
    data = json.load(json_file)

# assume today is the first day
sunrise = data["city"]["sunrise"]
timezone = data["city"]["timezone"] # shift in seconds from UTC

sunrise_dt = datetime.utcfromtimestamp(sunrise+timezone)

# Calculate optimal watering time
optimal_watering_time = sunrise_dt + pd.Timedelta(minutes=round(optimal_time_of_watering[0][0]))

# Get amount:
optimal_watering_amount = optimal_amount_watered[0][0]

# do this for each zone

In [90]:
# suppose we do some magic and come up with our ideal watering schedule for the next day

watering_schedules = {zone:[random.uniform(0,500) for _ in range(24)] for zone in zones}

In [95]:
# then we want to add scheduled background tasks
# we'll save it in a file

watering_schedules_dict ={"date":datetime.now().strftime('%Y-%m-%d'), "zones":[{"zone":zone, "watering_schedule": watering_schedules[zone]} for zone in zones]}

with open("../database/watering_schedule.json", 'w') as json_file:
    json.dump(watering_schedules_dict, json_file, indent=4)