# Import and preprocess data

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize as minimize

In [15]:
# Teams whose data lives under data/<team>/.
teams = ["AiCU", "Croperators", "DeepGreens", "iGrow", "Reference(Growers)","Sonoma"]

# data[team] maps a short table name to that team's cleaned DataFrame.
data = {}
for team in teams:
    # Load the five CSV exports of this team.
    GHClim = pd.read_csv(f"data/{team}/Greenhouse_climate.csv")
    Crop = pd.read_csv(f"data/{team}/CropManagement.csv")
    irri = pd.read_csv(f"data/{team}/Irrigation.csv")
    prod = pd.read_csv(f"data/{team}/Production.csv")
    vip = pd.read_csv(f"data/{team}/vip.csv")

    # These greenhouse-climate columns are discarded right after loading.
    GHClim = GHClim.drop(
        columns=['VentLee', 'Ventwind', 'AssimLight', 'BlackScr', 'EnScr']
    )

    # Fill gaps with the last valid observation of each column.
    # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
    # The previous version ran this forward fill twice; the second pass could
    # never change anything because forward fill is idempotent, so it is gone.
    # NOTE(review): forward fill leaves NaNs that precede the first valid value
    # of a column untouched. If that case occurs in the data, an explicit
    # .bfill() may be what was intended - confirm before adding it.
    GHClim = GHClim.ffill()
    Crop = Crop.ffill()
    irri = irri.ffill()
    prod = prod.ffill()
    vip = vip.ffill()

    data[team] = {'GHClim':GHClim, 'Crop':Crop, 'irri':irri, 'prod':prod, 'vip':vip}

In [72]:
# Production table of the reference growers (teams[4]).
data["Reference(Growers)"]["prod"]

Unnamed: 0,ProdA_cum,ProdA_num,ProdB_cum,ProdB_num,Prod_value_cum,Total_Prod_cum,time
0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,43326
1,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,43327
2,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,43328
3,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,43329
4,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,43330
...,...,...,...,...,...,...,...
111,44.765123,3.278689,1.344877,0.000000,0.0,46.110000,43437
112,45.693115,2.254098,1.354918,0.040984,0.0,47.048033,43438
113,46.098279,0.860656,1.354918,0.000000,0.0,47.453197,43439
114,47.274057,2.745902,1.367992,0.040984,0.0,48.642049,43440


# Weekly average

As mentioned, GHtime is a timestamp recorded every 5 minutes. Consecutive values differ by approximately 0.0034722, which is $\frac{5}{1440} = \frac{1}{288}$ of a day, i.e. 5 minutes, so GHtime is expressed in days. One week is 7 days, so to get a weekly average we average over every span in which GHtime advances by 7. One week is 10080 minutes, so one week contains $\frac{10080}{5} = 2016$ timestamps.

In [78]:
# Collapse every team's raw series into one mean value per week, so the
# greenhouse-climate log and the production log end up on the same scale.
weekly_data = {}
for team in teams:
    climate = data[team]['GHClim']
    harvest = data[team]['prod']

    # Greenhouse climate: 2016 consecutive rows are averaged into one week.
    # Rows after the last complete block of 2016 are left out.
    climate_weeks = len(climate['GHtime']) // 2016
    weekly_HumDef = [
        climate['HumDef'][2016 * week:2016 * week + 2016].mean()
        for week in range(climate_weeks)
    ]
    weekly_CO2 = [
        climate['CO2air'][2016 * week:2016 * week + 2016].mean()
        for week in range(climate_weeks)
    ]

    # Production: 7 consecutive rows (one per day) are averaged into one week.
    # Only the mean of ProdA_num / ProdB_num is taken; no price is applied.
    harvest_weeks = len(harvest['time']) // 7
    weekly_prodA = [
        np.mean(harvest['ProdA_num'][7 * week:7 * week + 7])
        for week in range(harvest_weeks)
    ]
    weekly_prodB = [
        np.mean(harvest['ProdB_num'][7 * week:7 * week + 7])
        for week in range(harvest_weeks)
    ]

    weekly_data[team] = {
        'CO2air': weekly_CO2,
        'HumDef': weekly_HumDef,
        'prodA': weekly_prodA,
        'prodB': weekly_prodB,
    }
    


In [79]:
# Display the per-team weekly aggregates built in the previous cell.
weekly_data

{'AiCU': {'CO2air': [563.1819761637191,
   573.9791666725539,
   564.8472223518248,
   590.2331349042038,
   618.0205852130524,
   608.2805059619104,
   666.1433531168445,
   684.220734106994,
   570.1187996809566,
   649.1284722515422,
   682.950181963947,
   660.2135415409464,
   643.3301091143031,
   633.6428571554965,
   662.7428075314475,
   691.0773809415875],
  'HumDef': [5.127527308449584,
   4.214275793961963,
   4.653953373027555,
   3.862051091257693,
   3.944933035916518,
   3.36115079433183,
   3.3910069439772954,
   2.455545635073304,
   3.457150298880829,
   2.7370659718981765,
   2.3919596560968013,
   2.5239236115623003,
   2.170012400659526,
   2.8207043654222894,
   2.6951066465633917,
   2.4366517859003842],
  'prodA': [0.0,
   0.0,
   0.0,
   0.32201405152224855,
   1.188524590163934,
   1.3700234192037473,
   1.241217798594849,
   1.0889929742388742,
   0.9016393442622946,
   0.6323185011709596,
   0.5737704918032787,
   0.351288056206089,
   0.10538641686182662,
