In [3]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

### <b> Constructing Dataframe for Biometric Data - 1st Iteration </b>

##### <b> Main Biometric Data </b>

In [4]:
# Biometry dataframe
biometric_df = pd.read_csv("../../data/biom_mod.csv", delimiter=';')
biometric_df["Date"] = pd.to_datetime(biometric_df["Date"], format="%d/%m/%Y") #Convert Date column values to datatime datatype

In [5]:
#Setting column Date as the index
biometric_df.set_index("Date", inplace=True)

In [6]:
# List of all of the days biometry samples were taken
biom_dates_list = biometric_df.index.unique().tolist()
biom_dates_list, len(biom_dates_list)

([Timestamp('2024-09-07 00:00:00'),
  Timestamp('2024-09-11 00:00:00'),
  Timestamp('2024-09-14 00:00:00'),
  Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-20 00:00:00'),
  Timestamp('2024-09-24 00:00:00'),
  Timestamp('2024-09-29 00:00:00'),
  Timestamp('2024-10-03 00:00:00')],
 8)

##### <b> Biometric Labels </b>

In [7]:
# Labels dataframe
bbch_labels_df = pd.read_excel("../../raw_data/biom_to_BBCH.xlsx")

In [8]:
bbch_labels_df[0:45]

Unnamed: 0,Number,Line,Sample,CODE,2024-09-07 00:00:00,2024-09-11 00:00:00,2024-09-14 00:00:00,2024-09-17 00:00:00,2024-09-20 00:00:00,2024-09-24 00:00:00,2024-09-29 00:00:00,2024-10-03 00:00:00
0,1,1,W,RWGRB5,19,19,19,19,19,37,39,44
1,2,1,W,RWGRA3,19,19,19,19,19,19,19,19
2,3,1,W,RWGRC1,19,19,35,19,19,37,37,39
3,4,1,W,RWGRA2,19,19,19,19,19,19,19,19
4,5,1,W,RWGRB3,19,19,19,19,19,19,19,19
5,6,1,W,RWGRA4,19,19,19,19,19,19,18,19
6,7,1,W,RWGRC3,19,19,19,19,19,37,37,39
7,8,1,W,RWGRA5,19,19,19,19,19,19,19,19
8,9,2,W,RWGRC2,19,19,37,37,37,37,39,44
9,10,2,W,RWGRB4,19,19,19,19,19,19,19,19


##### <b> Environmental Variables 1 and 2 </b>

In [9]:
# Environmental variables 1
env_vars_1_df = pd.read_excel("../../raw_data/inside_1.xlsx")

In [10]:
env_vars_1_df.set_index("date", inplace=True)

In [11]:
# List of all the days environment variables 1 were measured
env_1_dates_list = env_vars_1_df.index.unique().tolist()
env_1_dates_list, len(env_1_dates_list)

([Timestamp('2024-08-22 00:00:00'),
  Timestamp('2024-08-23 00:00:00'),
  Timestamp('2024-08-24 00:00:00'),
  Timestamp('2024-08-25 00:00:00'),
  Timestamp('2024-08-26 00:00:00'),
  Timestamp('2024-08-27 00:00:00'),
  Timestamp('2024-08-28 00:00:00'),
  Timestamp('2024-08-29 00:00:00'),
  Timestamp('2024-08-30 00:00:00'),
  Timestamp('2024-08-31 00:00:00'),
  Timestamp('2024-09-01 00:00:00'),
  Timestamp('2024-09-02 00:00:00'),
  Timestamp('2024-09-03 00:00:00'),
  Timestamp('2024-09-04 00:00:00'),
  Timestamp('2024-09-16 00:00:00'),
  Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-18 00:00:00'),
  Timestamp('2024-09-19 00:00:00'),
  Timestamp('2024-09-20 00:00:00'),
  Timestamp('2024-09-21 00:00:00'),
  Timestamp('2024-09-22 00:00:00'),
  Timestamp('2024-09-23 00:00:00'),
  Timestamp('2024-09-24 00:00:00'),
  Timestamp('2024-09-25 00:00:00'),
  Timestamp('2024-09-26 00:00:00'),
  Timestamp('2024-09-27 00:00:00'),
  Timestamp('2024-09-28 00:00:00'),
  Timestamp('2024-09-29 00:0

In [12]:
env_vars_1_df

Unnamed: 0_level_0,timestamp,temperature,humidity,415nm,445nm,480nm,515nm,555nm,590nm,630nm,...,480nm.1,515nm.1,555nm.1,590nm.1,630nm.1,680nm.1,clear.1,nir.1,gain (n),integration (ms)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-08-22,00:02:04,18.800000,61.400000,0.000000,0.000000,0.000000,0.000000,0.011719,0.031250,0.031250,...,0.0,0.0,3.0,8.0,8.0,2.0,21.0,0.0,10.0,500000.0
2024-08-22,01:02:04,18.300000,64.800000,0.000000,0.000000,0.000000,0.000000,0.000000,0.003906,0.007812,...,0.0,0.0,0.0,1.0,2.0,0.0,9.0,0.0,10.0,500000.0
2024-08-22,02:02:05,17.400000,68.400000,0.000000,0.000000,0.000000,0.000000,0.000000,0.015625,0.011719,...,0.0,0.0,0.0,4.0,3.0,0.0,13.0,0.0,10.0,500000.0
2024-08-22,03:02:06,17.300000,71.900000,0.000000,0.000000,0.000000,0.000000,0.000000,0.019531,0.015625,...,0.0,0.0,0.0,5.0,4.0,0.0,14.0,0.0,10.0,500000.0
2024-08-22,04:02:07,18.000000,74.700000,0.000000,0.000000,0.000000,0.000000,0.000000,0.011719,0.015625,...,0.0,0.0,0.0,3.0,4.0,1.0,14.0,0.0,10.0,500000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,11:02:33,21.564848,86.443733,129.572998,189.802811,228.108200,294.968475,338.299774,338.280945,421.844177,...,,,,,,,,,,
2024-10-03,12:02:27,22.493755,84.966934,271.345673,398.767883,482.346466,623.695374,715.089355,717.434998,896.284546,...,,,,,,,,,,
2024-10-03,13:02:27,23.084545,82.951881,268.166809,394.411713,476.071960,614.603455,703.906921,704.613037,879.269714,...,,,,,,,,,,
2024-10-03,14:02:28,22.347782,81.459694,194.670502,287.111176,345.562866,445.066803,505.059418,503.463348,627.850342,...,,,,,,,,,,


In [13]:
env_vars_1_df_temp = env_vars_1_df[['temperature']].copy()

In [14]:
env_vars_1_df_temp['temperature']

date
2024-08-22    18.800000
2024-08-22    18.300000
2024-08-22    17.400000
2024-08-22    17.300000
2024-08-22    18.000000
                ...    
2024-10-03    21.564848
2024-10-03    22.493755
2024-10-03    23.084545
2024-10-03    22.347782
2024-10-03    22.367418
Name: temperature, Length: 692, dtype: float64

In [15]:
env_vars_1_df_temp.count()

temperature    692
dtype: int64

In [16]:
env_vars_1_df_temp.isnull().any()

temperature    False
dtype: bool

#### <b> Get the Average and Standard Deviation for Temperature and Humidity for 7 previous day to any given date </b>

In [17]:
daily_temp_1= env_vars_1_df_temp.resample('D').mean()

In [18]:
daily_temp_1

Unnamed: 0_level_0,temperature
date,Unnamed: 1_level_1
2024-08-22,20.884
2024-08-23,24.775
2024-08-24,22.466667
2024-08-25,24.020833
2024-08-26,24.945833
2024-08-27,20.704167
2024-08-28,20.970833
2024-08-29,21.133333
2024-08-30,22.316667
2024-08-31,20.8375


In [19]:
daily_temp_1['Weekly Average'] = daily_temp_1['temperature'].rolling(window=7).mean().shift(1)

In [20]:
daily_temp_1['Weekly Std Dev'] = daily_temp_1['Weekly Average'].rolling(window=7).std().shift(1)

In [21]:
daily_temp_1

Unnamed: 0_level_0,temperature,Weekly Average,Weekly Std Dev
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-08-22,20.884,,
2024-08-23,24.775,,
2024-08-24,22.466667,,
2024-08-25,24.020833,,
2024-08-26,24.945833,,
2024-08-27,20.704167,,
2024-08-28,20.970833,,
2024-08-29,21.133333,22.681048,
2024-08-30,22.316667,22.716667,
2024-08-31,20.8375,22.365476,


In [22]:
# Environmental variables 2
env_vars_2_df = pd.read_excel("../../raw_data/inside_2.xlsx")

In [23]:
env_vars_2_df.set_index("date", inplace=True)

In [24]:
# List of all the days environment variables 1 were measured
env_2_dates_list = env_vars_2_df.index.unique().tolist()
env_2_dates_list, len(env_2_dates_list)

([Timestamp('2024-08-22 00:00:00'),
  Timestamp('2024-08-23 00:00:00'),
  Timestamp('2024-08-24 00:00:00'),
  Timestamp('2024-08-25 00:00:00'),
  Timestamp('2024-08-26 00:00:00'),
  Timestamp('2024-08-27 00:00:00'),
  Timestamp('2024-08-28 00:00:00'),
  Timestamp('2024-08-29 00:00:00'),
  Timestamp('2024-08-30 00:00:00'),
  Timestamp('2024-08-31 00:00:00'),
  Timestamp('2024-09-01 00:00:00'),
  Timestamp('2024-09-02 00:00:00'),
  Timestamp('2024-09-03 00:00:00'),
  Timestamp('2024-09-04 00:00:00'),
  Timestamp('2024-09-05 00:00:00'),
  Timestamp('2024-09-06 00:00:00'),
  Timestamp('2024-09-07 00:00:00'),
  Timestamp('2024-09-08 00:00:00'),
  Timestamp('2024-09-09 00:00:00'),
  Timestamp('2024-09-10 00:00:00'),
  Timestamp('2024-09-11 00:00:00'),
  Timestamp('2024-09-12 00:00:00'),
  Timestamp('2024-09-13 00:00:00'),
  Timestamp('2024-09-14 00:00:00'),
  Timestamp('2024-09-15 00:00:00'),
  Timestamp('2024-09-16 00:00:00'),
  Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-18 00:0

In [25]:
env_vars_2_df_temp = env_vars_2_df[['temperature']].copy()

In [26]:
daily_temp_2 = env_vars_2_df_temp.resample('D').mean()

In [27]:
daily_temp_2

Unnamed: 0_level_0,temperature
date,Unnamed: 1_level_1
2024-08-22,20.432
2024-08-23,23.733333
2024-08-24,21.679167
2024-08-25,22.520833
2024-08-26,23.533333
2024-08-27,20.925
2024-08-28,20.6875
2024-08-29,20.8625
2024-08-30,21.908333
2024-08-31,20.470833


In [28]:
daily_temp_2['Weekly Average'] = daily_temp_2['temperature'].rolling(window=7).mean().shift(1)

In [29]:
daily_temp_2['Weekly Std Dev'] = daily_temp_2['temperature'].rolling(window=7).std().shift(1)

In [30]:
daily_temp_2

Unnamed: 0_level_0,temperature,Weekly Average,Weekly Std Dev
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-08-22,20.432,,
2024-08-23,23.733333,,
2024-08-24,21.679167,,
2024-08-25,22.520833,,
2024-08-26,23.533333,,
2024-08-27,20.925,,
2024-08-28,20.6875,,
2024-08-29,20.8625,21.930167,1.356007
2024-08-30,21.908333,21.991667,1.284618
2024-08-31,20.470833,21.730952,1.032731


In [31]:
weekly_averages = pd.merge(daily_temp_1, daily_temp_2, how='outer', left_index=True, right_index=True, suffixes=('_env1', '_env2'))

In [32]:
weekly_averages.columns

Index(['temperature_env1', 'Weekly Average_env1', 'Weekly Std Dev_env1',
       'temperature_env2', 'Weekly Average_env2', 'Weekly Std Dev_env2'],
      dtype='object')

In [33]:
weekly_averages

Unnamed: 0_level_0,temperature_env1,Weekly Average_env1,Weekly Std Dev_env1,temperature_env2,Weekly Average_env2,Weekly Std Dev_env2
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-08-22,20.884,,,20.432,,
2024-08-23,24.775,,,23.733333,,
2024-08-24,22.466667,,,21.679167,,
2024-08-25,24.020833,,,22.520833,,
2024-08-26,24.945833,,,23.533333,,
2024-08-27,20.704167,,,20.925,,
2024-08-28,20.970833,,,20.6875,,
2024-08-29,21.133333,22.681048,,20.8625,21.930167,1.356007
2024-08-30,22.316667,22.716667,,21.908333,21.991667,1.284618
2024-08-31,20.8375,22.365476,,20.470833,21.730952,1.032731


In [34]:
def combine_average(row, column_1, column_2):
    if pd.notnull(row[column_1]) and pd.notnull(row[column_2]):
        return np.mean([row[column_1], row[column_2]])
    
    elif pd.notnull(row[column_1]) and pd.isnull(row[column_2]):
        return row[column_1]
    
    elif pd.isnull(row[column_1]) and pd.notnull(row[column_2]):
        return row[column_2]

    else:
        return np.nan
    
def combine_std_dev(row, column_1, column_2):
    if pd.notnull(row[column_1]) and pd.notnull(row[column_2]):
        return np.sqrt(
            (row[column_1]**2 + row[column_2]) / 2)
    
    elif pd.notnull(row[column_1]) and pd.isnull(row[column_2]):
        return row[column_1]
    
    elif pd.isnull(row[column_1]) and pd.notnull(row[column_2]):
        return row[column_2]

    else:
        return np.nan


In [35]:
weekly_averages['Combined Temperature Average'] = weekly_averages.apply(combine_average, axis=1, args=('Weekly Average_env1', 'Weekly Average_env2'))

In [36]:
weekly_averages['Combined Temperature Std Dev'] = weekly_averages.apply(combine_std_dev, axis=1, args=('Weekly Std Dev_env1','Weekly Std Dev_env2'))

In [37]:
weekly_averages

Unnamed: 0_level_0,temperature_env1,Weekly Average_env1,Weekly Std Dev_env1,temperature_env2,Weekly Average_env2,Weekly Std Dev_env2,Combined Temperature Average,Combined Temperature Std Dev
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-08-22,20.884,,,20.432,,,,
2024-08-23,24.775,,,23.733333,,,,
2024-08-24,22.466667,,,21.679167,,,,
2024-08-25,24.020833,,,22.520833,,,,
2024-08-26,24.945833,,,23.533333,,,,
2024-08-27,20.704167,,,20.925,,,,
2024-08-28,20.970833,,,20.6875,,,,
2024-08-29,21.133333,22.681048,,20.8625,21.930167,1.356007,22.305607,1.356007
2024-08-30,22.316667,22.716667,,21.908333,21.991667,1.284618,22.354167,1.284618
2024-08-31,20.8375,22.365476,,20.470833,21.730952,1.032731,22.048214,1.032731


In [38]:
env_vars_1_df_hum = env_vars_1_df[['humidity']].copy()
env_vars_2_df_hum = env_vars_2_df[['humidity']].copy()

daily_hum_1 = env_vars_1_df_hum.resample('D').mean()
daily_hum_2 = env_vars_2_df_hum.resample('D').mean()

daily_hum_1['Humidity Weekly Average'] = daily_hum_1['humidity'].rolling(window=7).mean().shift(1)
daily_hum_1['Humidity Weekly Std Dev'] = daily_hum_1['humidity'].rolling(window=7).std().shift(1)
daily_hum_2['Humidity Weekly Average'] = daily_hum_2['humidity'].rolling(window=7).mean().shift(1)
daily_hum_2['Humidity Weekly Std Dev'] = daily_hum_2['humidity'].rolling(window=7).std().shift(1)

In [39]:
humidity_averages = pd.merge(daily_hum_1, daily_hum_2, how='outer', left_index=True, right_index=True, suffixes=('_env1', '_env2'))

In [40]:
humidity_averages

Unnamed: 0_level_0,humidity_env1,Humidity Weekly Average_env1,Humidity Weekly Std Dev_env1,humidity_env2,Humidity Weekly Average_env2,Humidity Weekly Std Dev_env2
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-08-22,68.716,,,69.26,,
2024-08-23,63.058333,,,64.7125,,
2024-08-24,70.2,,,71.779167,,
2024-08-25,52.558333,,,57.15,,
2024-08-26,51.016667,,,55.5375,,
2024-08-27,67.541667,,,64.9375,,
2024-08-28,68.045833,,,67.679167,,
2024-08-29,73.325,63.019548,7.991967,72.245833,64.436548,6.059541
2024-08-30,66.745833,63.677976,8.698154,65.895833,64.863095,6.541584
2024-08-31,70.958333,64.204762,8.765772,70.870833,65.032143,6.552324


In [41]:
humidity_averages

Unnamed: 0_level_0,humidity_env1,Humidity Weekly Average_env1,Humidity Weekly Std Dev_env1,humidity_env2,Humidity Weekly Average_env2,Humidity Weekly Std Dev_env2
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-08-22,68.716,,,69.26,,
2024-08-23,63.058333,,,64.7125,,
2024-08-24,70.2,,,71.779167,,
2024-08-25,52.558333,,,57.15,,
2024-08-26,51.016667,,,55.5375,,
2024-08-27,67.541667,,,64.9375,,
2024-08-28,68.045833,,,67.679167,,
2024-08-29,73.325,63.019548,7.991967,72.245833,64.436548,6.059541
2024-08-30,66.745833,63.677976,8.698154,65.895833,64.863095,6.541584
2024-08-31,70.958333,64.204762,8.765772,70.870833,65.032143,6.552324


In [42]:
weekly_averages = pd.merge(weekly_averages, humidity_averages, how='outer', left_index=True, right_index=True)

In [43]:
weekly_averages

Unnamed: 0_level_0,temperature_env1,Weekly Average_env1,Weekly Std Dev_env1,temperature_env2,Weekly Average_env2,Weekly Std Dev_env2,Combined Temperature Average,Combined Temperature Std Dev,humidity_env1,Humidity Weekly Average_env1,Humidity Weekly Std Dev_env1,humidity_env2,Humidity Weekly Average_env2,Humidity Weekly Std Dev_env2
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-08-22,20.884,,,20.432,,,,,68.716,,,69.26,,
2024-08-23,24.775,,,23.733333,,,,,63.058333,,,64.7125,,
2024-08-24,22.466667,,,21.679167,,,,,70.2,,,71.779167,,
2024-08-25,24.020833,,,22.520833,,,,,52.558333,,,57.15,,
2024-08-26,24.945833,,,23.533333,,,,,51.016667,,,55.5375,,
2024-08-27,20.704167,,,20.925,,,,,67.541667,,,64.9375,,
2024-08-28,20.970833,,,20.6875,,,,,68.045833,,,67.679167,,
2024-08-29,21.133333,22.681048,,20.8625,21.930167,1.356007,22.305607,1.356007,73.325,63.019548,7.991967,72.245833,64.436548,6.059541
2024-08-30,22.316667,22.716667,,21.908333,21.991667,1.284618,22.354167,1.284618,66.745833,63.677976,8.698154,65.895833,64.863095,6.541584
2024-08-31,20.8375,22.365476,,20.470833,21.730952,1.032731,22.048214,1.032731,70.958333,64.204762,8.765772,70.870833,65.032143,6.552324


In [44]:
weekly_averages['Combined Humidity Average'] = weekly_averages.apply(combine_average, axis=1, args=('Humidity Weekly Average_env1', 'Humidity Weekly Average_env2'))

In [45]:
weekly_averages['Combined Humidity Std Dev'] = weekly_averages.apply(combine_std_dev, axis=1, args=('Humidity Weekly Std Dev_env1', 'Humidity Weekly Std Dev_env2'))

In [46]:
weekly_averages

Unnamed: 0_level_0,temperature_env1,Weekly Average_env1,Weekly Std Dev_env1,temperature_env2,Weekly Average_env2,Weekly Std Dev_env2,Combined Temperature Average,Combined Temperature Std Dev,humidity_env1,Humidity Weekly Average_env1,Humidity Weekly Std Dev_env1,humidity_env2,Humidity Weekly Average_env2,Humidity Weekly Std Dev_env2,Combined Humidity Average,Combined Humidity Std Dev
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-08-22,20.884,,,20.432,,,,,68.716,,,69.26,,,,
2024-08-23,24.775,,,23.733333,,,,,63.058333,,,64.7125,,,,
2024-08-24,22.466667,,,21.679167,,,,,70.2,,,71.779167,,,,
2024-08-25,24.020833,,,22.520833,,,,,52.558333,,,57.15,,,,
2024-08-26,24.945833,,,23.533333,,,,,51.016667,,,55.5375,,,,
2024-08-27,20.704167,,,20.925,,,,,67.541667,,,64.9375,,,,
2024-08-28,20.970833,,,20.6875,,,,,68.045833,,,67.679167,,,,
2024-08-29,21.133333,22.681048,,20.8625,21.930167,1.356007,22.305607,1.356007,73.325,63.019548,7.991967,72.245833,64.436548,6.059541,63.728048,5.913167
2024-08-30,22.316667,22.716667,,21.908333,21.991667,1.284618,22.354167,1.284618,66.745833,63.677976,8.698154,65.895833,64.863095,6.541584,64.270536,6.410907
2024-08-31,20.8375,22.365476,,20.470833,21.730952,1.032731,22.048214,1.032731,70.958333,64.204762,8.765772,70.870833,65.032143,6.552324,64.618452,6.457209


#### <b> Getting Temperature and Humidity for a given day on env_vars_1 and env_vars_2 dataframes </b>

In [47]:
temp_max = env_vars_2_df["2024-09-07":"2024-09-07"]["temperature"].max()
temp_min = env_vars_2_df["2024-09-07":"2024-09-07"]["temperature"].min()
temp_mean = env_vars_2_df["2024-09-07":"2024-09-07"]["temperature"].mean()
temp_max, temp_min, temp_mean

(28.5, 15.4, 19.395833333333332)

In [48]:
hum_max = env_vars_2_df["2024-09-07":"2024-09-07"]["humidity"].max()
hum_min = env_vars_2_df["2024-09-07":"2024-09-07"]["humidity"].min()
hum_mean = env_vars_2_df["2024-09-07":"2024-09-07"]["humidity"].mean()
hum_max, hum_min, hum_mean

(79.5, 44.1, 64.70416666666667)

In [49]:
#Check which biometry dates are in environment 1 variables list
biom_in_env1_dates = []
for date in biom_dates_list:
    if date in env_1_dates_list:
        biom_in_env1_dates.append(date)
biom_in_env1_dates, len(biom_in_env1_dates)

([Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-20 00:00:00'),
  Timestamp('2024-09-24 00:00:00'),
  Timestamp('2024-09-29 00:00:00'),
  Timestamp('2024-10-03 00:00:00')],
 5)

In [50]:
#Check which biometry dates are in environment 2 variables list
biom_in_env2_dates = []
for date in biom_dates_list:
    if date in env_2_dates_list:
        biom_in_env2_dates.append(date)
biom_in_env2_dates, len(biom_in_env2_dates)

([Timestamp('2024-09-07 00:00:00'),
  Timestamp('2024-09-11 00:00:00'),
  Timestamp('2024-09-14 00:00:00'),
  Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-20 00:00:00'),
  Timestamp('2024-09-24 00:00:00'),
  Timestamp('2024-09-29 00:00:00'),
  Timestamp('2024-10-03 00:00:00')],
 8)

In [51]:
# Adding columns for Temperature
biometric_df["Max. Temp."] = np.nan
biometric_df["Min. Temp."] = np.nan
biometric_df["Mean Temp."] = np.nan

biometric_df.head(3)
    

Unnamed: 0_level_0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,Thickness 3,Thickness 4,Thickness 5,Max. Temp.,Min. Temp.,Mean Temp.
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,,,,,,
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,,,,,,
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,,,,,,


In [52]:
# Adding Temperature data from environmental variables 2 dataframe to biometric dataframe
for date in biom_in_env2_dates:

    temp_max = env_vars_2_df[date:date]["temperature"].max()
    temp_min = env_vars_2_df[date:date]["temperature"].min()
    temp_mean = env_vars_2_df[date:date]["temperature"].mean()

    biometric_df.at[date,"Max. Temp."] = temp_max
    biometric_df.at[date,"Min. Temp."] = temp_min
    biometric_df.at[date,"Mean. Temp."] = temp_mean



In [53]:
# Adding columns to biometric_df for Humidity
biometric_df["Max. Hum."] = np.nan
biometric_df["Min. Hum."] = np.nan
biometric_df["Mean Hum."] = np.nan

In [54]:
biometric_df

Unnamed: 0_level_0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,Thickness 3,Thickness 4,Thickness 5,Max. Temp.,Min. Temp.,Mean Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean Hum.
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,,,,28.500000,15.400000,,19.395833,,,
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,,,,28.500000,15.400000,,19.395833,,,
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,,,,28.500000,15.400000,,19.395833,,,
2024-09-07,4,1,W,RWGRA2,10,121,72,95,,,,,,28.500000,15.400000,,19.395833,,,
2024-09-07,5,1,W,RWGRB3,11,142,105,125,,,,,,28.500000,15.400000,,19.395833,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,268,245,1015,04,032,027,031,,21.758629,18.687616,,19.879193,,,
2024-10-03,42,6,N,RNROB5,7,301,226,1185,032,026,033,037,,21.758629,18.687616,,19.879193,,,
2024-10-03,43,6,N,RNROA2,11,228,205,1035,04,035,039,032,,21.758629,18.687616,,19.879193,,,
2024-10-03,44,6,N,RNROC4,10,305,262,1155,038,032,033,037,,21.758629,18.687616,,19.879193,,,


In [55]:
biometric_df.drop(columns='Mean Temp.', inplace=True)

In [56]:
# Adding Humidity data from environmental variables 2 dataframe to biometric dataframe
for date in biom_in_env2_dates:

    hum_max = env_vars_2_df[date:date]["humidity"].max()
    hum_min = env_vars_2_df[date:date]["humidity"].min()
    hum_mean = env_vars_2_df[date:date]["humidity"].mean()

    biometric_df.at[date,"Max. Hum."] = hum_max
    biometric_df.at[date,"Min. Hum."] = hum_min
    biometric_df.at[date,"Mean. Hum."] = hum_mean

In [57]:
biometric_df.drop("Mean Hum.", axis=1, inplace=True)

In [58]:
biometric_df

Unnamed: 0_level_0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,Thickness 3,Thickness 4,Thickness 5,Max. Temp.,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
2024-09-07,4,1,W,RWGRA2,10,121,72,95,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
2024-09-07,5,1,W,RWGRB3,11,142,105,125,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,268,245,1015,04,032,027,031,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222
2024-10-03,42,6,N,RNROB5,7,301,226,1185,032,026,033,037,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222
2024-10-03,43,6,N,RNROA2,11,228,205,1035,04,035,039,032,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222
2024-10-03,44,6,N,RNROC4,10,305,262,1155,038,032,033,037,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222


#### <b> Merging weekly_averages and biometric_df </b>

In [59]:
final_biometric = pd.merge(biometric_df, weekly_averages[['Combined Temperature Average', 'Combined Temperature Std Dev','Combined Humidity Average', 'Combined Humidity Std Dev']], how='left', left_index=True , right_index=True)

In [60]:
final_biometric

Unnamed: 0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,...,Max. Temp.,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,...,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,...,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,...,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846
2024-09-07,4,1,W,RWGRA2,10,121,72,95,,,...,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846
2024-09-07,5,1,W,RWGRB3,11,142,105,125,,,...,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,268,245,1015,04,032,...,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231
2024-10-03,42,6,N,RNROB5,7,301,226,1185,032,026,...,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231
2024-10-03,43,6,N,RNROA2,11,228,205,1035,04,035,...,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231
2024-10-03,44,6,N,RNROC4,10,305,262,1155,038,032,...,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231


##### <b> Getting the BBCH labels and adding it to the Biometric dataframe </b>

In [61]:
bbch_labels_df.columns

Index([           'Number',              'Line',            'Sample',
                    'CODE', 2024-09-07 00:00:00, 2024-09-11 00:00:00,
       2024-09-14 00:00:00, 2024-09-17 00:00:00, 2024-09-20 00:00:00,
       2024-09-24 00:00:00, 2024-09-29 00:00:00, 2024-10-03 00:00:00],
      dtype='object')

In [62]:
date_columns = bbch_labels_df.iloc[:,4:]

In [63]:
# Get the labels for each plant on each day into a list
labels = []
for i in range(0, date_columns.shape[0]):
    for j in range(0,date_columns.shape[1]):
        labels.append(date_columns.iat[i,j])

len(labels)

360

In [64]:
# Turn list into biometric_df column
final_biometric["BBCH"] = labels

In [65]:
final_biometric

Unnamed: 0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,...,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev,BBCH
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,...,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,...,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,...,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19
2024-09-07,4,1,W,RWGRA2,10,121,72,95,,,...,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19
2024-09-07,5,1,W,RWGRB3,11,142,105,125,,,...,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,268,245,1015,04,032,...,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19
2024-10-03,42,6,N,RNROB5,7,301,226,1185,032,026,...,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19
2024-10-03,43,6,N,RNROA2,11,228,205,1035,04,035,...,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19
2024-10-03,44,6,N,RNROC4,10,305,262,1155,038,032,...,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19


In [66]:
final_biometric.columns

Index(['Number', 'Line', 'Sample', 'CODE', 'No leaves', 'Diameter',
       'Perpendicular', 'Height', 'Thickness 1', 'Thickness 2', 'Thickness 3',
       'Thickness 4', 'Thickness 5', 'Max. Temp.', 'Min. Temp.', 'Mean. Temp.',
       'Max. Hum.', 'Min. Hum.', 'Mean. Hum.', 'Combined Temperature Average',
       'Combined Temperature Std Dev', 'Combined Humidity Average',
       'Combined Humidity Std Dev', 'BBCH'],
      dtype='object')

In [67]:
final_biometric.isnull()

Unnamed: 0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,...,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev,BBCH
2024-09-07,False,False,False,False,False,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
2024-09-07,False,False,False,False,False,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
2024-09-07,False,False,False,False,False,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
2024-09-07,False,False,False,False,False,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
2024-09-07,False,False,False,False,False,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2024-10-03,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2024-10-03,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2024-10-03,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [68]:
final_biometric.isnull().any()

Number                          False
Line                            False
Sample                          False
CODE                            False
No leaves                       False
Diameter                        False
Perpendicular                   False
Height                          False
Thickness 1                      True
Thickness 2                      True
Thickness 3                      True
Thickness 4                      True
Thickness 5                      True
Max. Temp.                      False
Min. Temp.                      False
Mean. Temp.                     False
Max. Hum.                       False
Min. Hum.                       False
Mean. Hum.                      False
Combined Temperature Average    False
Combined Temperature Std Dev    False
Combined Humidity Average       False
Combined Humidity Std Dev       False
BBCH                            False
dtype: bool

#### <b> Several columns present the decimal separator of numbers as "," but using "." as a decimal separator is prefered </b>


In [69]:
final_biometric["Diameter"] = final_biometric["Diameter"].str.replace(',','.')
final_biometric["Perpendicular"] = final_biometric["Perpendicular"].str.replace(',','.')
final_biometric["Height"] = final_biometric["Height"].str.replace(',','.')

In [70]:
for i in range(1,6):
    column_name = "Thickness %d" %i
    final_biometric[column_name] = biometric_df[column_name].str.replace(',','.')

In [71]:
final_biometric["Diameter"] = pd.to_numeric(final_biometric["Diameter"])
final_biometric["Perpendicular"] = pd.to_numeric(final_biometric["Perpendicular"])
final_biometric["Height"] = pd.to_numeric(final_biometric["Height"])


In [72]:
for i in range(1,6):
    column_name = "Thickness %d" %i
    final_biometric[column_name] = pd.to_numeric(final_biometric[column_name])

#### <b> Average Leaf Thickness </b>

In [73]:
thickness_columns = ['Thickness 1','Thickness 2', 'Thickness 3', 'Thickness 4', 'Thickness 5' ]
final_biometric['Average Leaf Thickness'] = final_biometric[thickness_columns].mean(axis=1)

In [74]:
for i in range(1,6):
    column_name = "Thickness %d" %i
    final_biometric.drop(columns=column_name, inplace=True)

In [75]:
final_biometric

Unnamed: 0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Max. Temp.,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev,BBCH,Average Leaf Thickness
2024-09-07,1,1,W,RWGRB5,10,14.5,10.4,9.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
2024-09-07,2,1,W,RWGRA3,10,10.9,9.7,9.80,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
2024-09-07,3,1,W,RWGRC1,13,15.8,13.9,9.40,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
2024-09-07,4,1,W,RWGRA2,10,12.1,7.2,9.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
2024-09-07,5,1,W,RWGRB3,11,14.2,10.5,12.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,26.8,24.5,10.15,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3250
2024-10-03,42,6,N,RNROB5,7,30.1,22.6,11.85,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3200
2024-10-03,43,6,N,RNROA2,11,22.8,20.5,10.35,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3650
2024-10-03,44,6,N,RNROC4,10,30.5,26.2,11.55,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3500


#### <b> Rename columns </b>

In [76]:
final_biometric.rename(columns={'Max. Temp.':'Daily Max Temp',
                                'Min. Temp.':'Daily Min Temp',
                                'Mean. Temp.':'Daily Average Temp',
                               'Max. Hum.':'Daily Max Hum',
                                'Min. Hum.':'Daily Min Hum',
                                'Mean. Hum.':'Daily Average Hum',
                                'Combined Temperature Average':'Temperature Average of prior week',
                                'Combined Temperature Std Dev':'Temperature Std Dev of prior week',
                                'Combined Humidity Average':'Humidity Average of prior week',
                                'Combined Humidity Std Dev':'Humidity Std Dev of prior week',
                                'Diameter':'Leaf Diameter',
                                'Perpendicular':'Perpendicular to Leaf Diameter'})

Unnamed: 0,Number,Line,Sample,CODE,No leaves,Leaf Diameter,Perpendicular to Leaf Diameter,Height,Daily Max Temp,Daily Min Temp,Daily Average Temp,Daily Max Hum,Daily Min Hum,Daily Average Hum,Temperature Average of prior week,Temperature Std Dev of prior week,Humidity Average of prior week,Humidity Std Dev of prior week,BBCH,Average Leaf Thickness
2024-09-07,1,1,W,RWGRB5,10,14.5,10.4,9.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
2024-09-07,2,1,W,RWGRA3,10,10.9,9.7,9.80,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
2024-09-07,3,1,W,RWGRC1,13,15.8,13.9,9.40,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
2024-09-07,4,1,W,RWGRA2,10,12.1,7.2,9.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
2024-09-07,5,1,W,RWGRB3,11,14.2,10.5,12.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,26.8,24.5,10.15,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3250
2024-10-03,42,6,N,RNROB5,7,30.1,22.6,11.85,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3200
2024-10-03,43,6,N,RNROA2,11,22.8,20.5,10.35,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3650
2024-10-03,44,6,N,RNROC4,10,30.5,26.2,11.55,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3500


#### <b> Handle NaN </b>

In [77]:
#Replace Thickness (of leaves) NaN values to 0 
final_biometric.fillna(0, inplace=True)

In [78]:
final_biometric

Unnamed: 0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Max. Temp.,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev,BBCH,Average Leaf Thickness
2024-09-07,1,1,W,RWGRB5,10,14.5,10.4,9.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
2024-09-07,2,1,W,RWGRA3,10,10.9,9.7,9.80,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
2024-09-07,3,1,W,RWGRC1,13,15.8,13.9,9.40,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
2024-09-07,4,1,W,RWGRA2,10,12.1,7.2,9.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
2024-09-07,5,1,W,RWGRB3,11,14.2,10.5,12.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,26.8,24.5,10.15,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3250
2024-10-03,42,6,N,RNROB5,7,30.1,22.6,11.85,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3200
2024-10-03,43,6,N,RNROA2,11,22.8,20.5,10.35,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3650
2024-10-03,44,6,N,RNROC4,10,30.5,26.2,11.55,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3500


#### <b> Drop BBCH </b>

In [79]:
final_biometric.drop(columns='BBCH')

Unnamed: 0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Max. Temp.,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev,Average Leaf Thickness
2024-09-07,1,1,W,RWGRB5,10,14.5,10.4,9.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,0.0000
2024-09-07,2,1,W,RWGRA3,10,10.9,9.7,9.80,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,0.0000
2024-09-07,3,1,W,RWGRC1,13,15.8,13.9,9.40,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,0.0000
2024-09-07,4,1,W,RWGRA2,10,12.1,7.2,9.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,0.0000
2024-09-07,5,1,W,RWGRB3,11,14.2,10.5,12.50,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,26.8,24.5,10.15,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,0.3250
2024-10-03,42,6,N,RNROB5,7,30.1,22.6,11.85,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,0.3200
2024-10-03,43,6,N,RNROA2,11,22.8,20.5,10.35,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,0.3650
2024-10-03,44,6,N,RNROC4,10,30.5,26.2,11.55,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,0.3500


#### <b> Daily Irrigation </b>

In [80]:
daily_irrigation = pd.read_excel("../raw_data/irrigation.xlsx")

FileNotFoundError: [Errno 2] No such file or directory: '../raw_data/irrigation.xlsx'

In [79]:
daily_irrigation["Sample"] = daily_irrigation["Sample"].str.upper()

In [80]:
daily_irrigation.set_index("Date", inplace=True)

In [81]:
daily_irrigation

Unnamed: 0_level_0,Sample,Quantity (mL)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-08-24,RNGRA1,50.0
2024-08-24,RNGRA2,50.0
2024-08-24,RNGRA3,50.0
2024-08-24,RNGRA4,50.0
2024-08-24,RNGRA5,50.0
...,...,...
2024-10-01,RWGRC1,7.5
2024-10-01,RWGRC2,14.5
2024-10-01,RWGRC3,5.0
2024-10-01,RWGRC4,12.0


##### <b> Join dataframes (daily_irrigation and final_biometric) </b>

In [82]:
final_biometric.reset_index().rename(columns={'index': 'Date'}, inplace=True)

In [83]:
final_biometric.reset_index(inplace=True)

In [84]:
final_biometric.rename(columns={'index': 'Date'}, inplace=True)

In [85]:
final_biometric.head(45)

Unnamed: 0,Date,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Max. Temp.,...,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev,BBCH,Average Leaf Thickness
0,2024-09-07,1,1,W,RWGRB5,10,14.5,10.4,9.5,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0
1,2024-09-07,2,1,W,RWGRA3,10,10.9,9.7,9.8,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0
2,2024-09-07,3,1,W,RWGRC1,13,15.8,13.9,9.4,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0
3,2024-09-07,4,1,W,RWGRA2,10,12.1,7.2,9.5,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0
4,2024-09-07,5,1,W,RWGRB3,11,14.2,10.5,12.5,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0
5,2024-09-07,6,1,W,RWGRA4,10,11.7,9.4,10.1,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,37,0.0
6,2024-09-07,7,1,W,RWGRC3,12,11.1,9.6,11.5,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,39,0.0
7,2024-09-07,8,1,W,RWGRA5,10,11.3,10.8,7.8,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,44,0.0
8,2024-09-07,9,2,W,RWGRC2,12,15.2,12.1,12.3,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0
9,2024-09-07,10,2,W,RWGRB4,10,12.5,13.2,14.6,28.5,...,19.395833,79.5,44.1,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0


In [86]:
daily_irrigation.reset_index(inplace=True)

In [87]:
daily_irrigation.rename(columns={'index': 'Date', 'Sample':'CODE'}, inplace=True)

In [88]:
di = daily_irrigation['CODE'].unique()

In [89]:
len(daily_irrigation['CODE'].unique())

45

In [90]:
fb = final_biometric['CODE'].unique()

In [91]:
arrays_equal = np.array_equal(np.sort(di), np.sort(fb))

In [92]:
print(arrays_equal)

True


In [93]:
print(np.sort(di))

['RNGRA1' 'RNGRA2' 'RNGRA3' 'RNGRA4' 'RNGRA5' 'RNGRB1' 'RNGRB2' 'RNGRB3'
 'RNGRB4' 'RNGRB5' 'RNGRC1' 'RNGRC2' 'RNGRC3' 'RNGRC4' 'RNGRC5' 'RNROA1'
 'RNROA2' 'RNROA3' 'RNROA4' 'RNROA5' 'RNROB1' 'RNROB2' 'RNROB3' 'RNROB4'
 'RNROB5' 'RNROC1' 'RNROC2' 'RNROC3' 'RNROC4' 'RNROC5' 'RWGRA1' 'RWGRA2'
 'RWGRA3' 'RWGRA4' 'RWGRA5' 'RWGRB1' 'RWGRB2' 'RWGRB3' 'RWGRB4' 'RWGRB5'
 'RWGRC1' 'RWGRC2' 'RWGRC3' 'RWGRC4' 'RWGRC5']


In [94]:
print(np.sort(fb))

['RNGRA1' 'RNGRA2' 'RNGRA3' 'RNGRA4' 'RNGRA5' 'RNGRB1' 'RNGRB2' 'RNGRB3'
 'RNGRB4' 'RNGRB5' 'RNGRC1' 'RNGRC2' 'RNGRC3' 'RNGRC4' 'RNGRC5' 'RNROA1'
 'RNROA2' 'RNROA3' 'RNROA4' 'RNROA5' 'RNROB1' 'RNROB2' 'RNROB3' 'RNROB4'
 'RNROB5' 'RNROC1' 'RNROC2' 'RNROC3' 'RNROC4' 'RNROC5' 'RWGRA1' 'RWGRA2'
 'RWGRA3' 'RWGRA4' 'RWGRA5' 'RWGRB1' 'RWGRB2' 'RWGRB3' 'RWGRB4' 'RWGRB5'
 'RWGRC1' 'RWGRC2' 'RWGRC3' 'RWGRC4' 'RWGRC5']


In [95]:
len(final_biometric['CODE'].unique())

45

In [96]:
daily_irrigation.head(45)

Unnamed: 0,Date,CODE,Quantity (mL)
0,2024-08-24,RNGRA1,50.0
1,2024-08-24,RNGRA2,50.0
2,2024-08-24,RNGRA3,50.0
3,2024-08-24,RNGRA4,50.0
4,2024-08-24,RNGRA5,50.0
5,2024-08-24,RNGRB1,50.0
6,2024-08-24,RNGRB2,50.0
7,2024-08-24,RNGRB3,50.0
8,2024-08-24,RNGRB4,50.0
9,2024-08-24,RNGRB5,50.0


In [97]:
merged_biom = pd.merge(final_biometric,
                       daily_irrigation,
                       on=['Date', 'CODE'],
                       how='left')

In [98]:
final_biometric["Date"].unique()

<DatetimeArray>
['2024-09-07 00:00:00', '2024-09-11 00:00:00', '2024-09-14 00:00:00',
 '2024-09-17 00:00:00', '2024-09-20 00:00:00', '2024-09-24 00:00:00',
 '2024-09-29 00:00:00', '2024-10-03 00:00:00']
Length: 8, dtype: datetime64[ns]

In [99]:
final_biometric["Date"].nunique()

8

In [100]:
final_biometric

Unnamed: 0,Date,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Max. Temp.,...,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev,BBCH,Average Leaf Thickness
0,2024-09-07,1,1,W,RWGRB5,10,14.5,10.4,9.50,28.500000,...,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
1,2024-09-07,2,1,W,RWGRA3,10,10.9,9.7,9.80,28.500000,...,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
2,2024-09-07,3,1,W,RWGRC1,13,15.8,13.9,9.40,28.500000,...,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
3,2024-09-07,4,1,W,RWGRA2,10,12.1,7.2,9.50,28.500000,...,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
4,2024-09-07,5,1,W,RWGRB3,11,14.2,10.5,12.50,28.500000,...,19.395833,79.50000,44.100000,64.704167,20.911128,1.844903,60.768349,9.135846,19,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,2024-10-03,41,6,N,RNROA3,11,26.8,24.5,10.15,21.758629,...,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3250
356,2024-10-03,42,6,N,RNROB5,7,30.1,22.6,11.85,21.758629,...,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3200
357,2024-10-03,43,6,N,RNROA2,11,22.8,20.5,10.35,21.758629,...,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3650
358,2024-10-03,44,6,N,RNROC4,10,30.5,26.2,11.55,21.758629,...,19.879193,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3500


In [101]:
daily_irrigation["Date"].unique()

<DatetimeArray>
['2024-08-24 00:00:00', '2024-08-26 00:00:00', '2024-08-28 00:00:00',
 '2024-08-30 00:00:00', '2024-09-02 00:00:00', '2024-09-04 00:00:00',
 '2024-09-07 00:00:00', '2024-09-09 00:00:00', '2024-09-11 00:00:00',
 '2024-09-12 00:00:00', '2024-09-13 00:00:00', '2024-09-16 00:00:00',
 '2024-09-17 00:00:00', '2024-09-18 00:00:00', '2024-09-20 00:00:00',
 '2024-09-22 00:00:00', '2024-09-21 00:00:00', '2024-09-23 00:00:00',
 '2024-09-25 00:00:00', '2024-09-26 00:00:00', '2024-09-27 00:00:00',
 '2024-09-30 00:00:00', '2024-10-01 00:00:00']
Length: 23, dtype: datetime64[ns]

In [102]:
daily_irrigation["Date"].nunique()

23

In [109]:
merged_biom[89:]

Unnamed: 0,Date,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Max. Temp.,...,Max. Hum.,Min. Hum.,Mean. Hum.,Combined Temperature Average,Combined Temperature Std Dev,Combined Humidity Average,Combined Humidity Std Dev,BBCH,Average Leaf Thickness,Quantity (mL)
89,2024-09-11,45,6,N,RNROB4,8,14.6,14.3,12.10,32.900000,...,60.10000,34.100000,50.670833,20.492857,1.847495,58.488690,5.950718,19,0.0000,64.5
90,2024-09-14,1,1,W,RWGRB5,13,19.1,16.8,10.40,36.100000,...,34.80000,18.300000,27.079167,21.976984,2.458189,56.901065,9.096700,19,0.0000,
91,2024-09-14,2,1,W,RWGRA3,13,17.8,15.1,11.30,36.100000,...,34.80000,18.300000,27.079167,21.976984,2.458189,56.901065,9.096700,37,0.0000,
92,2024-09-14,3,1,W,RWGRC1,17,18.1,18.5,9.70,36.100000,...,34.80000,18.300000,27.079167,21.976984,2.458189,56.901065,9.096700,37,0.0000,
93,2024-09-14,4,1,W,RWGRA2,14,16.2,16.5,10.20,36.100000,...,34.80000,18.300000,27.079167,21.976984,2.458189,56.901065,9.096700,39,0.0000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,2024-10-03,41,6,N,RNROA3,11,26.8,24.5,10.15,21.758629,...,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3250,
356,2024-10-03,42,6,N,RNROB5,7,30.1,22.6,11.85,21.758629,...,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3200,
357,2024-10-03,43,6,N,RNROA2,11,22.8,20.5,10.35,21.758629,...,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3650,
358,2024-10-03,44,6,N,RNROC4,10,30.5,26.2,11.55,21.758629,...,87.61628,81.984047,86.349222,20.409005,0.793840,72.251447,7.783231,19,0.3500,


RO: Read Oak Leaf
GR: Grand Rapid

<b> Fertilização: acrescentar em função da rega com regra de 3 simples </b>

<b> Composição química das plantas: input feature a acrescentar</b>

<b> Peso final das plantas: acrescentar (target variable) </b>

#### <b> Export dataframe to CSV </b>

In [1]:
#biometric_df.to_csv("../data/biometric_data.csv")
#final_biometric.to_csv("../data/final_biometric_data.csv")