In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

### <b> Constructing Dataframe for Biometric Data </b>

##### <b> Main Biometric Data </b>

In [3]:
# Biometry dataframe: read the semicolon-separated measurements and parse the
# day/month/year strings of the "Date" column into datetime values.
biometric_df = pd.read_csv("data/biom_mod.csv", sep=';')
biometric_df = biometric_df.assign(
    Date=pd.to_datetime(biometric_df["Date"], format="%d/%m/%Y")
)

In [4]:
# Use the parsed "Date" column as the row index of the biometric frame
biometric_df = biometric_df.set_index("Date")

In [5]:
# Distinct days on which biometry samples were taken, plus how many there are
biom_dates_list = list(biometric_df.index.unique())
biom_dates_list, len(biom_dates_list)

([Timestamp('2024-09-07 00:00:00'),
  Timestamp('2024-09-11 00:00:00'),
  Timestamp('2024-09-14 00:00:00'),
  Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-20 00:00:00'),
  Timestamp('2024-09-24 00:00:00'),
  Timestamp('2024-09-29 00:00:00'),
  Timestamp('2024-10-03 00:00:00')],
 8)

##### <b> Biometric Labels </b>

In [6]:
# Labels dataframe: BBCH labels read from the Excel sheet.
# Per the columns displayed further down, it holds plant identifier columns
# followed by one column per sampling date.
bbch_labels_df = pd.read_excel("raw_data/biom_to_BBCH.xlsx")

In [7]:
# Preview the first rows of the label table
bbch_labels_df.head()

Unnamed: 0,Number,Line,Sample,CODE,2024-09-07 00:00:00,2024-09-11 00:00:00,2024-09-14 00:00:00,2024-09-17 00:00:00,2024-09-20 00:00:00,2024-09-24 00:00:00,2024-09-29 00:00:00,2024-10-03 00:00:00
0,1,1,W,RWGRB5,19,19,19,19,19,37,39,44
1,2,1,W,RWGRA3,19,19,19,19,19,19,19,19
2,3,1,W,RWGRC1,19,19,35,19,19,37,37,39
3,4,1,W,RWGRA2,19,19,19,19,19,19,19,19
4,5,1,W,RWGRB3,19,19,19,19,19,19,19,19


##### <b> Environmental Variables 1 and 2 </b>

In [8]:
# Environmental variables 1: readings loaded from the inside_1 Excel workbook
env_vars_1_df = pd.read_excel("raw_data/inside_1.xlsx")

In [9]:
# Index the environmental variables 1 readings by their "date" column
env_vars_1_df = env_vars_1_df.set_index("date")

In [10]:
# Distinct days covered by environmental variables 1, plus how many there are
env_1_dates_list = list(env_vars_1_df.index.unique())
env_1_dates_list, len(env_1_dates_list)

([Timestamp('2024-08-22 00:00:00'),
  Timestamp('2024-08-23 00:00:00'),
  Timestamp('2024-08-24 00:00:00'),
  Timestamp('2024-08-25 00:00:00'),
  Timestamp('2024-08-26 00:00:00'),
  Timestamp('2024-08-27 00:00:00'),
  Timestamp('2024-08-28 00:00:00'),
  Timestamp('2024-08-29 00:00:00'),
  Timestamp('2024-08-30 00:00:00'),
  Timestamp('2024-08-31 00:00:00'),
  Timestamp('2024-09-01 00:00:00'),
  Timestamp('2024-09-02 00:00:00'),
  Timestamp('2024-09-03 00:00:00'),
  Timestamp('2024-09-04 00:00:00'),
  Timestamp('2024-09-16 00:00:00'),
  Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-18 00:00:00'),
  Timestamp('2024-09-19 00:00:00'),
  Timestamp('2024-09-20 00:00:00'),
  Timestamp('2024-09-21 00:00:00'),
  Timestamp('2024-09-22 00:00:00'),
  Timestamp('2024-09-23 00:00:00'),
  Timestamp('2024-09-24 00:00:00'),
  Timestamp('2024-09-25 00:00:00'),
  Timestamp('2024-09-26 00:00:00'),
  Timestamp('2024-09-27 00:00:00'),
  Timestamp('2024-09-28 00:00:00'),
  Timestamp('2024-09-29 00:0

In [11]:
# Display the environmental variables 1 frame (indexed by date at this point)
env_vars_1_df

Unnamed: 0_level_0,timestamp,temperature,humidity,415nm,445nm,480nm,515nm,555nm,590nm,630nm,...,480nm.1,515nm.1,555nm.1,590nm.1,630nm.1,680nm.1,clear.1,nir.1,gain (n),integration (ms)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-08-22,00:02:04,18.800000,61.400000,0.000000,0.000000,0.000000,0.000000,0.011719,0.031250,0.031250,...,0.0,0.0,3.0,8.0,8.0,2.0,21.0,0.0,10.0,500000.0
2024-08-22,01:02:04,18.300000,64.800000,0.000000,0.000000,0.000000,0.000000,0.000000,0.003906,0.007812,...,0.0,0.0,0.0,1.0,2.0,0.0,9.0,0.0,10.0,500000.0
2024-08-22,02:02:05,17.400000,68.400000,0.000000,0.000000,0.000000,0.000000,0.000000,0.015625,0.011719,...,0.0,0.0,0.0,4.0,3.0,0.0,13.0,0.0,10.0,500000.0
2024-08-22,03:02:06,17.300000,71.900000,0.000000,0.000000,0.000000,0.000000,0.000000,0.019531,0.015625,...,0.0,0.0,0.0,5.0,4.0,0.0,14.0,0.0,10.0,500000.0
2024-08-22,04:02:07,18.000000,74.700000,0.000000,0.000000,0.000000,0.000000,0.000000,0.011719,0.015625,...,0.0,0.0,0.0,3.0,4.0,1.0,14.0,0.0,10.0,500000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,11:02:33,21.564848,86.443733,129.572998,189.802811,228.108200,294.968475,338.299774,338.280945,421.844177,...,,,,,,,,,,
2024-10-03,12:02:27,22.493755,84.966934,271.345673,398.767883,482.346466,623.695374,715.089355,717.434998,896.284546,...,,,,,,,,,,
2024-10-03,13:02:27,23.084545,82.951881,268.166809,394.411713,476.071960,614.603455,703.906921,704.613037,879.269714,...,,,,,,,,,,
2024-10-03,14:02:28,22.347782,81.459694,194.670502,287.111176,345.562866,445.066803,505.059418,503.463348,627.850342,...,,,,,,,,,,


In [12]:
# Environmental variables 2: readings loaded from the inside_2 Excel workbook
env_vars_2_df = pd.read_excel("raw_data/inside_2.xlsx")

In [13]:
# Index the environmental variables 2 readings by their "date" column
env_vars_2_df = env_vars_2_df.set_index("date")

In [14]:
# Distinct days covered by environmental variables 2, plus how many there are
env_2_dates_list = list(env_vars_2_df.index.unique())
env_2_dates_list, len(env_2_dates_list)

([Timestamp('2024-08-22 00:00:00'),
  Timestamp('2024-08-23 00:00:00'),
  Timestamp('2024-08-24 00:00:00'),
  Timestamp('2024-08-25 00:00:00'),
  Timestamp('2024-08-26 00:00:00'),
  Timestamp('2024-08-27 00:00:00'),
  Timestamp('2024-08-28 00:00:00'),
  Timestamp('2024-08-29 00:00:00'),
  Timestamp('2024-08-30 00:00:00'),
  Timestamp('2024-08-31 00:00:00'),
  Timestamp('2024-09-01 00:00:00'),
  Timestamp('2024-09-02 00:00:00'),
  Timestamp('2024-09-03 00:00:00'),
  Timestamp('2024-09-04 00:00:00'),
  Timestamp('2024-09-05 00:00:00'),
  Timestamp('2024-09-06 00:00:00'),
  Timestamp('2024-09-07 00:00:00'),
  Timestamp('2024-09-08 00:00:00'),
  Timestamp('2024-09-09 00:00:00'),
  Timestamp('2024-09-10 00:00:00'),
  Timestamp('2024-09-11 00:00:00'),
  Timestamp('2024-09-12 00:00:00'),
  Timestamp('2024-09-13 00:00:00'),
  Timestamp('2024-09-14 00:00:00'),
  Timestamp('2024-09-15 00:00:00'),
  Timestamp('2024-09-16 00:00:00'),
  Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-18 00:0

##### <b> Getting Temperature and Humidity for a given day on env_vars_1 and env_vars_2 dataframes </b>

In [15]:
# Temperature summary for a single day (2024-09-07) of environmental variables 2.
# The day's readings are sliced once and reused for the three statistics.
day_temperature = env_vars_2_df["2024-09-07":"2024-09-07"]["temperature"]
temp_max = day_temperature.max()
temp_min = day_temperature.min()
temp_mean = day_temperature.mean()
temp_max, temp_min, temp_mean

(28.5, 15.4, 19.395833333333332)

In [16]:
# Humidity summary for a single day (2024-09-07) of environmental variables 2.
# The day's readings are sliced once and reused for the three statistics.
day_humidity = env_vars_2_df["2024-09-07":"2024-09-07"]["humidity"]
hum_max = day_humidity.max()
hum_min = day_humidity.min()
hum_mean = day_humidity.mean()
hum_max, hum_min, hum_mean

(79.5, 44.1, 64.70416666666667)

In [17]:
# Biometry sampling days that also appear in environmental variables 1
# (order of biom_dates_list is preserved)
biom_in_env1_dates = [day for day in biom_dates_list if day in env_1_dates_list]
biom_in_env1_dates, len(biom_in_env1_dates)

([Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-20 00:00:00'),
  Timestamp('2024-09-24 00:00:00'),
  Timestamp('2024-09-29 00:00:00'),
  Timestamp('2024-10-03 00:00:00')],
 5)

In [18]:
# Biometry sampling days that also appear in environmental variables 2
# (order of biom_dates_list is preserved)
biom_in_env2_dates = [day for day in biom_dates_list if day in env_2_dates_list]
biom_in_env2_dates, len(biom_in_env2_dates)

([Timestamp('2024-09-07 00:00:00'),
  Timestamp('2024-09-11 00:00:00'),
  Timestamp('2024-09-14 00:00:00'),
  Timestamp('2024-09-17 00:00:00'),
  Timestamp('2024-09-20 00:00:00'),
  Timestamp('2024-09-24 00:00:00'),
  Timestamp('2024-09-29 00:00:00'),
  Timestamp('2024-10-03 00:00:00')],
 8)

In [19]:
# Create NaN-filled placeholder columns for the daily temperature statistics.
# NOTE: the mean placeholder is spelled "Mean Temp." (no dot after "Mean"),
# whereas the fill loop below writes to "Mean. Temp."; this placeholder
# therefore stays NaN and is dropped in a later cell.
for temp_column in ("Max. Temp.", "Min. Temp.", "Mean Temp."):
    biometric_df[temp_column] = np.nan

biometric_df.head(3)
    

Unnamed: 0_level_0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,Thickness 3,Thickness 4,Thickness 5,Max. Temp.,Min. Temp.,Mean Temp.
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,,,,,,
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,,,,,,
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,,,,,,


In [20]:
# Copy each sampling day's temperature statistics (from environmental
# variables 2) onto every biometric row of that day.
for date in biom_in_env2_dates:
    # All temperature readings taken on this day
    day_temperature = env_vars_2_df.loc[date:date, "temperature"]
    temp_max, temp_min, temp_mean = (
        day_temperature.max(),
        day_temperature.min(),
        day_temperature.mean(),
    )

    # The Date index is not unique, so each assignment touches every row of the day.
    biometric_df.loc[date, "Max. Temp."] = temp_max
    biometric_df.loc[date, "Min. Temp."] = temp_min
    # "Mean. Temp." is a new column here (the placeholder is named "Mean Temp.").
    biometric_df.loc[date, "Mean. Temp."] = temp_mean



In [21]:
# Create NaN-filled placeholder columns for the daily humidity statistics.
# NOTE: the mean placeholder is spelled "Mean Hum." (no dot after "Mean"),
# whereas the fill loop below writes to "Mean. Hum."; this placeholder
# therefore stays NaN and is dropped in a later cell.
for hum_column in ("Max. Hum.", "Min. Hum.", "Mean Hum."):
    biometric_df[hum_column] = np.nan

In [22]:
# Inspect the frame: temperature statistics are filled, humidity placeholders are still NaN
biometric_df

Unnamed: 0_level_0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,Thickness 3,Thickness 4,Thickness 5,Max. Temp.,Min. Temp.,Mean Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean Hum.
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,,,,28.500000,15.400000,,19.395833,,,
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,,,,28.500000,15.400000,,19.395833,,,
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,,,,28.500000,15.400000,,19.395833,,,
2024-09-07,4,1,W,RWGRA2,10,121,72,95,,,,,,28.500000,15.400000,,19.395833,,,
2024-09-07,5,1,W,RWGRB3,11,142,105,125,,,,,,28.500000,15.400000,,19.395833,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,268,245,1015,04,032,027,031,,21.758629,18.687616,,19.879193,,,
2024-10-03,42,6,N,RNROB5,7,301,226,1185,032,026,033,037,,21.758629,18.687616,,19.879193,,,
2024-10-03,43,6,N,RNROA2,11,228,205,1035,04,035,039,032,,21.758629,18.687616,,19.879193,,,
2024-10-03,44,6,N,RNROC4,10,305,262,1155,038,032,033,037,,21.758629,18.687616,,19.879193,,,


In [23]:
# Copy each sampling day's humidity statistics (from environmental
# variables 2) onto every biometric row of that day.
for date in biom_in_env2_dates:
    # All humidity readings taken on this day
    day_humidity = env_vars_2_df.loc[date:date, "humidity"]
    hum_max, hum_min, hum_mean = (
        day_humidity.max(),
        day_humidity.min(),
        day_humidity.mean(),
    )

    # The Date index is not unique, so each assignment touches every row of the day.
    biometric_df.loc[date, "Max. Hum."] = hum_max
    biometric_df.loc[date, "Min. Hum."] = hum_min
    # "Mean. Hum." is a new column here (the placeholder is named "Mean Hum.").
    biometric_df.loc[date, "Mean. Hum."] = hum_mean

In [24]:
# Remove the unused "Mean Hum." placeholder: the fill loop stored the means
# under "Mean. Hum." instead, so this column only contains NaN.
biometric_df.drop(columns=["Mean Hum."], inplace=True)

In [25]:
# Remove the unused "Mean Temp." placeholder: the fill loop stored the means
# under "Mean. Temp." instead, so this column only contains NaN.
biometric_df.drop(columns=["Mean Temp."], inplace=True)

In [26]:
# Inspect the frame after the temperature and humidity statistics were added
biometric_df

Unnamed: 0_level_0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,Thickness 3,Thickness 4,Thickness 5,Max. Temp.,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
2024-09-07,4,1,W,RWGRA2,10,121,72,95,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
2024-09-07,5,1,W,RWGRB3,11,142,105,125,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,268,245,1015,04,032,027,031,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222
2024-10-03,42,6,N,RNROB5,7,301,226,1185,032,026,033,037,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222
2024-10-03,43,6,N,RNROA2,11,228,205,1035,04,035,039,032,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222
2024-10-03,44,6,N,RNROC4,10,305,262,1155,038,032,033,037,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222


##### <b> Getting the BBCH labels and adding them to the Biometric dataframe </b>

In [27]:
# List the label table's columns to locate where the per-date columns start
bbch_labels_df.columns

Index([           'Number',              'Line',            'Sample',
                    'CODE', 2024-09-07 00:00:00, 2024-09-11 00:00:00,
       2024-09-14 00:00:00, 2024-09-17 00:00:00, 2024-09-20 00:00:00,
       2024-09-24 00:00:00, 2024-09-29 00:00:00, 2024-10-03 00:00:00],
      dtype='object')

In [28]:
# Keep only the per-date BBCH columns: positions 0-3 are the identifier
# columns (Number, Line, Sample, CODE) shown in the column listing above.
date_columns = bbch_labels_df.iloc[:,4:]

In [29]:
# Flatten the BBCH table into one label per biometric_df row.
# biometric_df is laid out date by date (its displayed head/tail show all
# plants of the first sampling day first and the last sampling day last), and
# the labels are later attached to it purely by position. The table therefore
# has to be read column by column: outer loop over the date columns, inner
# loop over the plants. Reading it row by row (plant by plant) would pair most
# labels with the wrong plant/date.
# Assumes every date block of biometric_df lists the plants in the same order
# as the rows of bbch_labels_df — TODO confirm against the CODE column.
labels = [
    date_columns.iat[plant_pos, date_pos]
    for date_pos in range(date_columns.shape[1])
    for plant_pos in range(date_columns.shape[0])
]

len(labels)

360

In [30]:
# Turn list into biometric_df column.
# This is a positional assignment: labels[k] goes to the k-th row of
# biometric_df, so the list must have one entry per row and follow the
# frame's row order.
biometric_df["BBCH"] = labels

In [31]:
# Inspect the frame with the BBCH column attached
biometric_df

Unnamed: 0_level_0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,Thickness 3,Thickness 4,Thickness 5,Max. Temp.,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,BBCH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-09-07,1,1,W,RWGRB5,10,145,104,95,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
2024-09-07,2,1,W,RWGRA3,10,109,97,98,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
2024-09-07,3,1,W,RWGRC1,13,158,139,94,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
2024-09-07,4,1,W,RWGRA2,10,121,72,95,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
2024-09-07,5,1,W,RWGRB3,11,142,105,125,,,,,,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,268,245,1015,04,032,027,031,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,19
2024-10-03,42,6,N,RNROB5,7,301,226,1185,032,026,033,037,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,19
2024-10-03,43,6,N,RNROA2,11,228,205,1035,04,035,039,032,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,19
2024-10-03,44,6,N,RNROC4,10,305,262,1155,038,032,033,037,,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,19


In [32]:
# List the column labels of the assembled frame
biometric_df.columns

Index(['Number', 'Line', 'Sample', 'CODE', 'No leaves', 'Diameter',
       'Perpendicular', 'Height', 'Thickness 1', 'Thickness 2', 'Thickness 3',
       'Thickness 4', 'Thickness 5', 'Max. Temp.', 'Min. Temp.', 'Mean. Temp.',
       'Max. Hum.', 'Min. Hum.', 'Mean. Hum.', 'BBCH'],
      dtype='object')

In [33]:
# Count missing values per column
biometric_df.isnull().sum()

Number             0
Line               0
Sample             0
CODE               0
No leaves          0
Diameter           0
Perpendicular      0
Height             0
Thickness 1      184
Thickness 2      184
Thickness 3      184
Thickness 4      315
Thickness 5      355
Max. Temp.         0
Min. Temp.         0
Mean. Temp.        0
Max. Hum.          0
Min. Hum.          0
Mean. Hum.         0
BBCH               0
dtype: int64

In [34]:
# Count distinct values per column
biometric_df.nunique()

Number            45
Line               6
Sample             2
CODE              45
No leaves         17
Diameter         159
Perpendicular    163
Height           121
Thickness 1       31
Thickness 2       28
Thickness 3       28
Thickness 4       17
Thickness 5        5
Max. Temp.         8
Min. Temp.         8
Mean. Temp.        8
Max. Hum.          8
Min. Hum.          8
Mean. Hum.         8
BBCH              13
dtype: int64

#### <b> Handle NaN </b>

In [48]:
# Replace missing leaf-thickness values with 0.
# Only the "Thickness ..." columns are filled: a frame-wide fillna(0) would
# also turn a missing value in any other column (for example a temperature)
# into a plausible-looking 0 without any warning.
thickness_columns = [
    column for column in biometric_df.columns if str(column).startswith("Thickness")
]
biometric_df[thickness_columns] = biometric_df[thickness_columns].fillna(0)

In [36]:
# Show the dtype of each column before the decimal-separator conversion
biometric_df.dtypes

Number             int64
Line               int64
Sample            object
CODE              object
No leaves          int64
Diameter          object
Perpendicular     object
Height            object
Thickness 1       object
Thickness 2       object
Thickness 3       object
Thickness 4       object
Thickness 5       object
Max. Temp.       float64
Min. Temp.       float64
Mean. Temp.      float64
Max. Hum.        float64
Min. Hum.        float64
Mean. Hum.       float64
BBCH               int64
dtype: object

##### <b> Several columns use "," as the decimal separator; "." is preferred so the values can be parsed as numbers </b>


In [42]:
# Swap the decimal comma for a decimal point in the three size measurements
# (the values are still strings after this step).
for size_column in ("Diameter", "Perpendicular", "Height"):
    biometric_df[size_column] = biometric_df[size_column].str.replace(',', '.')

In [44]:
# Parse the cleaned-up size measurement strings into numeric columns
for size_column in ("Diameter", "Perpendicular", "Height"):
    biometric_df[size_column] = pd.to_numeric(biometric_df[size_column])


In [45]:
# Show the dtype of each column after the numeric conversion
biometric_df.dtypes

Number             int64
Line               int64
Sample            object
CODE              object
No leaves          int64
Diameter         float64
Perpendicular    float64
Height           float64
Thickness 1       object
Thickness 2       object
Thickness 3       object
Thickness 4       object
Thickness 5       object
Max. Temp.       float64
Min. Temp.       float64
Mean. Temp.      float64
Max. Hum.        float64
Min. Hum.        float64
Mean. Hum.       float64
BBCH               int64
dtype: object

In [49]:
# Inspect the final frame before exporting it
biometric_df

Unnamed: 0_level_0,Number,Line,Sample,CODE,No leaves,Diameter,Perpendicular,Height,Thickness 1,Thickness 2,Thickness 3,Thickness 4,Thickness 5,Max. Temp.,Min. Temp.,Mean. Temp.,Max. Hum.,Min. Hum.,Mean. Hum.,BBCH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-09-07,1,1,W,RWGRB5,10,14.5,10.4,9.50,0,0,0,0,0,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
2024-09-07,2,1,W,RWGRA3,10,10.9,9.7,9.80,0,0,0,0,0,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
2024-09-07,3,1,W,RWGRC1,13,15.8,13.9,9.40,0,0,0,0,0,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
2024-09-07,4,1,W,RWGRA2,10,12.1,7.2,9.50,0,0,0,0,0,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
2024-09-07,5,1,W,RWGRB3,11,14.2,10.5,12.50,0,0,0,0,0,28.500000,15.400000,19.395833,79.50000,44.100000,64.704167,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-03,41,6,N,RNROA3,11,26.8,24.5,10.15,04,032,027,031,0,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,19
2024-10-03,42,6,N,RNROB5,7,30.1,22.6,11.85,032,026,033,037,0,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,19
2024-10-03,43,6,N,RNROA2,11,22.8,20.5,10.35,04,035,039,032,0,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,19
2024-10-03,44,6,N,RNROC4,10,30.5,26.2,11.55,038,032,033,037,0,21.758629,18.687616,19.879193,87.61628,81.984047,86.349222,19


##### <b> Export dataframe to CSV </b>

In [50]:
# Write the assembled frame to CSV; with to_csv's default arguments the Date
# index is written as the first column.
biometric_df.to_csv("data/biometric_data.csv")

### <b> Exploratory Data Analysis </b>