In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

#### Find Mean Hourly Bluetooth Temperature Sensor Readings and Match with CO2 Flux Data

In [2]:
# Turn off pandas' chained-assignment (SettingWithCopy) check for the whole session.
pd.set_option('mode.chained_assignment', None)

In [25]:
# Project root and the sub-folder that holds the temperature-sensor CSV files.
basepath = 'C:/Users/roseh/Desktop/NYBG_R/'
file_location = 'data/processed/QC2/Temp_Sensors'

# Make the sensor folder the working directory so files can be opened by bare name.
sensor_dir = os.path.join(basepath, file_location)
os.chdir(sensor_dir)

#### Read in and format temperature sensor data

In [26]:
# Load the sensor log: 'Date' is parsed to datetime, cells holding a single space are
# treated as missing, and 'Temp_F' is read as 32-bit float. Rows with any missing value
# are dropped before the Fahrenheit -> Celsius conversion is added as 'Temp_C'.
all_temps = (
    pd.read_csv(
        'all_temp_sensors.csv',
        parse_dates=['Date'],
        na_values=' ',
        dtype={'Temp_F': np.float32},
    )
    .dropna()
    .assign(Temp_C=lambda temps: (temps['Temp_F'] - 32) * (5 / 9))
)
all_temps.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 102456 entries, 0 to 102546
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   Date    102456 non-null  datetime64[ns]
 1   Temp_F  102456 non-null  float32       
 2   collar  102456 non-null  object        
 3   Temp_C  102456 non-null  float32       
dtypes: datetime64[ns](1), float32(2), object(1)
memory usage: 3.1+ MB


In [27]:
def split_date_columns(df):
    """Add 'date', 'time', and 'hour' columns derived from the datetime 'Date' column.

    The frame is modified in place and nothing is returned. 'date' holds the calendar
    date converted to a string, 'time' the time-of-day component, and 'hour' the
    integer hour of each timestamp.
    """
    stamps = df['Date'].dt
    df['date'] = stamps.date.astype(str)
    df['time'] = stamps.time
    df['hour'] = stamps.hour

In [28]:
# Add the derived 'date', 'time', and 'hour' columns to the sensor frame (in place),
# then preview the first five rows.
split_date_columns(df=all_temps)
all_temps.head(5)

Unnamed: 0,Date,Temp_F,collar,Temp_C,date,time,hour
0,2022-04-25 13:40:00,65.199997,BE_SV3,18.444443,2022-04-25,13:40:00,13
1,2022-04-25 13:55:00,57.400002,BE_SV3,14.111113,2022-04-25,13:55:00,13
2,2022-04-25 14:10:00,56.240002,BE_SV3,13.466668,2022-04-25,14:10:00,14
3,2022-04-25 14:25:00,56.32,BE_SV3,13.511111,2022-04-25,14:25:00,14
4,2022-04-25 14:40:00,56.400002,BE_SV3,13.555557,2022-04-25,14:40:00,14


In [29]:
# Normalize the two inconsistently written sensor labels to the upper-case,
# underscore-separated form (e.g. 'BW_TP1'). Only the 'collar' column is rewritten,
# with an explicit old -> new mapping, so no other column is scanned or altered.
all_temps['collar'] = all_temps['collar'].replace({'bw_tp1': 'BW_TP1', 'BW-HL1': 'BW_HL1'})

In [30]:
# Re-inspect the sensor frame's columns, dtypes, and non-null counts after the
# date-splitting and label-cleaning steps.
all_temps.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 102456 entries, 0 to 102546
Data columns (total 7 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   Date    102456 non-null  datetime64[ns]
 1   Temp_F  102456 non-null  float32       
 2   collar  102456 non-null  object        
 3   Temp_C  102456 non-null  float32       
 4   date    102456 non-null  object        
 5   time    102456 non-null  object        
 6   hour    102456 non-null  int64         
dtypes: datetime64[ns](1), float32(2), int64(1), object(3)
memory usage: 5.5+ MB


#### Group dataframe by collar, date, and hour and find mean temperature for each hour

In [31]:
# Bucket the sensor readings by collar, calendar date, and hour of day.
hourly_keys = ['collar', 'date', 'hour']
collar_groups = all_temps.groupby(hourly_keys)

In [32]:
# Preview the grouping: first() shows, for every (collar, date, hour) group, the first
# entry of each remaining column (pandas skips missing values by default).
collar_groups.first()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Date,Temp_F,Temp_C,time
collar,date,hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BE_SV3,2022-04-25,13,2022-04-25 13:40:00,65.199997,18.444443,13:40:00
BE_SV3,2022-04-25,14,2022-04-25 14:10:00,56.240002,13.466668,14:10:00
BE_SV3,2022-04-25,15,2022-04-25 15:10:00,56.630001,13.683334,15:10:00
BE_SV3,2022-04-25,16,2022-04-25 16:10:00,56.779999,13.766666,16:10:00
BE_SV3,2022-04-25,17,2022-04-25 17:10:00,56.470001,13.594446,17:10:00
...,...,...,...,...,...,...
BW_TP3,2022-08-16,7,2022-08-16 07:02:00,75.930000,24.405558,07:02:00
BW_TP3,2022-08-16,8,2022-08-16 08:02:00,75.860001,24.366669,08:02:00
BW_TP3,2022-08-16,9,2022-08-16 09:02:00,76.010002,24.450003,09:02:00
BW_TP3,2022-08-16,10,2022-08-16 10:02:00,77.320000,25.177778,10:02:00


In [33]:
# Average 'Temp_C' within each (collar, date, hour) group. The result is a Series
# indexed by those three levels, so a value can be looked up by collar, date string,
# and hour.
mean_temps = collar_groups['Temp_C'].mean()

In [49]:
# Show the hourly mean temperatures for one collar on one date.
mean_temps.loc[('BW_TP3', '2022-04-25')]

hour
14    17.844446
15    15.915278
16    15.677778
17    15.473612
18    15.238890
19    14.969445
20    14.691668
21    14.434723
22    14.208334
23    14.058334
Name: Temp_C, dtype: float32

#### Read in and format flux dataframe. Group by collar.

In [35]:
# Switch the working directory to the QC2 folder that holds the CO2 flux file.
file_location = 'data/processed/QC2'
flux_dir = os.path.join(basepath, file_location)
os.chdir(flux_dir)

# Load the flux readings with 'Date' parsed to datetime and discard the 'Unnamed: 0' column.
df = pd.read_csv('NYBG_CO2_Flux_and_Temps.csv', parse_dates=['Date']).drop(columns='Unnamed: 0')

In [36]:
# Add the derived 'date', 'time', and 'hour' columns to the flux frame (in place),
# then preview the first five rows.
split_date_columns(df=df)
df.head(5)

Unnamed: 0,Date,Chamber_Temp,Collar,CO2_Flux,Site,Type,Chamber Used,Temperature,Moisture,date,time,hour
0,2022-05-31 08:13:00,31.95,BW_TP1,33.04108,BW,TP,R,23.1,38.0,2022-05-31,08:13:00,8
1,2022-05-31 08:17:00,31.07364,BW_TP2,21.48947,BW,TP,R,23.4,38.0,2022-05-31,08:17:00,8
2,2022-05-31 08:22:00,32.255,BW_TP3,15.38946,BW,TP,R,23.6,51.1,2022-05-31,08:22:00,8
3,2022-05-31 08:55:00,30.45091,BR_HL3,22.07139,BR,HL,R,22.3,47.1,2022-05-31,08:55:00,8
4,2022-05-31 09:49:00,33.74273,BW_HL1,24.25844,BW,HL,R,23.0,32.3,2022-05-31,09:49:00,9


In [37]:
# Collars that have temperature-sensor data; flux rows for any other collar are discarded.
collar_list = [
    'BW_TP1', 'BW_HL1', 'BW_TP3', 'BG_UL3', 'BT_FE3', 'BT_FI3',
    'BE_SV3', 'BE_UL1', 'BR_HL3', 'BL_ML1', 'BT_FI1', 'BT_FE2',
]
has_sensor = df['Collar'].isin(collar_list)
df = df.loc[has_sensor]

In [38]:
# Group the flux readings by collar so that each collar's rows can be handled together.
df_collar_groups = df.groupby('Collar')

#### Loop through grouped flux dataframe and match each flux reading to mean temperature based on collar, date, and hour

In [39]:
group_list = []

for name, group in df_collar_groups:
    # One (collar, date, hour) lookup key per flux reading in this collar's group.
    lookup_keys = pd.MultiIndex.from_arrays(
        [[name] * len(group), group['date'], group['hour']],
        names=['collar', 'date', 'hour'],
    )
    # reindex() yields NaN for every key that is absent from mean_temps, so a flux
    # reading whose collar, date, or hour has no sensor data gets a missing value
    # instead of raising KeyError. The cast keeps the column float64 regardless of
    # the dtype of mean_temps.
    sensor_mean = mean_temps.reindex(lookup_keys).to_numpy(dtype='float64')
    # assign() returns a new frame, so the group yielded by groupby is not modified.
    group_list.append(group.assign(sensor_mean=sensor_mean))

#### Concatenate dataframes into one single df and export to csv file

In [43]:
# Stack the per-collar frames back into a single dataframe; each row keeps the index
# label it had in the flux dataframe.
combined_df = pd.concat(group_list)

In [45]:
# Inspect the combined frame: a 'sensor_mean' non-null count below the row count
# indicates flux readings that have no matching hourly sensor mean.
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 95 entries, 10 to 170
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          95 non-null     datetime64[ns]
 1   Chamber_Temp  95 non-null     float64       
 2   Collar        95 non-null     object        
 3   CO2_Flux      95 non-null     float64       
 4   Site          95 non-null     object        
 5   Type          95 non-null     object        
 6   Chamber Used  95 non-null     object        
 7   Temperature   93 non-null     float64       
 8   Moisture      93 non-null     float64       
 9   date          95 non-null     object        
 10  time          95 non-null     object        
 11  hour          95 non-null     int64         
 12  sensor_mean   62 non-null     float64       
dtypes: datetime64[ns](1), float64(5), int64(1), object(6)
memory usage: 10.4+ KB


In [47]:
# Write the matched flux/sensor table into the temperature-sensor folder.
file_location = 'data/processed/QC2/Temp_Sensors'
output_path = os.path.join(basepath, file_location, 'temp_sensor_means.csv')
combined_df.to_csv(output_path)