In [1]:
import pandas as pd

In [2]:
# Read the plant efficiency export and the ambient temperature export for the
# same period (both files are named separated_24_08.csv).
efficiency_df, temperature_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'PLANT TON_EFFICIENCY/separated_24_08.csv',
        'TEMPERATURE/separated_24_08.csv',
    )
)

In [3]:
# Remove the CH1-CH4 columns; nothing later in this notebook reads them.
# Reassigning the result (rather than inplace=True) and passing errors='ignore'
# keeps this cell idempotent: re-running it once the columns are already gone
# no longer raises KeyError.
columns_to_drop = ['CH1', 'CH2', 'CH3', 'CH4']
efficiency_df = efficiency_df.drop(columns=columns_to_drop, errors='ignore')

In [4]:
# Column names of the efficiency data after the CH1-CH4 columns were dropped.
efficiency_df.columns

Index(['Time', 'RT', 'kW_Tot', 'kW_RT', 'CH Load', 'kW_CHH', 'kW_CHP',
       'kW_CHS', 'kW_CDS', 'kW_CT', 'GPM', 'DeltaCHW', 'CHWS', 'CHWR',
       'DeltaCDW', 'CDHI', 'CDLO', 'WBT', 'DeltaCT', 'Hz_ CHP', 'Hz_CHS',
       'Hz_CDS', 'Hz_CT', 'Precent_CH', 'Precent_ CHP', 'Precent_CDS',
       'Precent_CT', 'year', 'month', 'day', 'hour', 'minute', 'second'],
      dtype='object')

In [5]:
# Column names of the temperature data; note it carries both 'DateTime' and 'Time'.
temperature_df.columns

Index(['DateTime', 'RH [%]', 'Temperature [°C]', 'WBT_C', 'Time', 'year',
       'month', 'day', 'hour', 'minute', 'second'],
      dtype='object')

In [6]:
# Preview the first rows of the efficiency data.
efficiency_df.head()

Unnamed: 0,Time,RT,kW_Tot,kW_RT,CH Load,kW_CHH,kW_CHP,kW_CHS,kW_CDS,kW_CT,...,Precent_CH,Precent_ CHP,Precent_CDS,Precent_CT,year,month,day,hour,minute,second
0,2024-08-01 00:00:00,162.0,188.2,1.162,36.4,123.6,19.0,0,27.2,18.5,...,65.6,10.1,14.4,9.8,2024,8,1,0,0,0
1,2024-08-01 00:10:00,146.8,181.8,1.238,34.6,117.3,19.1,0,27.2,18.2,...,64.6,10.5,14.9,10.0,2024,8,1,0,10,0
2,2024-08-01 00:20:00,181.1,174.8,0.965,32.4,110.2,19.0,0,27.1,18.4,...,63.0,10.9,15.5,10.5,2024,8,1,0,20,0
3,2024-08-01 00:30:00,156.3,173.9,1.113,32.2,109.3,19.0,0,27.1,18.4,...,62.9,10.9,15.6,10.6,2024,8,1,0,30,0
4,2024-08-01 00:40:00,165.3,185.1,1.12,35.5,120.6,19.0,0,27.0,18.5,...,65.2,10.2,14.6,10.0,2024,8,1,0,40,0


In [7]:
# Preview the first rows of the temperature data.
temperature_df.head()

Unnamed: 0,DateTime,RH [%],Temperature [°C],WBT_C,Time,year,month,day,hour,minute,second
0,2024-08-01 00:01:26.000,80.2,30.1,27.3,2024-08-01 00:01:26,2024,8,1,0,1,26
1,2024-08-01 00:11:26.000,80.3,30.1,27.2,2024-08-01 00:11:26,2024,8,1,0,11,26
2,2024-08-01 00:21:26.000,80.4,30.0,27.2,2024-08-01 00:21:26,2024,8,1,0,21,26
3,2024-08-01 00:31:26.000,79.7,30.0,27.1,2024-08-01 00:31:26,2024,8,1,0,31,26
4,2024-08-01 00:41:27.000,78.5,29.9,26.8,2024-08-01 00:41:27,2024,8,1,0,41,27


In [8]:
# (rows, columns) of the efficiency data.
efficiency_df.shape

(4455, 33)

In [9]:
# (rows, columns) of the temperature data.
temperature_df.shape

(4489, 11)

In [10]:
# Parse the timestamp column of each frame into datetimes so the frames can be
# sorted chronologically and joined on time.
for frame, stamp_col in ((efficiency_df, 'Time'), (temperature_df, 'DateTime')):
    frame[stamp_col] = pd.to_datetime(frame[stamp_col])

In [11]:
# pd.merge_asof requires both join keys to be sorted, so order each frame by
# its timestamp column first.
efficiency_df = efficiency_df.sort_values(by='Time')
temperature_df = temperature_df.sort_values(by='DateTime')

In [12]:
# Pair every efficiency row with the temperature row whose timestamp is closest
# (earlier or later). Rows with no temperature reading inside the window keep
# NaN in the temperature-side columns. Column names shared by both frames come
# out with _x (efficiency) and _y (temperature) suffixes.
match_window = pd.Timedelta('6 minutes')  # Adjust as needed
merged_df = pd.merge_asof(
    left=efficiency_df,
    right=temperature_df,
    left_on='Time',
    right_on='DateTime',
    tolerance=match_window,
    direction='nearest',
)

In [13]:
# (rows, columns) after the merge.
merged_df.shape

(4455, 44)

In [14]:
# Measurement columns contributed by temperature_df. A NaN in any of them means
# the row either found no temperature reading within the merge tolerance or the
# matched reading was itself missing a value.
temperature_columns = ['Temperature [°C]', 'RH [%]', 'WBT_C']

# Boolean mask of rows lacking at least one temperature value.
has_missing_temperature = merged_df[temperature_columns].isna().any(axis=1)
unmatched_rows = merged_df.loc[has_missing_temperature]

print(f"Number of unmatched rows: {len(unmatched_rows)}")

Number of unmatched rows: 10


In [15]:
# Keep only the rows that have a value in every temperature column; merged_df
# itself is left untouched so the before/after counts can be compared.
merged_df_clean = merged_df.dropna(axis=0, how='any', subset=temperature_columns)

print(f"Rows before cleaning: {len(merged_df)}")
print(f"Rows after cleaning: {len(merged_df_clean)}")

Rows before cleaning: 4455
Rows after cleaning: 4445


In [16]:
# Sanity check: every temperature column should now report zero missing values.
remaining_nan_counts = merged_df_clean[temperature_columns].isna().sum()
print(remaining_nan_counts)

Temperature [°C]    0
RH [%]              0
WBT_C               0
dtype: int64


In [17]:
# Column names after the merge; names present in both inputs carry _x / _y suffixes.
merged_df_clean.columns

Index(['Time_x', 'RT', 'kW_Tot', 'kW_RT', 'CH Load', 'kW_CHH', 'kW_CHP',
       'kW_CHS', 'kW_CDS', 'kW_CT', 'GPM', 'DeltaCHW', 'CHWS', 'CHWR',
       'DeltaCDW', 'CDHI', 'CDLO', 'WBT', 'DeltaCT', 'Hz_ CHP', 'Hz_CHS',
       'Hz_CDS', 'Hz_CT', 'Precent_CH', 'Precent_ CHP', 'Precent_CDS',
       'Precent_CT', 'year_x', 'month_x', 'day_x', 'hour_x', 'minute_x',
       'second_x', 'DateTime', 'RH [%]', 'Temperature [°C]', 'WBT_C', 'Time_y',
       'year_y', 'month_y', 'day_y', 'hour_y', 'minute_y', 'second_y'],
      dtype='object')

In [18]:
# Columns made redundant by the merge:
#   - 'Time_y': the temperature frame's own 'Time' column ('DateTime' is kept)
#   - the split date/time components from both frames (_x = efficiency side,
#     _y = temperature side); 'Time_x' and 'DateTime' hold the full timestamps.
date_parts = ['year', 'month', 'day', 'hour', 'minute', 'second']
columns_to_drop = (
    ['Time_y']
    + [f'{part}_x' for part in date_parts]
    + [f'{part}_y' for part in date_parts]
)

# errors='ignore' keeps this self-reassigning cell safe to re-run: on a second
# execution the columns are already gone and a plain drop would raise KeyError.
merged_df_clean = merged_df_clean.drop(columns=columns_to_drop, errors='ignore')

In [19]:
# Restore the efficiency timestamp's original name now that 'Time_y' is gone.
merged_df_clean = merged_df_clean.rename(columns={'Time_x': 'Time'})

In [20]:
# Confirm the column set after dropping the redundant columns and renaming 'Time_x'.
merged_df_clean.columns

Index(['Time', 'RT', 'kW_Tot', 'kW_RT', 'CH Load', 'kW_CHH', 'kW_CHP',
       'kW_CHS', 'kW_CDS', 'kW_CT', 'GPM', 'DeltaCHW', 'CHWS', 'CHWR',
       'DeltaCDW', 'CDHI', 'CDLO', 'WBT', 'DeltaCT', 'Hz_ CHP', 'Hz_CHS',
       'Hz_CDS', 'Hz_CT', 'Precent_CH', 'Precent_ CHP', 'Precent_CDS',
       'Precent_CT', 'DateTime', 'RH [%]', 'Temperature [°C]', 'WBT_C'],
      dtype='object')

In [21]:
# Gap between each efficiency timestamp and its matched temperature timestamp,
# stored as a non-negative number of seconds.
time_gap = merged_df_clean['Time'] - merged_df_clean['DateTime']
merged_df_clean['Time_Difference'] = time_gap.abs().dt.total_seconds()

In [22]:
# Largest timestamp mismatch among the matched rows; it should stay within the
# 6-minute (360 s) tolerance used for the merge.
time_differences = merged_df_clean['Time_Difference']
max_time_diff = time_differences.max()
print(f"Maximum Time Difference: {max_time_diff} seconds")

Maximum Time Difference: 280.0 seconds


In [23]:
# Persist the cleaned, merged dataset.
# index=False: after the dropna step the row index has gaps and carries no
# meaning, and writing it would add an unnamed first column that the input CSVs
# do not have (it would come back as 'Unnamed: 0' on the next read_csv).
# NOTE(review): if a downstream reader loads this file with index_col=0, update it.
merged_df_clean.to_csv('cleaned2.csv', index=False)