## 1. Import Necessary Libraries

In [5]:
import pandas as pd
from functools import reduce

## 2. Load Datasets
Specify the file paths to your CSV files and load them into DataFrames.


In [None]:
# File paths (update these paths as necessary)
manual_sensors_path = 'path/to/manual_sensors.csv'
# Raw string: the backslashes are Windows path separators, not escape
# sequences ('\c' is an invalid escape and warns on recent Python versions).
weather_data_path = r'data\cleaned_data\cleaned_weather_data.csv'

# Load CSV files into DataFrames
manual_sensors = pd.read_csv(manual_sensors_path)
weather_data = pd.read_csv(weather_data_path)



## 3. Data Preprocessing
Convert each dataset's timestamp column to datetime and set it as the index, then align the rows to an hourly frequency and fill any missing values.

In [None]:
def prepare_data(df, timestamp_column='timestamp'):
    """
    Return an hourly, gap-filled copy of ``df`` indexed by its timestamp column.

    The input DataFrame is not modified; all work is done on a copy.

    Steps:
      1. Parse ``timestamp_column`` with ``pd.to_datetime`` and make it the index.
      2. Conform the index to an hourly grid with ``asfreq``. The grid runs from
         the earliest to the latest timestamp; hours with no matching row are
         inserted as NaN, and rows that do not fall on the grid are not kept.
      3. Impute missing values by forward fill, then backward fill (the backward
         pass covers NaNs at the very start of the series).

    Args:
        df: DataFrame containing ``timestamp_column``.
        timestamp_column: Name of the column holding the timestamps.

    Returns:
        A new DataFrame indexed by hourly timestamps.
    """
    # Work on a copy so the caller's DataFrame keeps its original columns/index.
    prepared = df.copy()
    prepared[timestamp_column] = pd.to_datetime(prepared[timestamp_column])
    prepared = prepared.set_index(timestamp_column)

    # 'h' is the hourly alias; the uppercase 'H' spelling is deprecated in
    # recent pandas releases.
    prepared = prepared.asfreq('h')

    # ffill()/bfill() replace the deprecated fillna(method=...) form.
    return prepared.ffill().bfill()

# Put both sources on an hourly, gap-filled timestamp index
manual_sensors = prepare_data(manual_sensors, 'timestamp')
weather_data = prepare_data(weather_data, 'timestamp')


## 4. Concatenate DataFrames
Combine all datasets column-wise on the timestamp index, keeping only the timestamps that are present in every dataset (inner join).

In [None]:

# List of DataFrames to concatenate (each one indexed by timestamp)
data_frames = [manual_sensors, weather_data]

# Concatenate DataFrames on the timestamp index in a single call.
# pd.concat accepts the whole list, so no pairwise reduce is needed;
# join='inner' keeps only the timestamps present in every DataFrame.
merged_data = pd.concat(data_frames, axis=1, join='inner')

# Optional: forward-fill any values still missing after the join.
# ffill() replaces the deprecated fillna(method='ffill') form.
merged_data = merged_data.ffill()

# Save the combined dataset to a new CSV (update this path as necessary)
merged_data.to_csv('path/to/merged_sensor_data.csv')


## 5. Review the Combined Data
Check the first few rows of the merged DataFrame to ensure it looks correct.

In [None]:
# Preview the top five rows of the merged dataset as a quick sanity check
merged_data.head(n=5)
