# Sensor Data Analysis

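This notebook computes the difference between sensor 20 and sensor 17 (sensor 20 − sensor 17) for temperature, humidity, irradiance, and power at every timestamp that appears in both sensors' data, and saves the result to an Excel file.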


In [None]:
import pandas as pd
import numpy as np

# Workbook locations, one combined-data file per sensor
sensor_17_file = 'data/sensor_17_combined_data.xlsx'
sensor_20_file = 'data/sensor_20_combined_data.xlsx'

# Load each workbook into its own DataFrame
df_17 = pd.read_excel(sensor_17_file)
df_20 = pd.read_excel(sensor_20_file)

# Print the same overview for each sensor: dimensions, column names, and the
# smallest/largest value of the 'datetime' column. The second heading starts
# with a newline so the two reports are separated by a blank line.
for heading, frame in (("Sensor 17 data:", df_17), ("\nSensor 20 data:", df_20)):
    print(heading)
    print(f"  Shape: {frame.shape}")
    print(f"  Columns: {frame.columns.tolist()}")
    print(f"  Time range: {frame['datetime'].min()} to {frame['datetime'].max()}")


In [None]:
# Align the two sensors on their shared timestamps.
#
# Both 'datetime' columns go through pd.to_datetime first so the join key is
# parsed the same way on each side.
df_17['datetime'] = pd.to_datetime(df_17['datetime'])
df_20['datetime'] = pd.to_datetime(df_20['datetime'])

# A timestamp that occurs more than once within a sensor is paired by the merge
# with every matching row from the other sensor, which inflates the row count
# and every statistic computed from it. Report that up front instead of letting
# it pass silently; the merge itself is left unchanged.
for sensor_label, sensor_frame in (('17', df_17), ('20', df_20)):
    duplicate_count = int(sensor_frame['datetime'].duplicated().sum())
    if duplicate_count:
        print(
            f"WARNING: sensor {sensor_label} has {duplicate_count} repeated "
            "timestamp(s); the merge may produce extra rows for them"
        )

# Join key plus the four measurements compared between the sensors
compared_columns = ['datetime', 'upper_temp_C', 'upper_humidity', 'upper_irradiance', 'power_W']

# Inner join keeps only timestamps present in both sensors; the suffixes tell
# the two sensors' measurement columns apart (e.g. 'power_W_17' / 'power_W_20').
merged = pd.merge(
    df_17[compared_columns],
    df_20[compared_columns],
    on='datetime',
    how='inner',
    suffixes=('_17', '_20'),
)

print(f"Merged data shape: {merged.shape}")
print(f"Common timestamps: {len(merged)}")
print(f"Time range: {merged['datetime'].min()} to {merged['datetime'].max()}")
print("\nFirst few rows:")
print(merged.head(10))


In [None]:
# Build the per-timestamp difference table in one step.
# Every value column is sensor 20 minus sensor 17, read from the suffixed
# columns of the merged frame.
differences = pd.DataFrame({
    'datetime': merged['datetime'],
    'temp_diff_C': merged['upper_temp_C_20'] - merged['upper_temp_C_17'],
    'humidity_diff_pct': merged['upper_humidity_20'] - merged['upper_humidity_17'],
    'irradiance_diff': merged['upper_irradiance_20'] - merged['upper_irradiance_17'],
    'power_diff_W': merged['power_W_20'] - merged['power_W_17'],
})

# Put the rows in chronological order, then renumber them from 0
differences = differences.sort_values(by='datetime')
differences = differences.reset_index(drop=True)

# Show the table's dimensions followed by its first and last 20 rows
print("Differences (Sensor 20 - Sensor 17):")
print(f"Data shape: {differences.shape}")
print("\nFirst few rows:")
print(differences.head(20))
print("\nLast few rows:")
print(differences.tail(20))


In [None]:
# Report descriptive statistics for the four difference columns, then the mean
# of each one with its unit label.
summary_rows = (
    ("Temperature", 'temp_diff_C', "°C"),
    ("Humidity", 'humidity_diff_pct', "%"),
    ("Irradiance", 'irradiance_diff', "μW/cm²"),
    ("Power", 'power_diff_W', "W"),
)

print("Summary Statistics of Differences (Sensor 20 - Sensor 17):")
print("=" * 60)
print(differences[[column for _, column, _ in summary_rows]].describe())

print("\nMean differences:")
for label, column, unit in summary_rows:
    # Each mean is taken on its own column and shown with four decimals
    print(f"  {label}: {differences[column].mean():.4f} {unit}")


In [None]:
# Write the difference table to a workbook; the DataFrame index is left out
output_file = 'data/sensor_17_20_differences.xlsx'
differences.to_excel(output_file, index=False)

# Confirm where the file went and how many rows it holds
print(
    f"Differences saved to: {output_file}",
    f"File contains {len(differences)} rows with differences at each timestamp",
    sep="\n",
)
