# Govee Wireless Temperature Sensor Analysis with Python

This page documents my experiments using a pair of [Govee](https://govee.com/) Bluetooth thermometer/hygrometer devices to measure and compare temperature outside vs. in my attic. I can't find the exact product on the Govee website, but a Bluetooth sensor is available on Amazon for $15.

In [None]:
import pandas as pd
import numpy as np
import datetime
import matplotlib
import matplotlib.pyplot as plt

Let's start by creating a function that reads a Govee export file and returns just the timestamp and temperature columns.

In [None]:
def fToC(f):
    """Convert a temperature from Fahrenheit to Celsius.

    Args:
        f: Temperature in degrees Fahrenheit (a number, or any object that
            supports arithmetic such as a NumPy array).

    Returns:
        The equivalent temperature in degrees Celsius.
    """
    # Remove the 32-degree offset *before* scaling. Without the parentheses,
    # operator precedence evaluates 32 * 5 / 9 first and the result is wrong.
    return (f - 32) * 5 / 9.

def readTemperatureData(filename: str):
    """Load a Govee CSV export, keeping only the timestamp and temperature.

    The text before the first "." in `filename` is used as the prefix of the
    two returned column names ("<prefix> Timestamp" and "<prefix> F").
    """
    prefix = filename.split(".")[0]
    table = pd.read_csv(filename)

    # Turn the first column's text into datetime objects.
    stampColumn = table.columns[0]
    table[stampColumn] = [
        datetime.datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
        for text in table[stampColumn].values
    ]

    # The third column (humidity) is not needed for this analysis.
    table = table.drop(columns=table.columns[2])
    table.columns = [f"{prefix} Timestamp", f"{prefix} F"]
    return table


Now let's load our data and compare the timestamps

In [None]:
# Read each sensor's export into its own two-column frame.
attic = readTemperatureData("attic.csv")
outdoor = readTemperatureData("outdoor.csv")

# Place the frames side by side (attic columns first) so the two
# timestamp columns can be compared row by row.
df = pd.concat((attic, outdoor), axis="columns")
print(df)

Excellent! They're already aligned. I tried to activate both sensors at the same time, and it looks like it worked. 

I'll trim the data so it starts and ends at midnight, then remove the duplicate timestamp column.

In [None]:
# Row offsets chosen by hand for this particular recording.
# NOTE(review): presumably 285 rows reach the first midnight and 60 * 24 skips
# one further day of per-minute samples -- confirm against the raw export.
firstIndex = 285 + 60 * 24
lastIndex = -718

# Keep only the rows between the offsets, discard the redundant outdoor
# timestamp, and give the remaining columns friendlier names.
df = df.iloc[firstIndex:lastIndex, :].drop(columns="outdoor Timestamp")
df.columns = ["Timestamp", "Attic (F)", "Outdoor (F)"]
print(df)


In [None]:
def twinTemperatureAxes(ax1):
    """Configure an axes to show temperature on both Y axes (F left, C right).

    The Celsius limits are derived from the Fahrenheit limits at the moment
    this function is called, so plot the data on `ax1` first.

    Args:
        ax1: Matplotlib axes holding data plotted in degrees Fahrenheit.
    """
    # Only draw a legend when something is labelled. Calling legend() with no
    # labelled artists makes matplotlib emit a warning and adds nothing useful.
    handles, _labels = ax1.get_legend_handles_labels()
    if handles:
        ax1.legend()
    ax1.grid(alpha=.5, ls='--')
    ax1.set_ylabel("Temperature (F)", fontsize=16)

    # Secondary Y axis: same span as the primary axis, expressed in Celsius.
    ax2 = ax1.twinx()
    ax2.set_ylabel("Temperature (C)", fontsize=16)
    ymin, ymax = ax1.get_ylim()
    ax2.set_ylim((fToC(ymin), fToC(ymax)))
    # NOTE(review): empty plot kept from the original; its purpose is unclear.
    ax2.plot([], [])

In [None]:
# Full time series: outdoor first, then attic, on a wide figure.
fig, ax1 = plt.subplots(figsize=(10, 4))
fig.autofmt_xdate()
for column, label in (("Outdoor (F)", "Outdoor"), ("Attic (F)", "Attic")):
    ax1.plot(df["Timestamp"], df[column], label=label)

# Add the Celsius axis once the Fahrenheit data (and its limits) are in place.
twinTemperatureAxes(ax1)
fig.tight_layout()


In [None]:
# Use the vectorized Series.max(), which skips missing readings; the builtin
# max() iterates in Python and gives order-dependent results when NaN is present.
maxOutdoorF = df["Outdoor (F)"].max()
maxAtticF = df["Attic (F)"].max()
print(f"Maximum measured outdoor temperature: {maxOutdoorF} F ({fToC(maxOutdoorF):.01f} C)")
print(f"Maximum measured attic temperature: {maxAtticF} F ({fToC(maxAtticF):.01f} C)")

Let's rearrange the time series data by segmenting it into days and overlapping them so we can get an impression of the typical day.

In [None]:
xs = np.arange(60*24) / 60  # hours

fig, ax1 = plt.subplots(figsize=(6, 4))

# One semi-transparent trace per calendar day: outdoor in C0, then attic in C1.
days = df.groupby(df["Timestamp"].dt.date)
for column, color in (("Outdoor (F)", 'C0'), ("Attic (F)", 'C1')):
    for day, dayValues in days[column]:
        ax1.plot(xs, dayValues.values, color=color, alpha=.5)

twinTemperatureAxes(ax1)
ax1.grid(alpha=.5, ls='--')
ax1.set_xlabel("Time of Day (Hour)", fontsize=16)

I'm curious to know what the difference is between the outdoor and attic temperatures...

In [None]:
fig, ax1 = plt.subplots(figsize=(6, 4))

# Build one (days x minutes) matrix per sensor, one row per calendar day.
days = df.groupby(df["Timestamp"].dt.date)
valuesAttic = np.empty((len(days), 60*24))
valuesOutdoor = np.empty((len(days), 60*24))
for target, column in ((valuesAttic, "Attic (F)"), (valuesOutdoor, "Outdoor (F)")):
    for row, (day, dayValues) in enumerate(days[column]):
        target[row] = dayValues.values

# Attic minus outdoor, drawn as one trace per day.
# `xs` (hour of day) comes from the earlier overlay cell.
diffsByDay = valuesAttic - valuesOutdoor
for dayDiffs in diffsByDay:
    ax1.plot(xs, dayDiffs, color='C3', alpha=.5)

ax1.set_xlabel("Time of Day (Hour)", fontsize=16)
ax1.set_ylabel("Difference (F)")
ax1.grid(alpha=.5, ls='--')
fig.tight_layout()

It appears that the attic maximally exceeds the outdoor temperature near 3PM. Let's take a closer look at the daily maximum difference between the attic and outdoor temperatures...

In [None]:
# Largest attic-minus-outdoor difference observed on each day.
peakDiffsByDay = np.max(diffsByDay, axis=1)
plt.plot(peakDiffsByDay, '.-', color="C3")
plt.ylabel("Peak Difference (F)")
plt.xlabel("Day (#)")
plt.grid(alpha=.5, ls='--')

meanDiffF = np.mean(peakDiffsByDay)
peakDiffF = np.max(peakDiffsByDay)

# These are temperature *differences*, so only the 5/9 scale factor applies.
# Subtracting the 32-degree offset (as an absolute F-to-C conversion does)
# would report the wrong Celsius value.
meanDiffC = meanDiffF * 5 / 9
peakDiffC = peakDiffF * 5 / 9
print(f"Mean daily peak difference: {meanDiffF:.1f} F ({meanDiffC:.1f} C)")
print(f"Maximum daily peak difference: {peakDiffF:.1f} F ({peakDiffC:.1f} C)")

Let's try to identify days it rained vs. days it didn't based on the mean daily temperature

In [None]:
xs = np.arange(60*24) / 60

# Overlay every day's trace: outdoor (C0) first, then attic (C1).
byDate = df.groupby(df["Timestamp"].dt.date)
for column, color in (("Outdoor (F)", "C0"), ("Attic (F)", "C1")):
    for day, dayValues in byDate[column]:
        plt.plot(xs, dayValues.values, color=color, alpha=.5)

twinTemperatureAxes(plt.gca())
plt.xlabel("Time of Day (Hour)")


In [None]:
def plotMeanByDay(column, label, color):
    """Plot the across-day mean of `column` with a +/- one standard deviation band.

    Rows of the global `df` are grouped by calendar date and stacked into a
    (days x 60*24) matrix; the mean and standard deviation are taken across
    days for each position within the day.

    Args:
        column: Name of the `df` column to summarize.
        label: Legend label for the mean line.
        color: Matplotlib color used for both the line and the shaded band.
    """
    perDay = df.groupby(df["Timestamp"].dt.date)[column]
    stacked = np.empty((len(perDay), 60*24))
    for row, (day, dayValues) in enumerate(perDay):
        stacked[row] = dayValues.values

    hours = np.arange(60*24) / 60
    center = stacked.mean(axis=0)
    spread = stacked.std(axis=0)
    plt.plot(hours, center, label=label, color=color)
    plt.fill_between(hours, center - spread, center + spread, color=color, alpha=.3)

# Mean +/- standard deviation for each sensor, outdoor first.
for column, label, color in (("Outdoor (F)", "Outdoor", "C0"),
                             ("Attic (F)", "Attic", "C1")):
    plotMeanByDay(column, label, color)
twinTemperatureAxes(plt.gca())
plt.xlabel("Time of Day (Hour)")


Let's plot one against the other to see how tightly correlated they are. I suspect changes in the attic temperature will lag the outdoor temperature due to its large thermal mass resulting in some type of hysteresis.

In [None]:
def plotDailyComparisons():
    """Draw one light gray attic-vs-outdoor curve for every day in the global `df`."""
    byDate = df.groupby(df["Timestamp"].dt.date)
    atticRows = np.empty((len(byDate), 60*24))
    outdoorRows = np.empty((len(byDate), 60*24))

    # Fill one matrix row per calendar day for each sensor.
    for target, column in ((atticRows, "Attic (F)"), (outdoorRows, "Outdoor (F)")):
        for row, (day, dayValues) in enumerate(byDate[column]):
            target[row] = dayValues.values

    # Outdoor temperature on X, attic temperature on Y.
    for outdoorDay, atticDay in zip(outdoorRows, atticRows):
        plt.plot(outdoorDay, atticDay, color='.7', alpha=.5)


def getMeanValuesByDay(column):
    """Average `column` of the global `df` across days, position by position.

    Rows are grouped by calendar date and stacked into a (days x 60*24)
    matrix; the returned array holds the mean over days for each of the
    60*24 positions within a day.
    """
    perDay = df.groupby(df["Timestamp"].dt.date)[column]
    stacked = np.empty((len(perDay), 60*24))
    for row, (day, dayValues) in enumerate(perDay):
        stacked[row] = dayValues.values
    return stacked.mean(axis=0)


def plotDailyMeanComparison():
    """Draw the across-day mean attic temperature against the mean outdoor temperature in black."""
    plt.plot(
        getMeanValuesByDay("Outdoor (F)"),  # X: outdoor
        getMeanValuesByDay("Attic (F)"),    # Y: attic
        color='k',
    )


# Individual days in gray, with the across-day mean drawn on top in black.
plotDailyComparisons()
plotDailyMeanComparison()

# Label and grid the axes that the two helpers just drew on.
ax = plt.gca()
ax.set_xlabel("Mean Outdoor Temperature (F)")
ax.set_ylabel("Mean Attic Temperature (F)")
ax.grid(alpha=.5, ls='--')


Let's add color to encode time of day

In [None]:
# Color each point by its time of day, in hours.
hourOfDay = np.arange(60*24) / 60
sp = plt.scatter(
    x=getMeanValuesByDay("Outdoor (F)"),
    y=getMeanValuesByDay("Attic (F)"),
    c=hourOfDay)

# Colorbar explaining the time-of-day encoding.
cb = plt.colorbar()
cb.ax.locator_params(nbins=12)
cb.set_label(label='Time of Day (Hour)')

plt.xlabel("Mean Outdoor Temperature (F)")
plt.ylabel("Mean Attic Temperature (F)")
plt.grid(alpha=.5, ls='--')


Notice that each day starts near the lower-left (dark blue) and traces a clockwise loop. The shape is a loop instead of a straight line because the attic temperature always lags the outdoor temperature, especially when the temperature is changing quickly at the upper right near hour 15 (3PM).