In [None]:
import os
import re
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

## Future plan

I'm going to generate new data that is better suited to time series analysis. Rather than setting usage randomly and observing the resulting temperatures, I'm going to drive the CPU to its maximum temperature while logging, then let it cool down to its minimum temperature while logging. This data should give a good estimate of how quickly each case configuration dissipates heat. For example, a case without a top may drop 1 °C per second during cooldown.

I am not sure that I'll use ARIMA. It may not be necessary here.

Below is just some code I've been using to observe data collection schemes.

In [None]:
# Pick the capture file(s) to load from ./data. The pattern names one exact
# file; the "." before "csv" is escaped so it only matches a literal dot
# (unescaped, it would also accept any other character in that position).
raw_files = [
    x
    for x in os.listdir("./data")
    if re.match(r"^timeseries_break_1111010011_m80_w60_i600_d60\.csv$", x)
]

raw_files

# Read every selected file and tag each row with the file it came from, so the
# configuration encoded in the filename can be attached to the rows later.
raw = pd.concat([pd.read_csv(f"./data/{x}").assign(filename=x) for x in raw_files])

raw

def dummify_filename(filename):
    """Decode the 0/1 configuration code embedded in a data filename.

    The filename is expected to contain a run of binary digits, one digit per
    dummy label listed in ``keys`` below (for example the ``1111010011`` in
    ``timeseries_break_1111010011_m80_w60_i600_d60.csv``).

    Parameters
    ----------
    filename : str
        Name of the data file (without directory).

    Returns
    -------
    dict
        One integer entry per dummy label, plus ``"filename"`` holding the
        input unchanged, so the result can be merged back onto the raw data.
    """
    # List the dummy labels, in the order their digits appear in the code
    keys = ["case_under",
            "case_frame",
            "case_cable",
            "case_gpio",
            "top_solid",
            "top_holed",
            "top_intake",
            "top_exhaust",
            "heatsink_main",
            "heatsink_sub"
           ]
    # Drop the extension: keep everything before the first "."
    stem = filename.split(".")[0]
    # Look for a standalone run of exactly len(keys) binary digits. The
    # lookarounds stop it from matching inside a longer number, and searching
    # for the token directly keeps digits from other filename parts
    # (e.g. "m80", "w60") out of the code.
    match = re.search(rf"(?<!\d)[01]{{{len(keys)}}}(?!\d)", stem)
    if match:
        code = match.group(0)
    else:
        # Legacy fallback: no recognisable flag token, so strip every
        # non-digit character from the stem as the earlier version did.
        code = re.sub(r"\D", "", stem)
    # Break the code into a list of integers, one per digit
    flags = [int(x) for x in code]
    # Output into a dictionary, which pandas can transform into a dataframe.
    # zip() stops at the shorter input, so surplus digits are ignored.
    values = dict(zip(keys, flags))
    values["filename"] = filename
    return values

# Decode each distinct filename once; every decoded dict becomes one row
# of the lookup table.
unique_filenames = raw["filename"].drop_duplicates()
flag_rows = list(map(dummify_filename, unique_filenames))
flags = pd.DataFrame(flag_rows)

flags

# Attach the decoded flags to every raw row by matching on the "filename"
# column (.merge() joins on columns), then discard "filename" since the
# flag columns now carry that information.
df_flagged = (
    raw
    .merge(flags, on="filename")
    .drop(columns="filename")
)

df_flagged

# Build a datetime-indexed frame from df_flagged WITHOUT modifying it.
# A plain `df_dt = df_flagged` would only alias the same object, so any
# column assignment on df_dt would also rewrite df_flagged; .assign()
# returns a new frame instead.
df_dt = (
    df_flagged
    # Parse the datetime column (already in a standard format) and round
    # each timestamp to the nearest second
    .assign(datetime=lambda d: pd.to_datetime(d["datetime"]).dt.round("s"))
    # Set datetime as the index
    .set_index("datetime")
)

df_dt

In [None]:
# Line plot of the temp column against the datetime index
fig, ax = plt.subplots()
ax.plot(df_dt.index, df_dt["temp"])
plt.show()

### Make case_under and case_frame mandatory

It's unrealistic to model the frame and the bottom of the case as optional. It's unlikely that you would forgo these in real life. What you're really interested in is whether you have to take off the side panel, or which direction the fan should face.

In [None]:
# Keep only observations that have both case_under and case_frame, then drop
# those two columns: from here on they are assumed to always be present.
# Starts from df_dt, the datetime-indexed frame built above (the name `df`
# is never defined in this notebook, so it fails on a fresh kernel).
df_filtered = (
    df_dt
    .loc[lambda d: (d["case_under"] == 1) & (d["case_frame"] == 1)]
    .drop(columns=["case_under", "case_frame"])
)

df_filtered