In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def renameColumns(df):
    """Return the column names of ``df`` with any leading ``<prefix>-`` removed.

    Only the text up to and including the first '-' is stripped; names that
    contain no '-' are returned unchanged. ``df`` itself is not modified.
    """
    cleanedNames = []
    for name in df.columns:
        if '-' in name:
            # maxsplit=1 keeps any later dashes as part of the new name
            name = name.split('-', 1)[1]
        cleanedNames.append(name)
    return cleanedNames

def convertTime(df):
    """Convert the 'Time' column to datetimes and sort the rows by it.

    An integer 'Time' column is interpreted as milliseconds since the Unix
    epoch; any other column is parsed with the format '%Y-%m-%d %H:%M:%S'.

    The frame is modified in place (column replaced, rows re-ordered) and the
    same object is returned. Row labels are not reset by the sort.
    """
    # Check for "any integer dtype" rather than exactly int64, so that other
    # integer widths are not sent to the string-format branch.
    if pd.api.types.is_integer_dtype(df["Time"]):
        df["Time"] = pd.to_datetime(df["Time"], unit='ms')
    else:
        df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%d %H:%M:%S')
    df.sort_values(by=['Time'], inplace=True)
    return df

In [3]:
def readAndProcessCSV(filename):
    """Load one CSV file and normalise it.

    The column names are replaced by the result of ``renameColumns`` and the
    frame is then passed through ``convertTime``, whose return value is
    handed back to the caller.
    """
    loadedFrame = pd.read_csv(filename)
    loadedFrame.columns = renameColumns(loadedFrame)
    return convertTime(loadedFrame)

In [4]:
def isWeekend(dateObj):
    """Return True when ``dateObj.weekday()`` is 5 or 6 (Saturday or Sunday)."""
    dayIndex = dateObj.weekday()
    return dayIndex == 5 or dayIndex == 6

def isWeeekday(dateObj):
    """Return True when ``isWeekend(dateObj)`` is false.

    The spelling of the name (three e's) is kept as-is because the notebook
    calls the function under this name.
    """
    if isWeekend(dateObj):
        return False
    return True

#Depending on the mask passed in, you get the weekend data or the weekday data.
#NOTE: the rows where the mask is True are the ones that are REMOVED.
def applyMask(df, mask):
    """Return the rows of ``df`` for which ``mask`` is False, re-indexed from 0.

    ``mask`` is a boolean Series that can be used to index ``df``; it is
    inverted before indexing, so a "weekend" mask yields the non-weekend rows.
    """
    remainingRows = df[~mask]
    return remainingRows.reset_index(drop=True)

def eliminateNightHours(df, startHour=6, endHour=23):
    """Keep only the rows whose 'Time' hour lies in [startHour, endHour].

    Both bounds are inclusive and compared against the integer hour of day.
    With the defaults (6 and 23) everything from 06:00:00 through 23:59:59 is
    kept; since 23 is the largest possible hour, the default upper bound does
    not remove anything. Pass e.g. ``endHour=22`` to cut the window at 23:00.

    The original row labels are preserved.
    """
    hourOfDay = df['Time'].dt.hour
    return df[(hourOfDay >= startHour) & (hourOfDay <= endHour)]

def getDataOneHour(hour, df):
    """Return the rows of ``df`` recorded during the given hour of day.

    ``hour`` is the integer hour (0-23); the rows from hh:00:00 through
    hh:59:59 are returned, on every date present in ``df``.

    The previous condition ``hour < h <= hour + 1`` was applied to the integer
    hour component, so it actually selected the *following* hour and made
    hour 0 impossible to select.
    """
    return df[df['Time'].dt.hour == hour]

def getDataOneDay(date, df):
    """Return the rows of ``df`` whose 'Time' falls on the calendar day of ``date``.

    ``date`` only needs ``year``, ``month`` and ``day`` attributes; each one is
    compared with the matching component of the 'Time' column.
    """
    stamp = df['Time'].dt
    sameYear = stamp.year == date.year
    sameMonth = stamp.month == date.month
    sameDay = stamp.day == date.day
    return df[sameDay & sameMonth & sameYear]

def getDataThreeDays(date, df):
    """Return the rows of ``df`` from the day of ``date`` and the two days after it.

    ``date`` only needs ``year``, ``month`` and ``day`` attributes. The window
    is the half-open range [date 00:00, date + 3 days 00:00).

    The window is built from real timestamps instead of comparing the
    day-of-month number, so it is not cut short when it crosses a month or
    year boundary (e.g. 31 Oct -> 2 Nov).
    """
    windowStart = pd.Timestamp(year=date.year, month=date.month, day=date.day)
    windowEnd = windowStart + pd.Timedelta(days=3)
    return df[(df['Time'] >= windowStart) & (df['Time'] < windowEnd)]

def eliminateDayHours(df, startHour=6, endHour=23):
    """Drop the daytime rows and keep only the rows outside [startHour, endHour].

    The day window uses inclusive integer hours. A row is kept when its 'Time'
    hour is smaller than ``startHour`` or larger than ``endHour``; with the
    defaults (6 and 23) that is 00:00:00 through 05:59:59. Pass ``endHour=22``
    to also treat 23:00-23:59 as night.

    The previous condition (``6 < hour < 23``) kept the daytime rows, i.e. the
    opposite of what the function name promises.

    The original row labels are preserved.
    """
    hourOfDay = df['Time'].dt.hour
    return df[(hourOfDay < startHour) | (hourOfDay > endHour)]

In [5]:
def makeDfSensorSingle(df, sensorName):
    """Build a frame holding 'Time' plus the columns of a single sensor.

    A column belongs to the sensor when ``sensorName`` occurs anywhere in its
    name. Columns that are entirely NaN are removed first, then every row that
    still contains a NaN is removed, and the result is re-indexed from 0.
    """
    matchingColumns = [name for name in df.columns if sensorName in name]
    return (
        df[['Time'] + matchingColumns]
        .dropna(axis=1, how='all')   # drop columns with no readings at all
        .dropna()                    # drop rows with any missing reading
        .reset_index(drop=True)
    )

In [6]:
#DF that has all the data from October and November
#The frames are collected in a list and concatenated once at the end, so the
#growing frame is not copied again on every loop iteration.
octNovFrames = [readAndProcessCSV("sensorData/08_45_18.csv")]
fileNames = ["08_46_27", "08_47_03", "08_48_14", "08_48_51", "08_49_32", "08_50_13", "08_50_46"]
for fileName in fileNames:
    df = readAndProcessCSV("sensorData/" + fileName + ".csv")
    octNovFrames.append(df)
#Row labels are kept as they come from each file (no ignore_index), so the
#same label can occur more than once in dfOctNov.
dfOctNov = pd.concat(octNovFrames)

#Time range of dfOctNov is 2023-10-01 to 2023-11-25
#dfOctNov["Time"].dt.date

In [7]:
#DF that has only daytime data (rows kept by eliminateNightHours), re-indexed from 0
dfDay = eliminateNightHours(dfOctNov).reset_index(drop=True)

In [8]:

#dfWeekend - df that contains only the data from the weekend
#weekendMask is True on Saturday/Sunday rows.
weekendMask = dfOctNov['Time'].apply(isWeekend)
#applyMask() REMOVES the rows where the mask it receives is True, so the
#inverted mask is passed here in order to keep the weekend rows.
dfWeekend = applyMask(dfOctNov, ~weekendMask)

In [9]:
#dfWeekday - df that contains only the data from the week
#weekdayMask is True on Monday-Friday rows.
weekdayMask = dfOctNov['Time'].apply(isWeeekday)
#applyMask() REMOVES the rows where the mask it receives is True, so the
#inverted mask is passed here in order to keep the weekday rows.
dfWeekday = applyMask(dfOctNov, ~weekdayMask)

In [10]:
#dfFinal - the rows left after applyMask removes the weekendMask rows,
#restricted to the hours kept by eliminateNightHours and re-indexed from 0
withoutWeekend = applyMask(dfOctNov, weekendMask)
dfFinal = eliminateNightHours(withoutWeekend).reset_index(drop=True)

In [11]:
#Time plus the 'eye01' columns of dfFinal; preview of the first rows
sensorEye01 = makeDfSensorSingle(dfFinal, sensorName='eye01')
sensorEye01.head()

Unnamed: 0,Time,eye01.humidity,eye01.light,eye01.motion,eye01.occupancy,eye01.temperature,eye01.vdd
0,2023-10-02 06:09:31,42.0,1.0,0.0,0.0,22.8,3662.0
1,2023-10-02 06:19:31,42.0,1.0,0.0,0.0,22.7,3662.0
2,2023-10-02 06:39:31,43.0,0.0,0.0,0.0,22.7,3662.0
3,2023-10-02 06:49:31,43.0,0.0,0.0,0.0,22.7,3662.0
4,2023-10-02 06:59:31,43.0,1.0,0.0,0.0,22.6,3664.0


In [12]:
#Time plus the 'gas01' columns of dfFinal; preview of the first rows
sensorGas01 = makeDfSensorSingle(dfFinal, sensorName='gas01')
sensorGas01.head()

Unnamed: 0,Time,gas01.co2,gas01.humidity,gas01.light,gas01.motion,gas01.temperature,gas01.vdd
0,2023-10-02 06:06:11,410.0,37.0,0.0,0.0,25.5,3667.0
1,2023-10-02 06:16:11,404.0,37.0,0.0,0.0,25.5,3667.0
2,2023-10-02 06:26:10,409.0,37.0,0.0,0.0,25.5,3670.0
3,2023-10-02 06:36:10,410.0,37.0,0.0,0.0,25.4,3667.0
4,2023-10-02 06:46:10,401.0,37.0,0.0,0.0,25.5,3670.0


In [13]:
#Time plus the 'voc01' columns of dfFinal; preview of the first rows
sensorVoc01 = makeDfSensorSingle(dfFinal, sensorName='voc01')
sensorVoc01.head()

Unnamed: 0,Time,voc01.humidity,voc01.light,voc01.motion,voc01.temperature,voc01.tvoc,voc01.vdd
0,2023-10-02 06:05:18,35.0,0.0,0.0,24.7,65.0,3648.0
1,2023-10-02 06:10:18,35.0,0.0,0.0,24.6,59.0,3648.0
2,2023-10-02 06:15:18,35.0,0.0,0.0,24.7,51.0,3648.0
3,2023-10-02 06:20:25,36.0,0.0,0.0,24.6,66.0,3648.0
4,2023-10-02 06:25:18,36.0,13.0,0.0,24.6,63.0,3648.0
