In [13]:
import pandas as pd
import matplotlib.pyplot as plt

In [14]:
def renameColumns(df):
    """Return the column labels of ``df`` with any leading ``<prefix>-`` removed.

    Only the text up to and including the first hyphen is stripped; labels
    without a hyphen are returned unchanged. ``df`` itself is not modified.
    """
    cleaned = []
    for label in df.columns:
        if '-' in label:
            # Keep everything after the first hyphen.
            label = label.partition('-')[2]
        cleaned.append(label)
    return cleaned

def convertTime(df):
    """Parse the ``Time`` column into datetimes and sort ``df`` by it, in place.

    An ``int64`` column is read as milliseconds since the epoch; anything
    else is parsed with the ``%Y-%m-%d %H:%M:%S`` format. The same (mutated)
    DataFrame object is returned.
    """
    isEpochMillis = df["Time"].dtype == 'int64'
    if isEpochMillis:
        parsed = pd.to_datetime(df["Time"], unit='ms')
    else:
        parsed = pd.to_datetime(df['Time'], format='%Y-%m-%d %H:%M:%S')
    df["Time"] = parsed
    df.sort_values(by=['Time'], inplace=True)
    return df

In [15]:
def readAndProcessCSV(filename):
    """Load a CSV file, strip column-name prefixes and normalise its ``Time`` column.

    The column labels are replaced by the output of ``renameColumns`` and the
    frame is then passed through ``convertTime``; the resulting DataFrame is
    returned.
    """
    rawFrame = pd.read_csv(filename)
    rawFrame.columns = renameColumns(rawFrame)
    return convertTime(rawFrame)

In [16]:
def applyMask(df, mask):
    """Return the rows of ``df`` where ``mask`` is False, re-indexed from 0.

    Rows flagged True by the boolean ``mask`` are excluded, and the original
    index is discarded.
    """
    rowsToKeep = ~mask
    return df[rowsToKeep].reset_index(drop=True)

def eliminateNightHours(df):
    """Keep only the rows whose ``Time`` hour lies between 6 and 23 inclusive.

    The original index labels of the surviving rows are preserved.
    """
    hourOfDay = df['Time'].dt.hour
    return df[hourOfDay.between(6, 23)]

def isWeekend(dateObj):
    """Return True when ``dateObj.weekday()`` is 5 or 6 (Saturday or Sunday)."""
    # weekday() numbers Monday as 0, so the weekend is everything from 5 upwards.
    return dateObj.weekday() >= 5

def isWeeekday(dateObj):
    """Return True when ``dateObj`` is not a weekend day according to ``isWeekend``."""
    fallsOnWeekend = isWeekend(dateObj)
    return not fallsOnWeekend

In [17]:
def makeDfSensorSingle(df, sensorName):
    """Extract the ``Time`` column plus every column whose name contains ``sensorName``.

    Columns that are entirely missing are removed first, then any row that
    still has a missing value is dropped, and the index is rebuilt from 0.
    """
    matching = [name for name in df.columns if sensorName in name]
    return (
        df[['Time'] + matching]
        .dropna(axis=1, how='all')   # discard columns with no data at all
        .dropna()                    # then discard incomplete rows
        .reset_index(drop=True)
    )

def dropVddColumns(df):
    """Return ``df`` without the columns whose name matches the regex ``vdd``."""
    vddColumns = df.filter(regex='vdd').columns
    keptColumns = df.columns.drop(vddColumns)
    return df.loc[:, keptColumns]

In [18]:
# DataFrame that holds all the data from October and November.
firstFileName = "08_45_18"
fileNames = ["08_46_27", "08_47_03", "08_48_14", "08_48_51", "08_49_32", "08_50_13", "08_50_46"]

frames = []
for fileName in [firstFileName] + fileNames:
    df = readAndProcessCSV("sensorData/" + fileName + ".csv")
    # Treat the literal string 'undefined' as a missing value in every file;
    # previously the first file skipped this cleaning step.
    df = df.replace('undefined', pd.NA)
    frames.append(df)

# Concatenate once instead of growing the frame inside the loop; the row
# order and index labels are the same as with incremental concatenation.
dfOctNov = pd.concat(frames)

# Time range of dfOctNov is 2023-10-01 to 2023-11-25

In [19]:
# Flag weekend rows, then keep only weekday rows between 06:00 and 23:59
# and discard the 'vdd' columns.
weekendMask = dfOctNov['Time'].apply(isWeekend)
dfFinal = (
    applyMask(dfOctNov, weekendMask)
    .pipe(eliminateNightHours)
    .pipe(dropVddColumns)
    .reset_index(drop=True)
)

In [20]:
selectedSensors = ['eye03', 'eye04', 'eye05', 'eye09', 'eye11', 'gas01', 'gas02', 'gas03', 'gas04', 'gas05', 'gas06', 'voc01', 'voc02', 'voc03', 'voc04', 'voc05']

# One cleaned frame per sensor: 'eye02' first, then the selected sensors in order.
sensorFrames = [makeDfSensorSingle(dfFinal, sensor) for sensor in ['eye02'] + selectedSensors]

# NOTE(review): every per-sensor frame is re-indexed from 0 after its own
# dropna, so this side-by-side concat pairs rows by position rather than by
# timestamp -- confirm that this is the intended alignment.
dfConcat = pd.concat(sensorFrames, axis=1)

# Remove every 'Time' column, then keep only the rows that are complete for all sensors.
dfConcat = dfConcat.drop(columns='Time').dropna().reset_index(drop=True)

In [21]:
def toNumeric(listOfColumns, df=None):
    """Convert the given columns to a numeric dtype, in place.

    Parameters
    ----------
    listOfColumns : iterable of str
        Names of the columns to convert with ``pd.to_numeric``.
    df : pandas.DataFrame, optional
        Frame to modify. When omitted, the notebook-level ``dfConcat`` is
        used, which keeps existing one-argument calls working unchanged.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_numeric`` when a value cannot be parsed.
    """
    # Fall back to the global frame only when the caller did not supply one.
    target = dfConcat if df is None else df
    for column in listOfColumns:
        target[column] = pd.to_numeric(target[column])

def dropColumns(listOfColumns, df):
    """Remove the columns named in ``listOfColumns`` from ``df`` in place; returns None."""
    df.drop(columns=listOfColumns, inplace=True)

def computeMeanColumn(df, list, columnName):
    """Write the row-wise mean of the ``list`` columns to ``df[columnName]``, rounded to 2 decimals.

    ``df`` is modified in place and nothing is returned.
    """
    rowMeans = df[list].mean(axis=1)
    df[columnName] = rowMeans.round(2)

def computeStdColumn(df, list, columnName):
    """Write the row-wise standard deviation of the ``list`` columns to ``df[columnName]``, rounded to 2 decimals.

    ``df`` is modified in place and nothing is returned.
    """
    rowStd = df[list].std(axis=1)
    df[columnName] = rowStd.round(2)

def computeMedianColumn(df, list, columnName):
    """Write the row-wise median of the ``list`` columns to ``df[columnName]``, rounded to 2 decimals.

    ``df`` is modified in place and nothing is returned.
    """
    rowMedian = df[list].median(axis=1)
    df[columnName] = rowMedian.round(2)

def computeMaxColumn(df, list, columnName):
    """Write the row-wise maximum of the ``list`` columns to ``df[columnName]``, rounded to 2 decimals.

    ``df`` is modified in place and nothing is returned.
    """
    rowMax = df[list].max(axis=1)
    df[columnName] = rowMax.round(2)

def computeMinColumn(df, list, columnName):
    """Write the row-wise minimum of the ``list`` columns to ``df[columnName]``, rounded to 2 decimals.

    ``df`` is modified in place and nothing is returned.
    """
    rowMin = df[list].min(axis=1)
    df[columnName] = rowMin.round(2)

def computeInterquartileRangeColumn(df, list, columnName):
    """Write the row-wise interquartile range of the ``list`` columns to ``df[columnName]``.

    The value is the 0.75 quantile minus the 0.25 quantile of each row,
    rounded to 2 decimals. ``df`` is modified in place; nothing is returned.
    """
    values = df[list]
    upperQuartile = values.quantile(0.75, axis=1)
    lowerQuartile = values.quantile(0.25, axis=1)
    df[columnName] = (upperQuartile - lowerQuartile).round(2)
def processStatisticalColumns(selectedSensors, measurementName):
    """Build the ``<sensor>.<measurementName>`` column names and make those columns numeric.

    The names are passed to ``toNumeric`` before being returned as a list,
    in the same order as ``selectedSensors``.
    """
    columnsList = [sensor + '.' + measurementName for sensor in selectedSensors]
    toNumeric(columnsList)
    return columnsList
    
def applyStatisticalMethod(df, selectedSensors, measurementName, statisticalMethod):
    """Aggregate one measurement across sensors into a single column of ``df``.

    The per-sensor columns are obtained from ``processStatisticalColumns``,
    reduced row-wise with ``statisticalMethod`` into the column
    ``<measurementName>.<statisticalMethod>``, and then dropped from ``df``.
    """
    sourceColumns = processStatisticalColumns(selectedSensors, measurementName)
    resultColumn = measurementName + '.' + statisticalMethod
    df[resultColumn] = df[sourceColumns].agg(statisticalMethod, axis=1)
    dropColumns(sourceColumns, df)

# Sensors of each family that take part in the aggregation.
eyeSelectedSensors = ['eye02', 'eye03', 'eye04', 'eye05', 'eye09', 'eye11']
gasSelectedSensors = ['gas01', 'gas02', 'gas03', 'gas04', 'gas05', 'gas06']
vocSelectedSensors = ['voc01', 'voc02', 'voc03', 'voc04', 'voc05']

# Sensor-family prefix -> sensors belonging to that family.
sensorTypes = dict(
    eye=eyeSelectedSensors,
    gas=gasSelectedSensors,
    voc=vocSelectedSensors,
)

# Measurement name -> aggregation names to compute for it (a separate list per measurement).
measurements = {
    name: ['mean', 'std', 'median', 'min', 'max', 'iqr']
    for name in ('humidity', 'light', 'motion', 'temperature')
}

# Map each aggregation name used in `measurements` to the helper that computes it.
aggregators = {
    'mean': computeMeanColumn,
    'std': computeStdColumn,
    'median': computeMedianColumn,
    'min': computeMinColumn,
    'max': computeMaxColumn,
    'iqr': computeInterquartileRangeColumn,
}

# For every sensor family and measurement, collapse the per-sensor columns
# into one column per aggregation, named '<family>.<measurement>.<aggregation>'.
for sensorType, sensors in sensorTypes.items():
    for measurement, aggFuncs in measurements.items():
        # Resolve the per-sensor column names once; this call also converts them to numeric.
        sourceColumns = processStatisticalColumns(sensors, measurement)
        for aggFunc in aggFuncs:
            columnName = f'{sensorType}.{measurement}.{aggFunc}'
            # Dispatch on the aggregation name. Previously all six helpers
            # wrote to the same column, so every column ended up holding
            # the interquartile range regardless of its name.
            aggregators[aggFunc](dfConcat, sourceColumns, columnName)
        # The raw per-sensor columns are no longer needed once aggregated.
        dropColumns(sourceColumns, dfConcat)

# Aggregate the CO2 readings of the gas sensors into one column per statistic,
# then remove the per-sensor CO2 columns.
columnsListCO2 = processStatisticalColumns(gasSelectedSensors, 'co2')
for statName, computeStat in (
    ('mean', computeMeanColumn),
    ('std', computeStdColumn),
    ('median', computeMedianColumn),
    ('max', computeMaxColumn),
    ('min', computeMinColumn),
    ('iqr', computeInterquartileRangeColumn),
):
    computeStat(dfConcat, columnsListCO2, 'gas.co2.' + statName)
dropColumns(columnsListCO2, dfConcat)

# Aggregate the TVOC readings of the voc sensors into one column per statistic,
# then remove the per-sensor TVOC columns.
columnsListTVOC = processStatisticalColumns(vocSelectedSensors, 'tvoc')
for statName, computeStat in (
    ('mean', computeMeanColumn),
    ('std', computeStdColumn),
    ('median', computeMedianColumn),
    ('max', computeMaxColumn),
    ('min', computeMinColumn),
    ('iqr', computeInterquartileRangeColumn),
):
    computeStat(dfConcat, columnsListTVOC, 'voc.tvoc.' + statName)
dropColumns(columnsListTVOC, dfConcat)

# Show the resulting column names.
dfConcat.columns


Index(['eye02.occupancy', 'eye03.occupancy', 'eye04.occupancy',
       'eye05.occupancy', 'eye09.occupancy', 'eye11.occupancy',
       'eye.humidity.mean', 'eye.humidity.std', 'eye.humidity.median',
       'eye.humidity.min', 'eye.humidity.max', 'eye.humidity.iqr',
       'eye.light.mean', 'eye.light.std', 'eye.light.median', 'eye.light.min',
       'eye.light.max', 'eye.light.iqr', 'eye.motion.mean', 'eye.motion.std',
       'eye.motion.median', 'eye.motion.min', 'eye.motion.max',
       'eye.motion.iqr', 'eye.temperature.mean', 'eye.temperature.std',
       'eye.temperature.median', 'eye.temperature.min', 'eye.temperature.max',
       'eye.temperature.iqr', 'gas.humidity.mean', 'gas.humidity.std',
       'gas.humidity.median', 'gas.humidity.min', 'gas.humidity.max',
       'gas.humidity.iqr', 'gas.light.mean', 'gas.light.std',
       'gas.light.median', 'gas.light.min', 'gas.light.max', 'gas.light.iqr',
       'gas.motion.mean', 'gas.motion.std', 'gas.motion.median',
       'gas.moti

In [22]:
# Persist the aggregated features without the index column.
outputPath = 'dataAggregated_file.csv'
dfConcat.to_csv(outputPath, index=False)