In [None]:
import pandas as pd
from glob import glob
from tqdm import tqdm

from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np

In [None]:
# Collect every CSV found three directory levels below ./Data.
# glob() returns matches in arbitrary, filesystem-dependent order, so the list
# is sorted to make the concatenation order reproducible between runs.
files = sorted(glob("./Data/*/*/*/*.csv"))

In [None]:
# One bucket of raw DataFrames per modality; each matching CSV adds one entry.
sensors, proximities, accelerations, floors = [], [], [], []
activities, group = [], []

# Every CSV is read with the same separator and encoding.
csv_options = dict(delimiter=';', encoding='unicode_escape')

for f in files:
    # The checks are independent (no elif): a path containing several of the
    # keywords is loaded into every matching bucket.
    if 'sensors' in f:
        sensors.append(pd.read_csv(f, **csv_options))
    if 'proximity' in f:
        proximities.append(pd.read_csv(f, **csv_options))
    if 'acceleration' in f:
        # Tag acceleration rows with the second-to-last '-'-separated token of the path.
        accelerations.append(pd.read_csv(f, **csv_options).assign(group=f.split('-')[-2]))
    if 'floor' in f:
        floors.append(pd.read_csv(f, **csv_options))


def _concat_with_timestamps(frames):
    """Concatenate ``frames`` and convert their TIMESTAMP column to datetimes."""
    combined = pd.concat(frames)
    combined['TIMESTAMP'] = pd.to_datetime(combined['TIMESTAMP'])
    return combined


# Only the binary-sensor frame is sorted by time here.
sf = _concat_with_timestamps(sensors).sort_values("TIMESTAMP")
pf = _concat_with_timestamps(proximities)
wf = _concat_with_timestamps(accelerations)
mf = _concat_with_timestamps(floors)

In [None]:
# Width of the time window, as a pandas offset string; other cells turn it into
# a duration with pd.Timedelta(resample_value).
resample_value = '30s'

# Cut the training label file into separate text files, one per segment.
with open('time-slots training.csv','r') as input_file:
    data_str = input_file.read()
    data_array = data_str.split(';;;;;;;;;;\n;;;;;;;;;;\n;;;;;;;;;;\n') # Split wherever three consecutive lines contain nothing but ';' separators
    for i, smaller_data in enumerate(data_array):
        with open(f'train_file_{i}.txt','w') as new_data_file:
            new_data_file.write(smaller_data)

# Read every segment back, skipping its first line and dropping any column that
# contains a missing value.
# NOTE(review): the glob matches every train_file*.txt in the working directory,
# including files left behind by an earlier run — confirm this is acceptable.
activities=[]
for f in glob('train_file*.txt'):
    activities.append(pd.read_csv(f,delimiter=';', skiprows=[0]).dropna(axis=1))

# One-hot encode the 'Activity_1' column of gt_labels.csv: each label is wrapped
# in a one-element list so MultiLabelBinarizer yields one boolean column per
# distinct label. These rows are flagged with test_data=True.
temp = pd.read_csv('gt_labels.csv',delimiter=';').dropna(axis=1)
mlb = MultiLabelBinarizer()
tdf = pd.DataFrame(mlb.fit_transform(temp['Activity_1'].apply(lambda x: [x])),columns=mlb.classes_).astype(bool)
tdf['Timestamp'] = temp.Time
tdf['test_data'] = True
activities.append(tdf)

# Stack all label frames; a column missing from a given frame becomes False.
# Note that the sort runs on the raw Timestamp values, before they are parsed
# to datetimes on the next line.
df = pd.concat(activities).fillna(False).sort_values('Timestamp').reset_index(drop=True)
df.Timestamp = pd.to_datetime(df.Timestamp)
df = df.set_index('Timestamp', drop=True)
# Rows in which every column is False are marked as 'Idle'.
df.loc[(~df).all(axis=1),'Idle'] = True
#df = df.resample(resample_value).nearest(15).dropna()#.first().bfill().astype(bool)

In [None]:
# Start a new recording session whenever two consecutive sensor events (sf is
# sorted by TIMESTAMP) are more than five minutes apart.
# The gap is measured on the real time difference. Comparing minute-of-hour
# values, as was done before, misses every gap that crosses an hour or day
# boundary (e.g. 10:58 -> 11:10, or 11:30 -> 11:20 the next day), which lets
# the forward-fill below carry sensor states from one session into the next.
session_gap = pd.Timedelta(minutes=5)
group_samples = (sf["TIMESTAMP"].diff() > session_gap).cumsum()
grouped = sf.groupby(group_samples)

# Per session: one row per timestamp, one column per OBJECT, holding the first
# reported STATE; gaps are forward-filled within the session only.
group_list = [
    session.groupby(['TIMESTAMP', 'OBJECT'])['STATE'].aggregate('first').unstack().ffill()
    for _, session in grouped
]
sff = pd.concat(group_list)

def check_fill(x):
    """Choose the value used to fill the gaps of one sensor column.

    The column's values are searched for exact matches (whole elements, not
    substrings) of a few known state names, in priority order; the first one
    found decides the filler.

    Parameters
    ----------
    x : pandas.Series
        One column of sensor states.

    Returns
    -------
    str
        'Close', 'No Movement', 'No Present' or 'No Pressure' depending on the
        first marker found, otherwise 'Unknown'.
    """
    observed = x.values
    rules = (
        ('Close', 'Close'),
        ('Movement', 'No Movement'),
        ('Present', 'No Present'),
        ('Pressure', 'No Pressure'),
    )
    for marker, filler in rules:
        if marker in observed:
            return filler
    return 'Unknown'

# Per-column filler for the values still missing after the forward-fill.
# check_fill() returns a non-empty string on every path, so each column gets an
# entry; the former truthiness test was always true and only made the function
# run twice per column.
fill_dct = {c: check_fill(sff[c]) for c in sff.columns}
sff = sff.fillna(fill_dct)
#sf.groupby(['TIMESTAMP', 'OBJECT'])['STATE'].aggregate('first').unstack().ffill(limit=3).fillna('unknown')#.pivot_table(values='STATE', columns='OBJECT')

In [None]:
# Ad-hoc spot check: proximity rows strictly inside one resample window after a
# sample time. The result is neither assigned nor the last expression of the
# cell, so it is computed and then discarded.
time = "2017-10-31 11:11:30"
pf[(pf.TIMESTAMP > pd.Timestamp(time)) & (pf.TIMESTAMP < (pd.Timestamp(time) + pd.Timedelta(resample_value)))]

# Label timestamps as a sorted one-column frame: the right-hand side of the as-of join.
total_f = (
    pd.DataFrame(df.index)
    .assign(Timestamp=lambda frame: pd.to_datetime(frame.Timestamp))
    .sort_values("Timestamp")
)

# Give every sensor snapshot the latest label timestamp at or before it, provided
# it lies within one resample window; rows with any missing value are dropped.
sfff = pd.merge_asof(
    sff.reset_index(),
    total_f,
    left_on="TIMESTAMP",
    right_on="Timestamp",
    direction="backward",
    tolerance=pd.Timedelta(resample_value),
).dropna()

In [None]:
from rdflib import Graph
import re
# Load the OWL file into an rdflib graph; the SPARQL queries below run against it.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs where this exact file exists — consider a configurable relative path.
g = Graph()
g.parse("/Users/bramsteenwinckel/Documents/Projects/Protego/ucaml_cup/ucml.owl")


# Every room with the floor it belongs to and its type, leaving out the generic
# NamedIndividual type, two room types and one specific room.
query = """
SELECT ?room ?room_type ?floor
WHERE {
    ?room <https://saref.etsi.org/saref4bldg/isSpaceOf> ?floor .
    ?room a ?room_type .
    Filter(?room_type != <http://www.w3.org/2002/07/owl#NamedIndividual>) .
    Filter(?room_type != <https://dahcc.idlab.ugent.be/Ontology/SensorsAndActuators/MultipurposeRoom>) .
    Filter(?room_type != <https://dahcc.idlab.ugent.be/Ontology/SensorsAndActuators/Garage>) .
    Filter(?room != <https://dahcc.idlab.ugent.be/Homelab/SensorsAndActuators/smallbedroom>) .
}"""

qres = g.query(query)

# room IRI -> "<floor local name><room type local name>"
rooms_types = {}
for row in qres:
    # The type's local name follows '#' when the IRI has one, else the last '/'.
    separator = '#' if '#' in row.room_type else '/'
    floor_name = row.floor.split('#')[-1]
    type_name = row.room_type.split(separator)[-1]
    rooms_types[row.room.toPython()] = floor_name + type_name


# room IRI -> appliance type local name -> set of (sensor local name, appliance IRI)
appliances = {}
for room in rooms_types:
    # Appliances contained in this room, each with its type and the sensor that
    # analyses its state.
    query = """
    SELECT ?appliance ?appliance_type ?sensor
    WHERE {
        ?appliance <https://saref.etsi.org/saref4bldg/isContainedIn> <%s> .
        ?sensor <https://dahcc.idlab.ugent.be/Ontology/Sensors/analyseStateOf> ?appliance .
        ?appliance a ?appliance_type .
        Filter(?appliance_type != <http://www.w3.org/2002/07/owl#NamedIndividual>) .
    }"""%(room)

    qres = g.query(query)
    for row in qres:
        # Local name of the type: text after the last '/' and then after the last '#'.
        tp = row.appliance_type.split('/')[-1].split('#')[-1]
        sensor_and_appliance = (row.sensor.split('#')[-1], row.appliance.toPython())
        # Rooms and types only get an entry once a first result row exists.
        appliances.setdefault(room, {}).setdefault(tp, set()).add(sensor_and_appliance)
#print(appliances["http://example.com/ucaml_cup#lab"])
# room IRI -> sensor local name -> name of the predicate that links a room state
# to an observation of that sensor.
result_appliances = {}
for room in appliances:
    room_props = result_appliances[room] = {}
    for a in appliances[room]:
        # Snapshot the set once so every pair is visited exactly once, in one order.
        pairs = list(appliances[room][a])
        if len(pairs) == 1:
            # A single appliance of this type: name the predicate after the type.
            room_props[pairs[0][0]] = 'has'+a+"State"
            continue
        for sensor_name, appliance_iri in pairs:
            # Several appliances share the type: name the predicate after the
            # appliance itself (local name, underscores removed, first letter upper-cased).
            local_name = appliance_iri.split('#')[-1].replace('_','')
            s = re.sub('([a-zA-Z])', lambda m: m.group(1).upper(), local_name, count=1)
            prop = 'has'+s+"State"
            # Disambiguate when another sensor in this room already uses the name.
            # The names live in the dict's VALUES; the former test looked at the
            # keys (sensor names), so this branch could never trigger.
            taken = any(
                existing == prop
                for other_sensor, existing in room_props.items()
                if other_sensor != sensor_name
            )
            if taken:
                prop += str(len(room_props))
            room_props[sensor_name] = prop

In [None]:
import datetime

# Running counters behind the generate_*_uuid helpers; all start out empty.
uuid_event_map = dict()  # part -> number of the next event
uuid_room_map = dict()   # part -> {room -> number of the next state}
uuid_map = dict()        # part -> number of the next observation

def generate_obs_uuid(part):
    """Return the next observation IRI for ``part``.

    A per-part counter kept in the module-level ``uuid_map`` starts at 0 and is
    advanced by one on every call.
    """
    number = uuid_map.get(part, 0)
    uuid_map[part] = number + 1
    return 'https://dahcc.idlab.ugent.be/Protego/' + part + '/obs' + str(number)

def generate_room_uuid(room, part):
    """Return the next state IRI for ``room`` within ``part``.

    Counters live in the module-level ``uuid_room_map`` (part -> room -> next
    number); each (part, room) pair counts up from 0 independently.
    """
    room_counters = uuid_room_map.setdefault(part, {})
    number = room_counters.get(room, 0)
    room_counters[room] = number + 1
    return 'https://dahcc.idlab.ugent.be/Protego/' + part + '/' + room + '/state' + str(number)

def generate_event_uuid(part):
    """Return ``(event_iri, previous_event_iri)`` for the next event of ``part``.

    The per-part counter in the module-level ``uuid_event_map`` starts at 0 and
    is advanced on every call. ``previous_event_iri`` is ``None`` for the very
    first event of a part.
    """
    number = uuid_event_map.get(part, 0)
    uuid_event_map[part] = number + 1
    prefix = 'https://dahcc.idlab.ugent.be/Protego/' + part + '/event'
    previous = prefix + str(number - 1) if number > 0 else None
    return prefix + str(number), previous

def create_event(ff, part, time, prev_time):
    """Append the N-Triples describing one labelled time slot to ``event_ucaml.nt``.

    For the slot starting at ``time`` the function writes, in this order: the
    link to the previous event (only if that event is at most one
    ``resample_value`` older), the timestamp, the active labels, one state per
    room with its binary-sensor observations, and the wearable's proximity,
    floor-capacitance and accelerometer statistics for the open interval
    (time, time + resample_value).

    Reads the module-level ``rooms_types``, ``result_appliances``, ``sfff``,
    ``pf``, ``mf``, ``wf`` and ``resample_value`` and advances the counters
    behind the ``generate_*_uuid`` helpers.

    Parameters
    ----------
    ff : pandas.Series
        Label flags of the slot; every index label whose value equals ``True``
        is written as a ``hasActivity`` literal.
    part : str
        Name inserted into every generated IRI.
    time : timestamp-like
        Start of the slot; anything ``pd.Timestamp`` accepts.
    prev_time : timestamp-like or None
        Start of the slot handled by the previous call; only read when a
        previous event exists.

    Returns
    -------
    None
        The triples are appended to ``event_ucaml.nt``; running the notebook
        again therefore appends a second copy.
    """
    window_start = pd.Timestamp(time)
    window_end = window_start + pd.Timedelta(resample_value)

    def in_window(frame):
        """Rows of ``frame`` whose TIMESTAMP lies strictly inside the slot window."""
        return frame[(frame.TIMESTAMP < window_end) & (frame.TIMESTAMP > window_start)]

    # Sensor snapshots matched to exactly this label timestamp. The lookup does
    # not depend on the room, so it is done once instead of once per room.
    sensor_rows = sfff[sfff['Timestamp'] == window_start]

    with open('event_ucaml.nt', 'a') as f:
        event, prev = generate_event_uuid(part)
        if prev and (window_start - pd.Timestamp(prev_time)) <= pd.Timedelta(resample_value):
            f.write('<%s> <http://example.org/hasPrevious> <%s> .\n'%(event, prev))

        f.write('<%s> <https://saref.etsi.org/core/hasTimestamp> "%s"^^<http://www.w3.org/2001/XMLSchema#dateTime> .\n'%(event, time))

        # One hasActivity literal per flag that is set.
        for a, flag in zip(ff.index, ff.values):
            if flag == True:
                f.write('<%s> <https://saref.etsi.org/core/hasActivity> "%s" .\n' % (event, a))

        # --- Binary sensors: one state per room, one observation per known reading ---
        for r in rooms_types:
            room_state = generate_room_uuid(r.split('/')[-1].split('#')[-1], part)
            f.write("<%s> <%s> <%s> .\n"%(event, "https://dahcc.idlab.ugent.be/Protego/"+rooms_types[r], room_state))

            # result_appliances only has entries for rooms with at least one
            # sensed appliance; other rooms simply get no observations (indexing
            # directly raised KeyError for them).
            room_props = result_appliances.get(r, {})
            prev_obs = {}  # sensor column -> last observation written for it in this room
            for _, row in sensor_rows.iterrows():
                for col in row.index:
                    if col in room_props and row[col] != 'Unknown':
                        obs = generate_obs_uuid(part)
                        f.write("<%s> <%s> <%s> .\n"%(room_state, "https://dahcc.idlab.ugent.be/Protego/"+room_props[col], obs))
                        f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#boolean> .\n'%(obs, "https://saref.etsi.org/core/has"+row[col].replace(' ','')+"Value", "true" ))
                        f.write('<%s> <%s> <%s> .\n'%(obs, "https://saref.etsi.org/core/measurementMadeBy", "http://example.com/ucaml_cup#"+col))
                        if col in prev_obs:
                            f.write('<%s> <%s> <%s> .\n'%(obs, "http://example.org/hasPreviousObs", prev_obs[col]))
                        prev_obs[col] = obs

        # --- Proximity: mean / min / max RSSI per object seen in the window ---
        wearable_state = generate_room_uuid('NearestObject',part)
        f.write("<%s> <%s> <%s> .\n"%(event, "https://dahcc.idlab.ugent.be/Protego/PersonNearestObject", wearable_state))
        # String aggregation names give the same columns as passing np.mean /
        # min / max and avoid the pandas deprecation warning for callables.
        rssi_stats = in_window(pf).groupby(['OBJECT']).agg({"RSSI": ['mean', 'min', 'max']})
        for _, row in rssi_stats.iterrows():
            obs = generate_obs_uuid(part)
            d = row.RSSI.values  # (mean, min, max)
            o = 'near'+row.name.title().replace(' ','')+"Observation"
            f.write("<%s> <%s> <%s> .\n"%(wearable_state, "https://dahcc.idlab.ugent.be/Protego/"+o, obs))
            f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#float> .\n'%(obs, "https://dahcc.idlab.ugent.be/Protego/hasMeanValue", d[0]))
            f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#float> .\n'%(obs, "https://dahcc.idlab.ugent.be/Protego/hasMinValue", d[1]))
            f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#float> .\n'%(obs, "https://dahcc.idlab.ugent.be/Protego/hasMaxValue", d[2]))

        # --- Floor: per-device, per-zone mean / max / min capacitance ---
        wearable_state = generate_room_uuid('Location',part)
        f.write("<%s> <%s> <%s> .\n"%(event, "https://dahcc.idlab.ugent.be/Protego/PersonLocation", wearable_state))
        floor_rows = in_window(mf)
        if len(floor_rows)>0:
            for device, device_df in floor_rows.groupby('DEVICE'):
                obs = generate_obs_uuid(part)
                o = 'has'+device.replace(',','')+'Observation'
                vals = device_df['CAPACITANCE'].values

                f.write("<%s> <%s> <%s> .\n"%(wearable_state, "https://dahcc.idlab.ugent.be/Protego/"+o, obs))
                f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#boolean> .\n'%(obs, "https://dahcc.idlab.ugent.be/Protego/hasValue", "true"))
                # Each CAPACITANCE cell is a comma-separated list of numbers, one
                # per zone; parse it once and reuse it for all three statistics.
                zones = [[float(x) for x in v.split(',')] for v in vals]
                for label, reducer in (('Mean', np.mean), ('Max', np.max), ('Min', np.min)):
                    per_zone = reducer(zones, axis=0)
                    for i in range(len(per_zone)):
                        f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#float> .\n'%(obs, "https://dahcc.idlab.ugent.be/Protego/has"+label+"ValueZone"+str(i), per_zone[i]))
                f.write('<%s> <%s> <%s> .\n'%(obs, "https://saref.etsi.org/core/measurementMadeBy", "http://example.com/ucaml_cup#Wearable"))

        # --- Accelerometer: mean / min / max per axis ---
        wearable_state = generate_room_uuid('Accelerometer',part)
        f.write("<%s> <%s> <%s> .\n"%(event, "https://dahcc.idlab.ugent.be/Protego/PersonAccelerometer", wearable_state))
        accel_rows = in_window(wf)
        if len(accel_rows)>0:
            f.write('<%s> <https://example.com/partOfGroup> "%s" .\n' % (event, accel_rows['group'].unique()[0]))
        # NOTE(review): unlike the floor section, the per-axis statistics are
        # written even when the window holds no rows, in which case the values
        # are NaN — confirm whether this is intended.
        for metric in ['X','Y','Z']:
            d = (accel_rows[metric].mean(),accel_rows[metric].min(),accel_rows[metric].max())
            obs = generate_obs_uuid(part)
            o = 'has'+metric.replace('_',' ').replace('.',' ').title().replace(' ','')+"Observation"
            f.write("<%s> <%s> <%s> .\n"%(wearable_state, "https://dahcc.idlab.ugent.be/Protego/"+o, obs))
            f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#float> .\n'%(obs, "https://dahcc.idlab.ugent.be/Protego/hasMeanValue", d[0]))
            f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#float> .\n'%(obs, "https://dahcc.idlab.ugent.be/Protego/hasMinValue", d[1]))
            f.write('<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#float> .\n'%(obs, "https://dahcc.idlab.ugent.be/Protego/hasMaxValue", d[2]))
            f.write('<%s> <%s> <%s> .\n'%(obs, "https://saref.etsi.org/core/measurementMadeBy", "http://example.com/ucaml_cup#Wearable"))

##################

In [None]:
# Emit one event per label row, in index order, handing each call the timestamp
# of the row before it (None for the first row).
prev = None
for timestamp, labels in tqdm(df.iterrows(), total=len(df)):
    create_event(labels, "Mario", timestamp, prev)
    prev = timestamp