In [1]:
import json
import os
import pandas as pd
import re
from collections import defaultdict

# Config

In [2]:
# Root folder of the input data. Keep the trailing slash: other cells build
# paths by plain string concatenation (e.g. basePath + 'c8y_data.json').
basePath = '../../data/telia/'

### File operations

In [3]:
# Parsed content of c8y_data.json, kept as a module-level variable.
# _createMeasurementMapping iterates over it and reads 'id' and 'type'
# from each item.
with open(basePath + 'c8y_data.json', 'r', encoding='utf8') as json_file:
    c8y_data = json.load(json_file)

In [4]:
def getFilePaths(path):
    """Return the paths of all entries directly inside the folder ``path``.

    Args:
        path: Folder to list. A trailing path separator is optional.

    Returns:
        A list with ``path`` joined to every entry name, sorted by entry name
        so the processing order is the same on every run. An empty list is
        returned (after printing a notice) when ``path`` is not an existing
        folder.
    """
    # isdir instead of exists: a regular file would pass an existence check
    # and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(path):
        print('Folder does not exist: ' + path)
        return []
    # os.path.join only inserts a separator when one is missing, so the result
    # is correct with or without a trailing slash. os.listdir returns entries
    # in arbitrary order, hence the sort.
    return [os.path.join(path, fileName) for fileName in sorted(os.listdir(path))]


def getFileContents(filePath):
    """Read the UTF-8 encoded JSON file at ``filePath`` and return the decoded document."""
    with open(filePath, mode='r', encoding='utf8') as handle:
        contents = json.load(handle)
    return contents


def getFileContentsByFolder(folderPath):
    """Decode every file found in ``folderPath`` as JSON.

    Returns:
        A list with one decoded document per file, in the order in which
        getFilePaths reports the files.
    """
    documents = []
    for filePath in getFilePaths(folderPath):
        documents.append(getFileContents(filePath))
    return documents


### Count fragment + series

In [5]:
def getFragmentSeriesCount():
    """Sum the measurement counts per (fragment, series) pair.

    Reads every JSON file in ``basePath + 'measurements/fragmentSeries/'``;
    each file is iterated as a list of device records whose 'fragmentSeries'
    items carry 'fragment', 'series' and 'count'.

    Returns:
        A defaultdict keyed by ``(fragment, series)``. Each value is a dict
        holding the summed 'count' plus 'unit' and 'value' sets, which this
        function leaves empty.
    """
    def _emptyEntry():
        return {'count': 0, 'unit': set(), 'value': set()}

    totals = defaultdict(_emptyEntry)
    sourceFolder = basePath + 'measurements/fragmentSeries/'
    for devices in getFileContentsByFolder(sourceFolder):
        for device in devices:
            for entry in device['fragmentSeries']:
                key = (entry['fragment'], entry['series'])
                amount = entry['count']
                totals[key]['count'] += amount
    return totals

### Add values and units

In [6]:
def _createMeasurementMapping():
    """Create an empty measurement record for every item in ``c8y_data``.

    Returns:
        A dict keyed by the item's 'id'. Each record stores that id as
        'deviceId', the item's 'type' as 'deviceType', a 'count' of 0 and an
        empty 'measurement' list.
    """
    return {
        device['id']: {
            'deviceId': device['id'],
            'deviceType': device['type'],
            'count': 0,
            'measurement': [],
        }
        for device in c8y_data
    }

def _combineMeasurementFileData(idMapping, folderPaths):
    """Merge the measurement files found in ``folderPaths`` into ``idMapping``.

    Args:
        idMapping: Dict keyed by device id whose values hold a 'count' and a
            'measurement' list (as built by _createMeasurementMapping). It is
            updated in place.
        folderPaths: Folders whose JSON files are each iterated as a list of
            device records.

    Returns:
        The same ``idMapping`` object.

    Raises:
        KeyError: If a record has no 'deviceId', or if a record that carries
            data refers to a device id missing from ``idMapping``.
    """
    for folder in folderPaths:
        for jsonFile in getFileContentsByFolder(folder):
            for device in jsonFile:
                deviceId = device['deviceId']
                if 'total' in device and device['total']['count'] > 0:
                    total = device['total']
                    idMapping[deviceId]['count'] = total['count']
                    # Same rule as for fragmentSeries below: only keep a
                    # measurement that is present and non-empty, because
                    # consumers call .items() on every stored measurement.
                    if total.get('measurement'):
                        idMapping[deviceId]['measurement'].append(total['measurement'])

                # A missing or null 'fragmentSeries' simply contributes nothing.
                for fragmentSeries in device.get('fragmentSeries') or []:
                    # .get: an entry without a 'measurement' key is treated
                    # like one whose measurement is empty.
                    if fragmentSeries.get('measurement'):
                        idMapping[deviceId]['measurement'].append(fragmentSeries['measurement'])
    return idMapping

def combineMeasurementFileData():
    """Build the per-device mapping and fill it from the measurement folders.

    The 'total' and 'fragmentSeries' sub-folders of
    ``basePath + 'measurements/'`` are processed in that order.

    Returns:
        The mapping returned by _combineMeasurementFileData.
    """
    emptyMapping = _createMeasurementMapping()
    folderPaths = [
        basePath + 'measurements/' + name + '/'
        for name in ('total', 'fragmentSeries')
    ]
    return _combineMeasurementFileData(emptyMapping, folderPaths)

In [7]:
def addValuesAndUnits(measurementMapping, countDictionary):
    """Record the observed values and units of every known (fragment, series).

    Args:
        measurementMapping: Dict whose values each hold a 'measurement' list
            of measurement dicts (fragment name -> {series name -> series
            dict}, next to a handful of metadata keys).
        countDictionary: Dict keyed by ``(fragment, series)`` whose values
            hold 'value' and 'unit' sets. It is updated in place; pairs that
            are not already present are ignored.

    Returns:
        None.

    For a series dict without a 'value' or 'unit' key, ``None`` is added to
    the corresponding set. Fragments and series that are not dicts (for
    example a free-text field on the measurement) are skipped.
    """
    # Metadata keys of a measurement that are never fragments. Built once
    # instead of once per measurement.
    nonFragmentKeys = frozenset(
        ["lastUpdated", "creationTime", "self", "source", "time", "id", "text", "type"]
    )

    for device in measurementMapping.values():
        for measurementObj in device['measurement']:
            for fragment, fragmentObj in measurementObj.items():
                # Also covers None, and any scalar/list field that is not in
                # the metadata list above.
                if fragment in nonFragmentKeys or not isinstance(fragmentObj, dict):
                    continue
                for series, seriesObj in fragmentObj.items():
                    # Membership test first: indexing a defaultdict would
                    # silently create entries for unknown pairs.
                    if (fragment, series) not in countDictionary:
                        continue
                    if not isinstance(seriesObj, dict):
                        continue
                    entry = countDictionary[(fragment, series)]
                    entry['value'].add(seriesObj.get('value'))
                    entry['unit'].add(seriesObj.get('unit'))

### Filter dataset

In [8]:
def fixSensorName(name):
    """Strip the numeric sensor id from a fragment name.

    ``sensor_1235_daily`` becomes ``sensor_daily``. Names that do not start
    with ``sensor_`` followed by a digit are returned unchanged.
    """
    # NOTE(review): at most four digits are removed, so a longer id keeps its
    # remaining digits (sensor_12345_x -> sensor5_x) - confirm this is intended.
    match = re.match("^(sensor)_\\d{1,4}(.*)", name)
    if match:
        return match.group(1) + match.group(2)
    return name


def filterDateset(originalDateset):
    """Re-key a (fragment, series) dataset with sensor ids stripped.

    Args:
        originalDateset: Dict keyed by ``(fragment, series)`` whose values
            hold a numeric 'count' and 'unit' / 'value' sets. It is not
            modified.

    Returns:
        A new defaultdict with the same value layout, keyed by
        ``(fixSensorName(fragment), series)``. When several original keys map
        to the same new key, their counts are summed and their units and
        values are unioned.
    """
    filteredDataset = defaultdict(lambda: {'count': 0, 'unit': set(), 'value': set()})
    for (fragment, series), fragmentSeries in originalDateset.items():
        merged = filteredDataset[(fixSensorName(fragment), series)]
        merged['count'] += fragmentSeries['count']
        # Accumulate into the merged entry. Assigning the source set here
        # would keep only the units/values of the last colliding key.
        merged['unit'].update(fragmentSeries['unit'])
        merged['value'].update(fragmentSeries['value'])
    return filteredDataset

### Save csv

In [9]:
def findMinAndMaxValues(inputValues):
    """Return ``(minimum, maximum)`` of ``inputValues``, ignoring ``None``.

    When nothing but ``None`` entries (or nothing at all) is supplied, both
    results are the empty string.
    """
    present = list(filter(lambda candidate: candidate is not None, inputValues))
    if not present:
        return '', ''
    return min(present), max(present)

def unitRepresentation(unit):
    """Reduce a collection of units to the form written to the CSV.

    Args:
        unit: Iterable of units; ``None`` entries stand for "no unit given"
            and are ignored.

    Returns:
        ``''`` when no unit remains, the unit itself when exactly one
        remains, otherwise a list of the units sorted by their string form.
    """
    # None is only a placeholder for a missing unit, so it must not turn a
    # single real unit into a two-element list. Sorting makes the output
    # independent of set iteration order.
    units = sorted((entry for entry in unit if entry is not None), key=str)
    if not units:
        return ''
    if len(units) == 1:
        return units[0]
    return units

def saveToCsv(data, fileName):
    """Write one CSV row per (fragment, series) entry of ``data``.

    Args:
        data: Dict keyed by ``(fragment, series)`` whose values hold 'count',
            'unit' and 'value'.
        fileName: Path of the CSV file to write.

    The columns are fragment, series, count, unit, minValue and maxValue.
    The file is written without an index column, encoded as 'utf-8-sig'.
    """
    columns = ['fragment', 'series', 'count', 'unit', 'minValue', 'maxValue']
    rows = []
    for (fragment, series), stats in data.items():
        unitText = unitRepresentation(stats['unit'])
        total = stats['count']
        lowest, highest = findMinAndMaxValues(stats['value'])
        rows.append((fragment, series, total, unitText, lowest, highest))

    table = pd.DataFrame(rows, columns=columns)
    table.to_csv(fileName, index=False, encoding='utf-8-sig')

### Current dataset

In [10]:
# Per-device records holding the measurements collected from the
# measurement folders.
measurementMapping = combineMeasurementFileData()
# Summed count per (fragment, series); the 'unit' and 'value' sets are still
# empty at this point.
fragmentSeriesCount = getFragmentSeriesCount()
# Fills the 'unit' and 'value' sets of fragmentSeriesCount in place.
addValuesAndUnits(measurementMapping, fragmentSeriesCount)

In [11]:
# Export the unfiltered counts; the path is relative to the working directory.
# NOTE(review): the file name has a space before '.csv' - confirm that this
# is intentional before relying on the name elsewhere.
saveToCsv(fragmentSeriesCount, 'measurement fragment + series .csv')

### Filtered dataset

In [12]:
# Re-key the counts with the numeric sensor id stripped from fragment names
# (see fixSensorName); counts of keys that collapse together are summed.
filteredFragmentSeriesCount = filterDateset(fragmentSeriesCount)

In [13]:
# Export the filtered counts; the path is relative to the working directory.
saveToCsv(filteredFragmentSeriesCount, 'measurement fragment + series (filtered).csv')