# Fetching Data

In [7]:
import numpy as np
import pandas as pd
import concurrent.futures
import requests
import time
from functools import partial
from datetime import datetime
import os

In [8]:
def toDatetime(record):
    """
    Replace a record's 'timestamp' text with a datetime object.

    The text must be laid out as YYYY-MM-DDTHH:MM:SS, the layout this
    notebook expects from the API. The record is modified in place and
    the same object is returned for convenience.
    """
    record['timestamp'] = datetime.strptime(
        record['timestamp'], "%Y-%m-%dT%H:%M:%S"
    )
    return record

def getData(startTime, endTime , freq="Daily"):
    """
    Fetch energy data for every property listed by the Helsinki Nuuka API.

    The property list is downloaded first; then one request per property
    is issued from a thread pool and the returned records are combined
    into a single DataFrame. Each record's 'timestamp' is converted to a
    datetime via toDatetime.

    Fetching the per-property data is best-effort: a property whose
    request fails, answers with a non-OK status or yields records that
    cannot be parsed is skipped, and a single RuntimeWarning summarising
    the skipped properties is emitted at the end.

    Keyword arguments:
    startTime -- the starting date of the period you wish to fetch (YYYY-MM-DD)
    endTime -- the ending date of the period you wish to fetch (YYYY-MM-DD)
    freq -- the frequency of data (Hourly/Daily/...)

    Returns:
    A pandas DataFrame with one row per record. Rows follow the order of
    the property list, so repeated runs produce the same row order.

    Raises:
    requests.HTTPError -- if the property list itself cannot be fetched.
    """
    # Local import: only needed to report skipped properties.
    import warnings

    reportingGroups = ["Electricity"]
    baseLink = 'https://helsinki-openapi.nuuka.cloud/api/v1.0'

    # Download the list of properties; fail loudly if this step fails,
    # because nothing else can be fetched without it.
    locationsRequest = requests.get(f"{baseLink}/Property/List")
    locationsRequest.raise_for_status()
    locations = pd.DataFrame.from_dict(locationsRequest.json())
    # Properties without a name cannot be turned into a search string.
    locations = locations['locationName'].dropna()
    dataLink = f"{baseLink}/EnergyData/{freq}/ListByProperty"

    # Helper function for concurrency: returns the parsed records of one
    # property, or None when that property has to be skipped.
    def request(group, location):
        payload = {
            'StartTime': startTime,
            'EndTime': endTime,
            'ReportingGroup': group,
            # Only the part of the name before the first space is sent
            # (presumably the property code -- confirm against the API).
            'SearchString': location.split(' ', 1)[0],
        }
        try:
            r = requests.get(dataLink, payload)
            if r.status_code != requests.codes.ok:
                return None
            records = r.json()
            for record in records:
                toDatetime(record)
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, invalid JSON, or records without a
            # well-formed 'timestamp': skip this property, keep the rest.
            return None
        return records

    # The final step - getting the data.
    data = []
    skipped = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for g in reportingGroups:
            # Consuming the map results (in input order) keeps the row
            # order deterministic and makes sure no worker outcome is lost.
            results = executor.map(partial(request, g), locations)
            for location, records in zip(locations, results):
                if records is None:
                    skipped.append(location)
                else:
                    data.extend(records)

    if skipped:
        warnings.warn(
            f"{len(skipped)} of {len(locations) * len(reportingGroups)} "
            f"requests returned no usable data and were skipped: {skipped}",
            RuntimeWarning,
            stacklevel=2,
        )
    return pd.DataFrame(data)


In [9]:
# Request the data from the API.
# Note: You can only make 5000 API requests per hour.
# You can specify the frequency of the data here (Hourly/Daily/...).
freq = 'Daily'
start = time.perf_counter()
ts = getData('2017-01-01', '2020-01-01', freq=freq)
finish = time.perf_counter()
print(f'Finished in {finish-start} second(s)')

# Export the Dataframe to a .csv file named after the fetched frequency
# (e.g. 'daily.csv'), so the file name cannot disagree with its contents.
file_name = f'{freq.lower()}.csv'
ts.to_csv(file_name)

Finished in 72.33211269999993 second(s)
