In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project sources take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import glob, os

import sys
# Extend the module search path two directories up so that the `sdt_dask`
# import below resolves.
# NOTE(review): assumes the kernel's working directory is two levels below
# the repository root — confirm before running from elsewhere.
sys.path.append('../..')

from sdt_dask.dask_tool.sdt_dask import SDTDask

In [3]:
# IPython introspection: display the SDTDask constructor signature and docstring.
SDTDask?

[0;31mInit signature:[0m [0mSDTDask[0m[0;34m([0m[0mdata_plug[0m[0;34m,[0m [0mclient[0m[0;34m,[0m [0;34m**[0m[0mkeywords[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      <no docstring>
[0;31mFile:[0m           /mnt/c/Users/22720/OneDrive/Documents/solar-data-tools/sdt_dask/dask_tool/sdt_dask.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     

## Local Client example 
Use the PVDAQ data plug with a local Dask client.

### Instantiate a PVDAQ data plug


In [4]:
from sdt_dask.dataplugs.pvdaq_plug import PVDAQPlug

In [5]:
# IPython introspection: display the PVDAQPlug constructor signature and docstring.
PVDAQPlug?

[0;31mInit signature:[0m [0mPVDAQPlug[0m[0;34m([0m[0mapi_key[0m[0;34m=[0m[0;34m'DEMO_KEY'[0m[0;34m,[0m [0mpower_col[0m[0;34m=[0m[0;34m'ac_power'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Dataplug class for retrieving data from the PVDAQ DB.
Note that the DEMO_KEY has a rate limit of 30/h, 50/d per IP address.
[0;31mFile:[0m           /mnt/c/Users/22720/OneDrive/Documents/solar-data-tools/sdt_dask/dataplugs/pvdaq_plug.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     

In [6]:
# Built with the constructor defaults (api_key='DEMO_KEY', power_col='ac_power').
# The demo key is rate limited to 30 requests/hour and 50/day per IP address;
# pass api_key=... to use a personal key instead.
pvdaq_data_plug = PVDAQPlug()

### Set up a local, user-defined client

In [7]:
from dask.distributed import Client

In [8]:
# Sizing of the local cluster. The memory figure is in GiB (see the
# memory_limit string built below).
n_workers = 1
threads_per_worker = 1
total_system_memory = 8
# Share the memory budget evenly between the workers.
memory_per_worker = total_system_memory / n_workers

# Start a local client. processes=False asks Dask for thread-based workers
# rather than separate processes; the memory_* fractions tune the worker
# memory manager (spilling and pausing are switched off here).
local_client = Client(
    n_workers=n_workers,
    threads_per_worker=threads_per_worker,
    memory_limit=f"{memory_per_worker}GiB",
    processes=False,
    memory_target_fraction=0.8,
    memory_spill_fraction=False,
    memory_pause_fraction=False,
)
# Last expression of the cell: displays the dashboard URL.
local_client.dashboard_link



'http://172.21.212.11:8787/status'

(34, 2011)
Pulling site ID 34 for year 2011...

(51, 2012)
Pulling site ID 51 for year 2012...

total time: 15.24 seconds
--------------------------------
Breakdown
--------------------------------
Preprocessing              3.40s
Cleaning                   2.46s
Filtering/Summarizing      9.37s
    Data quality           0.11s
    Clear day detect       0.52s
    Clipping detect        7.89s
    Capacity change detect 0.85s

total time: 14.11 seconds
--------------------------------
Breakdown
--------------------------------
Preprocessing              5.73s
Cleaning                   2.24s
Filtering/Summarizing      6.15s
    Data quality           0.21s
    Clear day detect       0.70s
    Clipping detect        4.24s
    Capacity change detect 1.00s

(35, 2015)
Pulling site ID 35 for year 2015...
[------------------------------------------------------------] 0.0% ...querying year 2015




total time: 6.32 seconds
--------------------------------
Breakdown
--------------------------------
Preprocessing              0.82s
Cleaning                   1.12s
Filtering/Summarizing      4.38s
    Data quality           0.05s
    Clear day detect       0.46s
    Clipping detect        3.52s
    Capacity change detect 0.35s



In [9]:
# Each key is a (site ID, year) pair handed to the PVDAQ data plug.
pvdaq_keys = [(34, 2011), (35, 2015), (51, 2012)]

# Bind the data plug to the local client, then run the pipeline for all keys.
dask_tool = SDTDask(pvdaq_data_plug, local_client)
dask_tool.execute(
    pvdaq_keys,
    solver="OSQP",
    solver_convex="OSQP",
    fix_shifts=True,
    verbose=True,
)

## AWS Fargate Client example
Use the LocalFiles data plug with an AWS Fargate client.

### Instantiate a LocalFiles data plug

In [None]:
from sdt_dask.dataplugs.csv_plug import LocalFiles

In [None]:
# IPython introspection: display the LocalFiles constructor signature and docstring.
LocalFiles?

In [7]:
# Directory holding the example data files, and the data plug that reads them.
path = "../dataplugs/example_data/"
local_file_data_plug = LocalFiles(path_to_files=path)

# One single-element tuple key per file: the file name with its extension
# removed. os.path.splitext is used instead of slicing off the last four
# characters, so names whose extension is not exactly three letters long
# are not mangled.
local_file_keys = [
    (os.path.splitext(os.path.basename(fname))[0],)
    for fname in glob.glob(path + "*")
]
# Drop the first entry.
# NOTE(review): glob.glob does not guarantee an ordering, so which file is
# skipped here may vary between machines — confirm which entry is meant to be
# excluded and filter it explicitly.
local_file_keys = local_file_keys[1:]
# Last expression of the cell: displays the resulting keys.
local_file_keys

[('TAAI01129193',), ('TABC01142170',), ('TABD01110568',)]

In [8]:
# Sanity check: load the data for the second key and display the returned table.
local_file_data_plug.get_data(local_file_keys[1])

Loading file TABC01142170...


Unnamed: 0_level_0,ac_power_01,ac_power_02,ac_power_03
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-12-31 16:00:00,0.0767,0.0928,0.0928
2013-12-31 16:05:00,0.0695,0.0862,0.0819
2013-12-31 16:10:00,0.0626,0.0726,0.0653
2013-12-31 16:15:00,0.0546,0.0639,0.0613
2013-12-31 16:20:00,0.0418,0.0530,0.0516
...,...,...,...
2019-06-20 15:30:00,0.5872,0.5950,0.2445
2019-06-20 15:35:00,0.6454,0.6506,0.2748
2019-06-20 15:40:00,0.6220,0.6313,0.2645
2019-06-20 15:45:00,0.5259,0.5338,0.2238


### Set up a Fargate cluster client


In [None]:
from sdt_dask.clients.aws.fargate import Fargate

In [9]:
# Users should define these environment variables before running this cell;
# secrets are read from the environment instead of being written in the notebook.
# Any variable that is not set comes back as None.
PA_NUMBER = os.environ.get("project_pa_number")
AWS_DEFAULT_REGION = os.environ.get("AWS_DEFAULT_REGION")

# AWS credentials, keyed by the name of the variable they were read from.
ENVIRONMENT = {
    var_name: os.environ.get(var_name)
    for var_name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
}

In [10]:
# User-defined settings that are passed to the Fargate client class below:
# container image, VPC, resource tags, and worker sizing.
IMAGE = "nimishy/sdt-dask-windows:latest"
VPC = "vpc-ab2ff6d3"  # for us-west-2

# Tags for the cluster; the PA number is read from the environment in an
# earlier cell.
TAGS = {
    "project-pa-number": PA_NUMBER,
    "project": "pvinsight",
}

# Cluster size.
THREADS_PER_WORKER = 1
WORKERS = 3

In [11]:
# Start the Fargate cluster and obtain a client for it, using the settings
# and credentials defined in the previous cells.
fargate_client = Fargate().init_client(
    image=IMAGE,
    tags=TAGS,
    vpc=VPC,
    region_name=AWS_DEFAULT_REGION,
    environment=ENVIRONMENT,
    n_workers=WORKERS,
    threads_per_worker=THREADS_PER_WORKER,
)

[i] Initilializing Fargate Cluster ...


RuntimeError: Cluster failed to start: Timed out trying to connect to tcp://35.85.226.66:8786 after 30 s

In [None]:
# Bind the local-file data plug to the Fargate client, then run the pipeline
# for every local file key.
dask_tool = SDTDask(local_file_data_plug, fargate_client)
dask_tool.execute(
    local_file_keys,
    solver="OSQP",
    solver_convex="OSQP",
    fix_shifts=True,
    verbose=True,
)

## Azure Client example