In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import dask
from solardatatools import DataHandler
from sdt_dask.clients.aws.fargate import get_fargate_cluster
from sdt_dask.dataplugs.pvdaq_plug import PVDAQPlug

# Set up the user-defined dataplug

In [3]:
PVDAQPlug?

[0;31mInit signature:[0m [0mPVDAQPlug[0m[0;34m([0m[0myear[0m[0;34m,[0m [0mapi_key[0m[0;34m=[0m[0;34m'DEMO_KEY'[0m[0;34m,[0m [0mpower_col[0m[0;34m=[0m[0;34m'ac_power'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Dataplug class for retrieving data from the PVDAQ DB.
    
[0;31mFile:[0m           ~/Documents/SLAC/PVInsight/solar-data-tools/sdt_dask/dataplugs/pvdaq_plug.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     

In [4]:
# Instantiate the PVDAQ dataplug with year=2011; api_key and power_col are left
# at their signature defaults ('DEMO_KEY' and 'ac_power').
data_plug = PVDAQPlug(year=2011)

In [5]:
# Each entry is handed to dask_sdt as `key`, which forwards it to data_plug.get_data().
KEYS = [34, 35, 38] # site IDs for this plug, given by user

# Set up the user-defined client

## AWS Fargate

In [12]:
get_fargate_cluster?

[0;31mSignature:[0m
[0mget_fargate_cluster[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtags[0m[0;34m=[0m[0;34m{[0m[0;34m'project-pa-number'[0m[0;34m:[0m [0;34m'21691-H2001'[0m[0;34m,[0m [0;34m'project'[0m[0;34m:[0m [0;34m'pvinsight'[0m[0;34m}[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mimage[0m[0;34m=[0m[0;34m'smiskov/dask-sdt-sm:latest'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mscale_num[0m[0;34m=[0m[0;36m12[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0mdistributed[0m[0;34m.[0m[0mclient[0m[0;34m.[0m[0mClient[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mFile:[0m      ~/Documents/SLAC/PVInsight/solar-data-tools/sdt_dask/clients/aws/fargate.py
[0;31mType:[0m      function

In [13]:
# Obtain a distributed Client from get_fargate_cluster(), using its default
# tags, image and scale_num arguments.
# NOTE(review): the output saved with this cell is a connection traceback --
# confirm the cluster was actually reachable before relying on `client`.
client = get_fargate_cluster()

Traceback (most recent call last):
  File "/Users/smiskov/opt/anaconda3/envs/pvi-dask/lib/python3.10/site-packages/distributed/comm/tcp.py", line 547, in connect
    stream = await self.client.connect(
  File "/Users/smiskov/opt/anaconda3/envs/pvi-dask/lib/python3.10/site-packages/tornado/tcpclient.py", line 275, in connect
    af, addr, stream = await connector.start(connect_timeout=timeout)
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/smiskov/opt/anaconda3/envs/pvi-dask/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/smiskov/opt/anaconda3/envs/pvi-dask/lib/python3.10/site-packages/distributed/comm/core.py", line 342, in connect
    comm = await wait_for(
  File "/Users/smiskov/opt/anaconda3/en

# Define SDT Dask function

In [6]:
@dask.delayed
def dask_sdt(data_plug=None, key=34, client=None):
    """Run the solar-data-tools pipeline for a single key as a Dask delayed task.

    Because of the ``@dask.delayed`` decorator, calling this function only
    builds a lazy task; the body executes when the task is computed.

    Parameters
    ----------
    data_plug : object exposing ``get_data(key)``
        Dataplug used to fetch the data for ``key``. Required in practice,
        even though the signature defaults to ``None``.
    key : optional
        Identifier forwarded to ``data_plug.get_data``. Defaults to 34.
    client : optional
        Not used by this function; kept so existing callers that pass it
        keep working.

    Returns
    -------
    The value returned by
    ``DataHandler.report(return_values=True, verbose=False)``.

    Raises
    ------
    ValueError
        If ``data_plug`` is ``None``.
    """
    # Fail with an explicit message instead of an opaque AttributeError on
    # ``None.get_data`` raised at compute time.
    if data_plug is None:
        raise ValueError("dask_sdt requires a data_plug; got None")

    df = data_plug.get_data(key)
    dh = DataHandler(df)
    dh.run_pipeline(solver="QSS", solver_convex="OSQP")
    return dh.report(return_values=True, verbose=False)

# Run SDT Dask function

In [7]:
# Build one lazy dask_sdt task per entry in KEYS, then evaluate them all
# together with a single dask.compute call.
delayed_results = [dask_sdt(data_plug, site_key) for site_key in KEYS]
results = dask.compute(*delayed_results)




total time: 43.64 seconds
--------------------------------
Breakdown
--------------------------------
Preprocessing              3.92s
Cleaning                   0.73s
Filtering/Summarizing      38.99s
    Data quality           0.24s
    Clear day detect       0.48s
    Clipping detect        34.40s
    Capacity change detect 3.87s

total time: 45.40 seconds
--------------------------------
Breakdown
--------------------------------
Preprocessing              4.04s
Cleaning                   0.39s
Filtering/Summarizing      40.98s
    Data quality           0.23s
    Clear day detect       0.56s
    Clipping detect        34.20s
    Capacity change detect 5.98s

total time: 31.55 seconds
--------------------------------
Breakdown
--------------------------------
Preprocessing              3.37s
Cleaning                   0.71s
Filtering/Summarizing      27.47s
    Data quality           0.22s
    Clear day detect       0.79s
    Clipping detect        24.12s
    Capacity change det



In [8]:
results

({'length': 1.0,
  'capacity': 109.3,
  'sampling': 15,
  'quality score': 0.9835616438356164,
  'clearness score': 0.4986301369863014,
  'inverter clipping': True,
  'clipped fraction': 0.01643835616438356,
  'capacity change': False,
  'time shift correction': False,
  'time zone correction': 0},
 {'length': 1.0,
  'capacity': 87.7,
  'sampling': 15,
  'quality score': 0.989041095890411,
  'clearness score': 0.5041095890410959,
  'inverter clipping': True,
  'clipped fraction': 0.01643835616438356,
  'capacity change': False,
  'time shift correction': False,
  'time zone correction': 0},
 {'length': 1.0,
  'capacity': 401.15,
  'sampling': 1.0,
  'quality score': 0.8876712328767123,
  'clearness score': 0.24383561643835616,
  'inverter clipping': False,
  'clipped fraction': 0.0,
  'capacity change': False,
  'time shift correction': False,
  'time zone correction': 0})