In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob, os
from sdt_dask.dask_tool.sdt_dask import SDTDask

In [3]:
SDTDask?

[0;31mInit signature:[0m [0mSDTDask[0m[0;34m([0m[0mdata_plug[0m[0;34m,[0m [0mclient[0m[0;34m,[0m [0moutput_path[0m[0;34m=[0m[0;34m'../results/'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
A class to run the SolarDataTools pipeline on a Dask cluster.
Will handle invalid data keys and failed datasets.

:param keys: 
    data_plug (:obj:`DataPlug`): The data plug object.
    client (:obj:`Client`): The Dask client object.
    output_path (str): The path to save the results.
[0;31mFile:[0m           /mnt/c/Users/22720/OneDrive/Documents/solar-data-tools/sdt_dask/dask_tool/sdt_dask.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     

## Local Client example
Use the local CSV data plug (`LocalFiles`) together with a local Dask client.

### Instantiate a local-file data plug

In [4]:
from sdt_dask.dataplugs.csv_plug import LocalFiles

In [5]:
LocalFiles?

[0;31mInit signature:[0m [0mLocalFiles[0m[0;34m([0m[0mpath_to_files[0m[0;34m,[0m [0mext[0m[0;34m=[0m[0;34m'.csv'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Dataplug class for retrieving data from some source. It's recommended
that user-created dataplug inherit from this class to ensure compatibility.

The initialization argument for each class will be different depending on
the source. The main requirement is to keep the ``Dataplug.get_data`` method,
and make sure the args and returns as defined here.
[0;31mFile:[0m           /mnt/c/Users/22720/OneDrive/Documents/solar-data-tools/sdt_dask/dataplugs/csv_plug.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     

In [6]:
# Directory holding the example CSV files used in this demo.
path = "../dataplugs/example_data/"
# path = "../dataplugs/spwr_sensor_0/"
local_file_data_plug = LocalFiles(path_to_files=path)

# Build one 1-tuple key per CSV file: the file name without its extension.
# Only "*.csv" is globbed (matching the LocalFiles default ``ext='.csv'``) and
# the extension is removed with splitext instead of a fixed 4-character slice,
# so non-CSV files or other extension lengths cannot produce bogus keys.
# Sorting makes the key order reproducible, since glob order is unspecified.
local_file_keys = [
    (os.path.splitext(os.path.basename(fname))[0],)
    for fname in sorted(glob.glob(os.path.join(path, "*.csv")))
]
# Add a key with no backing file on purpose.
# NOTE(review): presumably this exercises the invalid-key handling of SDTDask — confirm.
local_file_keys.append(('NotExist',))
local_file_keys

[('001C4B0008A5',), ('ALL0VALUETESTFILE',), ('NotExist',)]

In [7]:
# Load the data for the first key through the data plug; the returned value is shown as the cell output.
local_file_data_plug.get_data(local_file_keys[0])

Loading file 001C4B0008A5...


Unnamed: 0_level_0,ac_power_01
ts,Unnamed: 1_level_1
2017-09-13 06:22:32,0.0
2017-09-13 06:37:32,18.0
2017-09-13 06:52:32,53.0
2017-09-13 07:07:35,70.0
2017-09-13 07:22:34,136.0
...,...
2018-10-12 16:51:04,217.0
2018-10-12 17:06:04,131.0
2018-10-12 17:21:04,54.0
2018-10-12 17:36:04,15.0


### Set up a local, user-defined client

In [8]:
from dask.distributed import Client

In [9]:
# Cluster sizing: the total memory (in GiB, see the memory_limit string below)
# is split evenly across the workers.
total_system_memory = 8
n_workers = 3
threads_per_worker = 1
memory_per_worker = total_system_memory / n_workers

# Keep every client option in one mapping so the settings are easy to scan.
client_options = dict(
    processes=False,
    n_workers=n_workers,
    threads_per_worker=threads_per_worker,
    memory_limit=f"{memory_per_worker}GiB",
    memory_target_fraction=0.8,
    memory_spill_fraction=False,
    memory_pause_fraction=False,
)
local_client = Client(**client_options)

# Display the dashboard URL as the cell output.
local_client.dashboard_link



'http://172.21.212.11:8787/status'

Loading file ALL0VALUETESTFILE...
Loading file NotExist...
Loading file 001C4B0008A5...

            *********************************************
            * Solar Data Tools Data Onboarding Pipeline *
            *********************************************

            This pipeline runs a series of preprocessing, cleaning, and quality
            control tasks on stand-alone PV power or irradiance time series data.
            After the pipeline is run, the data may be plotted, filtered, or
            further analyzed.

            Authors: Bennet Meyers and Sara Miskovich, SLAC

            (Tip: if you have a mosek [https://www.mosek.com/] license and have it
            installed on your system, try setting solver='MOSEK' for a speedup)

            This material is based upon work supported by the U.S. Department
            of Energy's Office of Energy Efficiency and Renewable Energy (EERE)
            under the Solar Energy Technologies Office Award Number 38529.

       


task list:   0%|                                          | 0/7 [00:00<?, ?it/s][A

Please run pipeline first.
Please run the pipeline first!
'DataHandler' object has no attribute 'total_time'




task list:  43%|██████████████▌                   | 3/7 [00:01<00:02,  1.70it/s][A
task list:  71%|████████████████████████▎         | 5/7 [00:06<00:02,  1.46s/it][A
task list: 100%|██████████████████████████████████| 7/7 [00:17<00:00,  2.53s/it][A




total time: 17.74 seconds
--------------------------------
Breakdown
--------------------------------
Preprocessing              1.77s
Cleaning                   11.24s
Filtering/Summarizing      4.73s
    Data quality           0.06s
    Clear day detect       0.09s
    Clipping detect        2.97s
    Capacity change detect 1.61s


            ************************************************
            * Solar Data Tools Degradation Estimation Tool *
            ************************************************

            Monte Carlo sampling to generate a distributional estimate
            of the degradation rate [%/yr]

            The distribution typically stabilizes in 50-100 samples.

            Author: Bennet Meyers, SLAC

            This material is based upon work supported by the U.S. Department
            of Energy's Office of Energy Efficiency and Renewable Energy (EERE)
            under the Solar Energy Technologies Office Award Number 38529.

            


10it [00:01,  5.46it/s]

P50, P02.5, P97.5: -10.147, -11.640, -6.009
changes: -8.949e-01, 0.000e+00, 0.000e+00


21it [00:03,  5.62it/s]

P50, P02.5, P97.5: -10.147, -13.197, -6.009
changes: 1.996e-01, 0.000e+00, 0.000e+00


31it [00:05,  5.49it/s]

P50, P02.5, P97.5: -8.507, -13.185, -6.010
changes: -7.508e-02, 4.304e-03, -7.869e-05


41it [00:07,  5.48it/s]

P50, P02.5, P97.5: -8.357, -13.142, -6.010
changes: -2.724e-03, 4.304e-03, -7.869e-05


51it [00:09,  5.49it/s]

P50, P02.5, P97.5: -8.476, -13.099, -5.596
changes: 1.540e-02, 4.304e-03, -2.383e-02


60it [00:11,  5.86it/s]

P50, P02.5, P97.5: -8.607, -13.056, -5.835
changes: -3.382e-02, 4.304e-03, -2.383e-02


71it [00:13,  5.79it/s]

P50, P02.5, P97.5: -8.584, -12.990, -6.010
changes: -2.237e-02, 1.285e-02, -7.869e-05


81it [00:14,  6.16it/s]

P50, P02.5, P97.5: -8.717, -13.238, -5.860
changes: -5.527e-02, -9.102e-02, -5.478e-03


91it [00:16,  5.33it/s]

P50, P02.5, P97.5: -8.717, -13.223, -5.843
changes: 4.313e-02, 1.493e-03, 6.598e-02


101it [00:18,  5.49it/s]

P50, P02.5, P97.5: -8.753, -13.208, -5.866
changes: 7.395e-03, 1.493e-03, -2.325e-03


111it [00:20,  5.53it/s]

P50, P02.5, P97.5: -8.753, -13.185, -5.790
changes: -1.787e-02, 4.304e-03, -2.336e-05


120it [00:22,  4.76it/s]

P50, P02.5, P97.5: -8.717, -13.142, -5.386
changes: 1.787e-02, 4.304e-03, -1.479e-02


131it [00:24,  5.75it/s]

P50, P02.5, P97.5: -8.607, -13.223, -5.534
changes: -1.145e-02, 1.493e-03, -1.479e-02


141it [00:25,  5.25it/s]

P50, P02.5, P97.5: -8.607, -13.208, -5.682
changes: -1.145e-02, 1.493e-03, -1.479e-02


151it [00:27,  5.23it/s]

P50, P02.5, P97.5: -8.717, -13.185, -5.790
changes: -5.202e-02, 4.304e-03, -2.336e-05


160it [00:29,  4.76it/s]

P50, P02.5, P97.5: -8.717, -13.142, -5.791
changes: 1.666e-02, 4.304e-03, -2.336e-05


170it [00:31,  5.66it/s]

P50, P02.5, P97.5: -8.717, -13.099, -5.791
changes: 1.288e-02, 4.304e-03, -2.336e-05


181it [00:33,  5.44it/s]

P50, P02.5, P97.5: -8.743, -13.056, -5.791
changes: 3.788e-03, 4.304e-03, -2.336e-05


187it [00:34,  5.35it/s]


Performing loss factor analysis...

                    ***************************************
                    * Solar Data Tools Loss Factor Report *
                    ***************************************

                    degradation rate [%/yr]:                    -9.145
                    deg. rate 95% confidence:          [-13.016, -5.790]
                    total energy loss [kWh]:                -1822848.9
                    bulk deg. energy loss (gain) [kWh]:      -345118.3
                    soiling energy loss [kWh]:                -89326.8
                    capacity change energy loss [kWh]:          -643.3
                    weather energy loss [kWh]:               -884895.9
                    system outage loss [kWh]:                -502864.5
                    


In [10]:
%%time
dask_tool = SDTDask(local_file_data_plug, local_client, output_path="../results/")

dask_tool.set_up(local_file_keys, fix_shifts=True, verbose=True)
dask_tool.get_result()

CPU times: user 48.7 s, sys: 7.26 s, total: 56 s
Wall time: 53.9 s


## AWS Fargate Client example
Use the PVDAQ data plug (`PVDAQPlug`) together with a Fargate client.

### Instantiate a PVDAQ data plug


In [11]:
from sdt_dask.dataplugs.pvdaq_plug import PVDAQPlug

In [12]:
PVDAQPlug?

In [13]:
# Create the PVDAQ data plug with its default constructor arguments.
pvdaq_data_plug = PVDAQPlug()

### Set up a Fargate cluster client


In [14]:
from sdt_dask.clients.aws.fargate import Fargate

In [15]:
# Users should define these environment variables beforehand; the secret key
# in particular must come from the environment and never be hard-coded here.
# Any variable that is not set is read as None.
AWS_DEFAULT_REGION = os.environ.get('AWS_DEFAULT_REGION')
PA_NUMBER = os.environ.get("project-pa-number")

# AWS credentials, gathered under their standard variable names.
ENVIRONMENT = {
    var_name: os.environ.get(var_name)
    for var_name in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
}

In [16]:
# User-defined settings that are passed to the Fargate client class:
# cluster size, container image, VPC and resource tags.

# Cluster size.
WORKERS = 3
THREADS_PER_WORKER = 1

# Container image and network.
IMAGE = "nimishy/sdt-windows:latest"
VPC = "vpc-ab2ff6d3" # for us-west-2

# Resource tags; the PA number is read from the environment in an earlier cell.
TAGS = {
    "project-pa-number": PA_NUMBER,
    "project": "pvinsight"
}

In [17]:
# Collect the user-defined settings from the previous cells in one mapping,
# then pass them to the Fargate helper's init_client.
fargate_options = {
    "image": IMAGE,
    "tags": TAGS,
    "vpc": VPC,
    "region_name": AWS_DEFAULT_REGION,
    "environment": ENVIRONMENT,
    "n_workers": WORKERS,
    "threads_per_worker": THREADS_PER_WORKER,
}
fargate_client = Fargate().init_client(**fargate_options)

[i] Initilializing Fargate Cluster ...


  next(self.gen)


[i] Initialized Fargate Cluster
[i] Initilializing Dask Client ...
[>] Dask Dashboard: http://54.188.106.107:8787/status


In [18]:
# Keys handed to the PVDAQ data plug, one tuple per dataset.
# NOTE(review): presumably (site id, year) pairs — confirm against PVDAQPlug.
pvdaq_keys = [(34, 2011), (35, 2015), (51, 2012)]

# Same workflow as the local example, now with the PVDAQ plug and Fargate client.
dask_tool = SDTDask(
    data_plug=pvdaq_data_plug,
    client=fargate_client,
    output_path="../results/",
)
dask_tool.set_up(pvdaq_keys, fix_shifts=True, verbose=True)
dask_tool.get_result()

## Azure Client example