# Wind Speed Forecasting

# Import Required Libraries

In [None]:
# Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import boto3
import warnings
import matplotlib
import configparser
import logging
import io
import requests
import os
import json
import csv
import psycopg2
import datetime

from docopt import docopt
from scipy.optimize import fmin_l_bfgs_b
from datetime import datetime as dt
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn import linear_model

# Use the ggplot look for every matplotlib figure produced by this file.
matplotlib.style.use('ggplot')
# NOTE: this silences *all* warnings process-wide, including deprecation
# warnings from pandas/numpy/sklearn.
warnings.filterwarnings("ignore")

# Module-level logger. main() and get_turbines_list() log through the name
# LOGGER, which was not defined anywhere in this file.
LOGGER = logging.getLogger(__name__)

# Main Method

In [None]:
def main():
    """Parse the command line, prepare model settings and load turbine data.

    Steps, in order:
      1. Parse the CLI options with docopt (``--run`` and ``--yp_endpoint``).
      2. Read ``config.ini``.
      3. Set the starting alpha/beta/gamma values and their (0, 1) bounds.
      4. Fetch the turbine list and call ``read_data`` for every turbine
         whose fields are all populated.

    NOTE(review): ``docopt(__doc__)`` needs a module-level usage docstring;
    none is visible in this file - confirm it exists.
    NOTE(review): ``config``, ``run``, ``yield_production_endpoint``,
    ``initial_values``, ``boundaries``, ``type_of_model``, ``train`` and
    ``test`` are assigned but not consumed yet in the visible code.
    """
    # docopt builds the argument parser from the usage text in __doc__.
    args = docopt(__doc__)

    # User-editable settings are kept in config.ini.
    config = configparser.ConfigParser()
    config.read('config.ini')

    # Command-line options.
    run = args['--run']
    yield_production_endpoint = args['--yp_endpoint']

    # Tag placed in front of log messages.
    module_name = 'hw.py'

    # Starting point for alpha, beta and gamma.
    alpha, beta, gamma = 0.5, 0.5, 0.5
    # The floats are passed as separate print() arguments: gluing them onto
    # the string with "+" raises TypeError (str + float).
    print("Initial values of alpha, beta and gamma:", alpha, beta, gamma)
    initial_values = np.array([alpha, beta, gamma])

    # Each of alpha, beta and gamma is constrained to [0, 1].
    boundaries = [(0, 1), (0, 1), (0, 1)]

    type_of_model = 'multiplicative'

    # Get turbines list
    turbines_list = get_turbines_list()

    # Only names that exist in this scope are logged; the previous message
    # referenced turbine/windfarm/model variables that were never defined.
    if not turbines_list:
        LOGGER.warning('[%s] No turbines found.', module_name)

    # Sample list
    # NOTE(review): this hard-coded sample replaces whatever the API returned
    # above; remove it once the real list should be processed.
    turbines_list = [('7890', 'DSPU01', 'E48', 'PUSHPATHUR')]

    # Validation of turbine list elements
    #--------------To be implemented-----------------
    for (turbine_id, scada_id, turbine_model, windfarm_location) in turbines_list:
        record = (turbine_id, scada_id, turbine_model, windfarm_location)

        # A missing (None) or empty field makes the S3 key built by
        # read_data() meaningless, so the record is skipped, not just logged.
        if any(field is None or str(field) == "" for field in record):
            LOGGER.warning('None of the field should be empty')
            continue

        # Get train and test data
        train, test = read_data(scada_id, windfarm_location)
        
        
    
# Script entry point: main() runs only when this file is executed directly,
# not when it is imported.
# NOTE(review): read_data() and get_turbines_list() are defined further down
# in this file, so in a plain top-to-bottom run this call executes before
# those names exist. Consider moving this guard to the end of the file.
if __name__ == '__main__':
    main()

# read_data() function

In [None]:
def read_data(scada_id, windfarm_location):
    
    # Data is present in s3 bucket
    
    """
    Load one turbine's CSV from S3 and prepare the training/test split.

    Downloads ``yield-prediction/data/<windfarm_location>/<scada_id>.csv``
    from the ``sagemaker_digital_twin`` bucket, parses it with pandas and
    passes the frame through ``pre_processor``.

    NOTE(review): this function is unfinished. ``last_stored_datetime`` and
    ``test_data`` are still ``???`` placeholders (not valid Python), and
    there is no ``return`` statement although ``main()`` unpacks two values
    (``train, test``) from this call - presumably
    ``(training_data, test_data)`` should be returned; confirm.

    Args:
        scada_id: used (via ``str()``) as the CSV file name in the S3 key.
        windfarm_location: used (via ``str()``) as the folder in the S3 key.

    Boto is the Amazon Web Services (AWS) SDK for Python. 
    It enables Python developers to create, configure, and manage AWS services, such as EC2 and S3.
    Boto provides an easy to use, object-oriented API, as well as low-level access to AWS services.
    """
    s3 = boto3.resource('s3')
    
    # Data location path
    data_path = 'yield-prediction/data/' + str(windfarm_location) + '/' + str(scada_id) + '.csv'
    
    # Despite its name, `metadata` holds the object's 'Body' stream (the file
    # content), which is read in full on the next statement.
    # NOTE(review): current S3 naming rules do not allow underscores in
    # bucket names - confirm 'sagemaker_digital_twin' is the real bucket.
    metadata = s3.Object(bucket_name = 'sagemaker_digital_twin', key = data_path).get()['Body']
    
    # Parse the downloaded bytes as CSV.
    df = pd.read_csv(io.BytesIO(metadata.read()))
    
    # Pre-process data
    df = pre_processor(df)
    
    # Recent storage date
    # TODO: placeholder - the source of this value is not defined yet.
    last_stored_datetime = ???
    
    # Training data period: window start, 2880 hours before the last stored
    # datetime. Not used further in the visible code.
    start_datetime = last_stored_datetime - datetime.timedelta(hours = 4*30*24)  # (4*30*24)-> total hours in 4 months.
    
    # Prediction date: 24 hours after the last stored datetime.
    # Not used further in the visible code.
    end_date = last_stored_datetime + datetime.timedelta(hours = 24)
    
    # Prediction period (presumably hours, matching end_date - TODO confirm).
    # Not used further in the visible code.
    prediction_period = 24
    
    # Number of rows whose index value is <= last_stored_datetime.
    # NOTE(review): this comparison presumably expects a datetime-like index;
    # read_csv above sets no index and pre_processor() is currently a
    # pass-through - confirm where the index is meant to be set.
    training_data_length = len(df.loc[df.index <= last_stored_datetime])
    
    # The first `training_data_length` rows, by position, form the training set.
    training_data = df.iloc[0:training_data_length, :]
    # TODO: placeholder - test split not implemented yet.
    test_data = ???
    
    
    
    
    
    
    
    

# pre_processor() function

In [None]:
def pre_processor(df):
    """Placeholder pre-processing hook.

    No transformation is implemented yet: the object passed in is handed
    back unchanged (the very same object, not a copy).
    """
    return df

# get_turbines_list() function

In [None]:
def get_turbines_list(module_name='hw.py', timeout=None):
    """Fetch the turbine assets from the asset-service REST API.

    Sends an authenticated GET request to the assets endpoint and turns
    every entry of the JSON response into a tuple.

    Args:
        module_name: Tag placed in front of log messages. Defaults to
            ``'hw.py'``, the module name ``main()`` uses. Previously this
            name was read from a variable that does not exist in this
            function's scope.
        timeout: Seconds to wait for the HTTP response, forwarded to
            ``requests.get``. ``None`` (the default) keeps the previous
            behaviour of waiting without a limit.

    Returns:
        A list of ``(assetID, iotAssetID, modelName, locationName)`` tuples,
        one per entry of the response, in response order.

    Raises:
        ValueError: The response body could not be decoded as JSON.
        Exception: Whatever was raised while reading the required keys
            from an entry (e.g. ``KeyError`` for a missing key); it is
            logged and re-raised unchanged.
        requests.HTTPError: The API answered with a status other than 200.
    """
    # Authentication/Authorization headers
    headers_auth = {
        'content-type': "application/json",
        'accept': "application/json",
        'Authorization': getToken()
    }

    # URL of assets
    url = "https://dev.dtconf.atos.net/dt-asset-service-rest-api/assets"

    response = requests.get(url, headers=headers_auth, timeout=timeout)

    # Guard clause for a failed call. LOGGER.error is used because no
    # exception is being handled here; LOGGER.exception would append a
    # meaningless "NoneType: None" traceback.
    if response.status_code != 200:
        LOGGER.error('[%s] Error in calling assets API. HTTP status code [%s] , HTTP reason [%s]',
                     module_name, response.status_code, response.reason)
        raise requests.HTTPError('Error in calling assets API')

    # Decode the body; `assets` avoids shadowing the imported json module.
    try:
        assets = response.json()
    except ValueError:
        LOGGER.exception('[%s] Assets API response is not valid JSON.', module_name)
        raise

    # Build one tuple per asset; log and re-raise if an entry is malformed.
    try:
        return [
            (asset['assetID'], asset['iotAssetID'],
             asset['modelName'], asset['locationName'])
            for asset in assets
        ]
    except Exception:
        LOGGER.exception('[%s] JSON object does not contain required keys.', module_name)
        raise