In [11]:
# do not upload this cell!
# Paths of the two local secret files (presumably the DarkSky API key and the
# coordinates of the site -- confirm against the cell that reads them).
# Each path can be overridden with an environment variable so that the notebook
# also runs on machines other than the one it was written on; when the variable
# is not set, the original absolute path is used unchanged.
import os

API_PATH = os.environ.get(
    'DARKSKY_API_PATH',
    '/Users/hkromer/01_Projects/10.SolarAnlage/01.Analytics/solarAnalytics/2020-01-14.ETL/01.Original_data/DarkSkyAPI.pw',
)

LOCATION_PATH = os.environ.get(
    'DARKSKY_LOCATION_PATH',
    '/Users/hkromer/01_Projects/10.SolarAnlage/01.Analytics/solarAnalytics/2020-01-14.ETL/01.Original_data/location.pw',
)


# ETL of DarkSky Data

This project covers the extraction of weather data with Python 3 using the DarkSky API (https://darksky.net/dev). The data is to be used for forecasting photovoltaic energy production. Signing up on DarkSky is free, and the API can be used free of charge as long as one stays below 1000 API calls per 24 hours.

The pipeline is laid out as follows: the DarkSky API is called and returns hourly, historical data for each requested day. The API responses, in the form of JSON files, are stored locally and then processed into one big CSV file containing all the data from the API. For the final purpose of this dataset, the forecasting of photovoltaic energy production, not all of the data from DarkSky will necessarily be relevant.

## Import

In [14]:
import datetime
import pandas as pd
import requests
import re
import json
import os
import matplotlib.pyplot as plt
import seaborn as sns
# switch on seaborn's default styling for the plots drawn in this notebook
sns.set()

In [26]:
date_str = '2017-10-01'

# parse the calendar day (midnight) and push it forward to mid-day
noon_offset = pd.DateOffset(hours=12)
date = datetime.datetime.strptime(date_str, '%Y-%m-%d') + noon_offset

# last expression of the cell: show the resulting timestamp
date

Timestamp('2017-10-01 12:00:00')

In [None]:
# date range (first and last day are both included)
startDate = '2017-10-01'
endDate = '2019-06-29'

# parse both ends of the range and measure the distance between them
d_from_date, d_to_date = (
    datetime.datetime.strptime(day, '%Y-%m-%d') for day in (startDate, endDate)
)
delta = d_to_date - d_from_date

# walk through the range one day at a time
for i in range(delta.days + 1):

    # mid-day of the i-th day of the range, as a plain datetime
    new_date = d_from_date + datetime.timedelta(days=i, hours=12)

    # UNIX time in whole seconds; the naive datetime is interpreted in the
    # local time zone of the machine running the notebook
    unix_time = int(new_date.timestamp())

    print(unix_time)
    print(new_date)

In [None]:
class DarkSkyETL():
    
    def __init__(self, DarkSkyAPI, location):
        self.API = DarkSkyAPI
        self.LOCATION = location
        
    def call_API(self, date_str):
        """
        Calls the DarkSky API for the specified location at the specified date, which must be a string in the
        format YYYY-00-DD. Asks for hourly data and returns a json object for this date.
        """
        # option list for API
        option_list = "exclude=currently,minutely,alerts&units=si"
        
        # convert to a timestamp at mid-day
        date = datetime.datetime.strptime(date_str, '%Y-%m-%d') + pd.DateOffset(hours=12)
        
        # convert the date to unit time
        unix_time = int(datetime.datetime.timestamp(date))
        
        # call API
        latitude = LOCATION[0]
        longitude = LOCATION[1]
        response = requests.get("https://api.darksky.net/forecast/"+DarkSkyAPI+"/"+latitude+","+longitude+","+str(unix_time)+"?"+option_list)
        json_res = response.json()

        return json_res