In [1]:
## Load modules

import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
%matplotlib inline
import os
cwd = os.getcwd() # current working directory

In [2]:
# data (num_samples, features)
basedir = './'
windFile = 'Firewheel_FWL'+str(1)+'_mast_file_tsec_72-months.csv'
def importWindFile(basedir, windFile):
    data = np.loadtxt(basedir+windFile, delimiter=',', skiprows=7)
    ws = np.zeros((data.shape[0],5))
    time = np.linspace(0., 10.*data.shape[0],data.shape[0])
    nt = ws.shape[0]
    hour = data[:,3]
    month = data[:,1]
    for i  in range(0,5):
        ind = 5*(i+1)
        ws[:,i] = data[:,ind]
    return ws, hour, time, nt, month


ws, hour, time, nt, month = importWindFile(basedir, windFile)


In [3]:
# Full dataset
Nt = ws.shape[0]
# Physical parameters
horizon = 18
dataHeight = 0 # index of the height we want
# Initialize
xFull = np.zeros((horizon-1,Nt))
yFull = np.zeros((Nt,1))
for i in range(horizon,Nt):
    xFull[:,i-horizon] = ws[i-horizon:i-1,dataHeight]
    yFull[i-horizon] = ws[i,dataHeight]
    
# Select the dev set
randomSeed_dev = int(np.round(np.random.rand(1)*Nt))
n_dev = int(np.round(Nt*0.01)) # use 1% of the data for dev
while randomSeed_dev > (Nt-n_dev): # make sure the random seed is not too close to the end of the dataset
    randomSeed_dev = int(np.round(np.random.rand(1)*Nt))

x_dev = xFull[:,randomSeed_dev:randomSeed_dev+n_dev]
y_dev = yFull[randomSeed_dev:randomSeed_dev+n_dev]
x_dev = x_dev.T

# Select the test set and make sure that it does not overlap with dev set
randomSeed_test = int(np.round(np.random.rand(1)*Nt))
n_test = int(np.round(Nt*0.01)) # use 1% of the data for dev
while randomSeed_test > (Nt-n_test) | np.abs( randomSeed_dev - randomSeed_test ) < n_test: #make sure that it does not overlap with dev set
    randomSeed_test = int(np.round(np.random.rand(1)*Nt))
    
x_test = xFull[:,randomSeed_test:randomSeed_test+n_dev]
y_test = yFull[randomSeed_test:randomSeed_test+n_dev]
x_test = x_dev.T


# Set the test data as the rest of it and concatenate it together
if randomSeed_dev > randomSeed_test:
    x_train = np.concatenate((xFull[:,0:randomSeed_test-1],xFull[:,randomSeed_test+n_test+1:randomSeed_dev -1]),axis=1)
    x_train = np.concatenate((x_train,xFull[:,randomSeed_dev+n_dev+1:Nt]),axis=1)
    y_train = np.concatenate((yFull[0:randomSeed_test-1],yFull[randomSeed_test+n_test+1:randomSeed_dev-1]),axis=0)
    y_train = np.concatenate((y_train,yFull[randomSeed_dev+n_dev+1:Nt]),axis=0)
    time_train = np.concatenate((time[0:randomSeed_test-1],time[randomSeed_test+n_test+1:randomSeed_dev-1]),axis=0)
    time_train = np.concatenate((time_train,time[randomSeed_dev+n_dev+1:Nt]),axis=0)
    hour_train = np.concatenate((hour[0:randomSeed_test-1],hour[randomSeed_test+n_test+1:randomSeed_dev-1]),axis=0)
    hour_train = np.concatenate((hour_train,hour[randomSeed_dev+n_dev+1:Nt]),axis=0)
    month_train = np.concatenate((month[0:randomSeed_test-1],month[randomSeed_test+n_test+1:randomSeed_dev-1]),axis=0)
    month_train = np.concatenate((month_train,month[randomSeed_dev+n_dev+1:Nt]),axis=0)
else:
    x_train = np.concatenate((xFull[:,0:randomSeed_dev-1],xFull[:,randomSeed_dev+n_dev+1:randomSeed_test -1]),axis=1)
    x_train = np.concatenate((x_train,xFull[:,randomSeed_test+n_test+1:Nt]),axis=1)
    y_train = np.concatenate((yFull[0:randomSeed_dev-1],yFull[randomSeed_dev+n_dev+1:randomSeed_test-1]),axis=0)
    y_train = np.concatenate((y_train,yFull[randomSeed_test+n_test+1:Nt]),axis=0)
    time_train = np.concatenate((time[0:randomSeed_dev-1],time[randomSeed_dev+n_dev+1:randomSeed_test-1]),axis=0)
    time_train = np.concatenate((time_train,time[randomSeed_test+n_test+1:Nt]),axis=0)
    hour_train = np.concatenate((hour[0:randomSeed_dev-1],hour[randomSeed_dev+n_dev+1:randomSeed_test-1]),axis=0)
    hour_train = np.concatenate((hour_train,hour[randomSeed_test+n_test+1:Nt]),axis=0)
    month_train = np.concatenate((month[0:randomSeed_dev-1],month[randomSeed_dev+n_dev+1:randomSeed_test-1]),axis=0)
    month_train = np.concatenate((month_train,month[randomSeed_test+n_test+1:Nt]),axis=0)
    
x_train = x_train.T
print(x_train.shape)
print(time_train.shape)
print(hour_train.shape)
print(month_train.shape)

(309190, 17)
(309190,)
(309190,)
(309190,)
a


In [4]:
# Create a function that will sort through the code and pick out the x and y sets that correspond
# with the time window we want. Pass in the training data, the start and end you want, as well as
# what timescale you want

def getdata_range(x_train,y_train, hour_train, time_train, month_train,start,end,timescale):
    # start - the starting value of the data cut
    # end - the ending value of the data cut
    # the timescale we want to cut the data in reference to
    
    
    # check to see what timescale we want to cut the data with
    if timescale == 'hour':
        timevector = hour_train
    if timescale == 'time':
        timevector = time_train
    if timescale == 'month':
        timevector = month_train
    
    #initialize
    x_train_cut =np.zeros((1,x_train.shape[1]))
    y_train_cut = np.zeros(x_train.shape[0])
    hour_train_cut = np.zeros(x_train.shape[0])
    month_train_cut = np.zeros(x_train.shape[0])
    time_train_cut = np.zeros(x_train.shape[0])
    counter = 0;

    for ii in range(0,x_train.shape[0]):


        
        #check to see if we are in the range, this is based on the y values time stamp
        if (int(timevector[ii]) >= start) & (int(timevector[ii])<= end):
            
            x_train_cut = np.concatenate((x_train_cut,  np.matrix(x_train[ii,:])),axis=0)
            y_train_cut[counter] = y_train[ii]
            hour_train_cut[counter] = hour_train[ii]
            time_train_cut[counter] = time_train[ii]
            month_train_cut[counter] = month_train[ii]
            counter = counter + 1;
    
    x_train_cut = x_train_cut[1:-1,:]
    y_train_cut = y_train_cut[0:counter]
    hour_train_cut = hour_train_cut[0:counter]
    time_train_cut = time_train_cut[0:counter]
    month_train_cut = month_train_cut[0:counter]
 

    return x_train_cut , y_train_cut , hour_train_cut , time_train_cut , month_train_cut 



In [19]:
# example of how to cut the data

# choose only the morning
start_time = 0;
end_time = 6;
timescale = 'hour'

x_train_cut , y_train_cut , hour_train_cut , time_train_cut , month_train_cut  = getdata_range(x_train,y_train, hour_train, time_train, month_train,start_time,end_time,timescale)


#choose only the morning during january
start_month = 1;
end_month = 1;
timescale = 'month'

x_train_janmorn , y_train_janmorn , hour_train_janmorn , time_train_janmorn , month_train_janmorn  = getdata_range(x_train_cut,y_train_cut,hour_train_cut,time_train_cut,month_train_cut,start_month,end_month,timescale)


In [24]:

np.max(month_train_janmorn)

1.0

In [27]:
np.max(hour_train_janmorn)

6.0

In [25]:
print(np.shape(x_train_janmorn))
print(np.shape(y_train_janmorn))
print(np.shape(month_train_janmorn))

(7811, 17)
(7812,)
(7812,)
