In [1]:
from math import sqrt
import numpy as np
from numpy import split
from numpy import array
from pandas import read_csv
import pandas as pd
from sklearn.metrics import mean_squared_error,mean_absolute_error
from matplotlib import pyplot
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import Lars
from sklearn.linear_model import LassoLars
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import SGDRegressor
# Render floats in pandas output with thousands separators and up to 20 significant digits.
pd.options.display.float_format='{:,.20}'.format

In [2]:
# split a univariate dataset into train/test sets
def split_dataset(data, n_steps=5, train_start=5, train_end=140, test_end=175):
    """Split a row-ordered dataset into train/test arrays of fixed-length windows.

    Rows ``train_start:train_end`` form the training span and rows
    ``train_end:test_end`` the test span. Each span is cut into consecutive,
    non-overlapping windows of ``n_steps`` rows.

    Args:
        data: sliceable array (e.g. ``DataFrame.values``), one row per time step.
        n_steps: number of rows per window.
        train_start: first row of the training span.
        train_end: end of the training span and start of the test span.
        test_end: end of the test span.
            The defaults reproduce the original hard-coded 5:140 / 140:175 split.

    Returns:
        ``(train, test)`` arrays shaped ``(n_windows, n_steps, ...)``.

    Raises:
        ValueError: if either span is empty or is not a whole number of windows.
    """
    train, test = data[train_start:train_end], data[train_end:test_end]
    for label, part in (('train', train), ('test', test)):
        if len(part) == 0 or len(part) % n_steps:
            raise ValueError(
                '%s span has %d rows; expected a non-zero multiple of %d'
                % (label, len(part), n_steps))
    # integer division: np.split expects a whole section count, not the float `/` yields
    train = array(split(train, len(train) // n_steps))
    test = array(split(test, len(test) // n_steps))
    return train, test

In [3]:
 
# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
    """Score multi-step forecasts with mean absolute error (MAE).

    Args:
        actual: 2-D array of observed values, one row per forecast window and
            one column per forecast step.
        predicted: array of forecasts with the same shape as ``actual``.

    Returns:
        ``(score, scores)``: ``score`` is the MAE over every element and
        ``scores`` is a list with the MAE of each forecast step (column).

    Raises:
        ValueError: if the two arrays do not have the same shape.
    """
    # debug output kept from the original: first observed row next to its forecast
    print("Printing actual and predicted " , actual[0 , :] , predicted[0,:])

    observed = np.asarray(actual, dtype=float)
    forecasted = np.asarray(predicted, dtype=float)
    if observed.shape != forecasted.shape:
        raise ValueError(
            'actual and predicted must have the same shape, got %s and %s'
            % (observed.shape, forecasted.shape))

    abs_error = np.abs(observed - forecasted)
    # per-step MAE: average over windows (rows) for each forecast step (column)
    scores = abs_error.mean(axis=0).tolist()
    # overall MAE: average over every window and step
    score = abs_error.mean()
    return score, scores

In [4]:
# summarize scores
def summarize_scores(name, score, scores):
    """Print one summary line: the model name, its overall score, and each per-step score.

    All numbers are printed with 15 decimal places; the per-step scores are
    comma-separated after the bracketed overall score.
    """
    formatted_steps = ('%.15f' % value for value in scores)
    per_step = ', '.join(formatted_steps)
    print('%s: [%.15f] %s' % (name, score, per_step))
 
# prepare a list of ml models
def get_models(models=None):
    """Build the dictionary of linear regressors to evaluate, keyed by a short name.

    Args:
        models: optional dict to add the estimators to (mutated in place).
            When omitted, a fresh dict is created on every call; the previous
            ``dict()`` default was a single object shared by all calls.

    Returns:
        The dict mapping short names to unfitted estimator instances.
    """
    if models is None:
        models = dict()
    # linear models
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['lars'] = Lars()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    models['ranscac'] = RANSACRegressor()
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    print('Defined %d models' % len(models))
    return models

In [5]:
# create a feature preparation pipeline for a model
def make_pipeline(model):
    """Wrap ``model`` in a Pipeline that standardizes, then min-max scales, its inputs.

    The steps are named 'standardize', 'normalize' and 'model', in that order.
    """
    stages = [
        ('standardize', StandardScaler()),
        ('normalize', MinMaxScaler()),
        ('model', model),
    ]
    return Pipeline(steps=stages)

In [6]:
# make a recursive multi-step forecast
def forecast(model, input_x, n_input, n_steps=5):
    """Produce a recursive multi-step forecast from a one-step model.

    Each prediction is appended to the running history, so it becomes part of
    the input window used for the next step.

    Args:
        model: fitted estimator whose ``predict`` accepts a ``(1, n_input)``
            array; element 0 of its result is taken as the forecast.
        input_x: iterable of the most recent observations (at least ``n_input``).
        n_input: number of trailing values fed to the model at each step.
        n_steps: number of steps to forecast. Defaults to 5, the horizon the
            original code hard-coded.

    Returns:
        List of ``n_steps`` predictions in forecast order.
    """
    window = list(input_x)
    yhat_sequence = list()
    for _ in range(n_steps):
        # the model always sees the latest n_input values, predictions included
        X = array(window[-n_input:]).reshape(1, n_input)
        yhat = model.predict(X)[0]
        yhat_sequence.append(yhat)
        window.append(yhat)
    return yhat_sequence

In [7]:
# convert windows of weekly multivariate data into a series of total power
def to_series(data, col=14):
    """Flatten one column of a sequence of windows into a single 1-D series.

    Args:
        data: iterable of 2-D windows shaped ``(rows, columns)``.
        col: index of the column to extract from every window. Defaults to 14,
            the index the original code hard-coded.

    Returns:
        1-D array holding that column of each window, in window order.

    Raises:
        IndexError: if a window has no column ``col``.
    """
    per_window = [window[:, col] for window in data]
    return array(per_window).flatten()

In [8]:
# convert history into inputs and outputs
def to_supervised(history, n_input):
    """Turn windowed history into sliding-window samples for one-step prediction.

    The history is flattened with ``to_series``. Every run of ``n_input``
    consecutive values that is followed by at least one more value becomes an
    input row, and the value right after it becomes the matching target.

    Returns:
        ``(X, y)`` as arrays: the input rows and their targets.
    """
    series = to_series(history)
    total = len(series)
    # keep only start positions whose target index still lies inside the series
    starts = [begin for begin in range(total) if begin + n_input < total]
    inputs = [series[begin:begin + n_input] for begin in starts]
    targets = [series[begin + n_input] for begin in starts]
    return array(inputs), array(targets)

In [9]:
# fit a model and make a forecast
def sklearn_predict(model, history, n_input):
    """Fit ``model`` on the history and forecast the next window recursively.

    Args:
        model: unfitted scikit-learn style regressor; it is wrapped by
            ``make_pipeline`` and fitted on the supervised samples.
        history: list of windows accepted by ``to_supervised``.
        n_input: number of lag values used as model input.

    Returns:
        The list of predictions returned by ``forecast``.
    """
    # prepare data
    train_x, train_y = to_supervised(history, n_input)
    # make pipeline
    pipeline = make_pipeline(model)
    # fit the model
    pipeline.fit(train_x, train_y)
    # Seed the forecast with the n_input MOST RECENT observations. The last
    # row of train_x stops one step short of the end of the history (its target
    # train_y[-1] is the final observation), so seeding with train_x[-1] would
    # re-predict an already-known value and shift every forecast by one step.
    latest_window = list(train_x[-1, 1:]) + [train_y[-1]]
    # predict the week, recursively
    yhat_sequence = forecast(pipeline, latest_window, n_input)
    return yhat_sequence

In [10]:
# evaluate a single model
def evaluate_model(model, train, test, n_input):
    """Walk-forward validation of one model over the test windows.

    For each test window the model is refitted on everything seen so far and
    asked for a forecast; the real window is then added to the history.

    Returns:
        ``(score, scores)`` from ``evaluate_forecasts``, computed against
        column 14 of the test windows.
    """
    seen = list(train)
    collected = list()
    for window in test:
        # forecast first, then reveal the real observations for the next round
        collected.append(sklearn_predict(model, seen, n_input))
        seen.append(window)
    predictions = array(collected)
    score, scores = evaluate_forecasts(test[:, :, 14], predictions)
    return score, scores

In [11]:
# load the new file
df = read_csv('1.csv')

# parse the clock column as datetimes and store it back on the frame
df1 = pd.to_datetime(df['Epoch_Time_of_Clock'])
df['Epoch_Time_of_Clock']=df1
# index the frame by that timestamp (the column is already datetime at this point)
df.index = pd.to_datetime(df['Epoch_Time_of_Clock'],format='%Y-%m-%d %H:%M:%S')

# df1 is rebound here: it now holds the seven selected columns side by side
df1 = pd.concat([df['sqrt_A'] , df['e'],df['i0'],df['omega'],df['OMEGA'],df['Del_n'],df['M0']],axis=1)
# NOTE(review): this is not the cell's last expression, so the preview is never displayed
df1.head()

# build lag_1..lag_5: column `e` shifted down by 1 to 5 rows
# (rows 0-4 therefore contain NaN; split_dataset starts at row 5)
df3 = pd.DataFrame()

for i in range(1, 6):
    df3["lag_{}".format(i)] = df1.e.shift(i)
    
dataset=df3
# split into train and test
# NOTE(review): `dataset` has only the 5 lag columns, but to_series and
# evaluate_model index column 14; that mismatch is the IndexError recorded in
# this cell's output. Confirm which column is the intended target.
train, test = split_dataset(dataset.values)
# prepare the models to evaluate
models = get_models()
# lookback: number of lag values fed to each model
n_input = 5
# evaluate each model
# x-axis labels, one per forecast step
days = ['one', 'two', 'three', 'four', 'five']
for name, model in models.items():
    # evaluate and get scores
    score, scores = evaluate_model(model, train, test, n_input)
    # summarize scores
    print("Score " , score)
    summarize_scores(name, score, scores)
    # plot scores
    pyplot.plot(days, scores, marker='o', label=name)
# show plot
pyplot.legend()
pyplot.show()

Defined 10 models


IndexError: index 14 is out of bounds for axis 1 with size 5

In [None]:
# Preview the first rows of `dataset` (the lag-feature frame assigned in the cell above).
dataset.head()

In [None]:
# Reload the raw CSV. This rebinds `df`, so the datetime index set earlier is discarded.
df = pd.read_csv('1.csv')

In [None]:
# List the column labels of the reloaded frame.
df.columns

In [None]:
# Rebuild the train/test windows from every column of the raw CSV (overwrites the earlier `train`/`test`).
train, test = split_dataset(df.values)

In [None]:
# Dimensions of the training windows: rows 5:140 cut into windows of 5 give (27, 5, n_columns) when the CSV has at least 140 rows.
train.shape

In [None]:
# Show the first training window.
train[0]

In [None]:
# Instantiate the candidate regressors; get_models also prints how many are defined.
models = get_models()

In [None]:
# Display the LinearRegression estimator registered under 'lr'.
models['lr']

In [None]:
# Copy the training windows into a list, the same way evaluate_model seeds its history.
hist = [x for x in train]
# NOTE(review): this displays every window; a slice would keep the output short.
hist

In [None]:
# Number of training windows.
len(hist)

In [None]:
# Shape of a single window: (rows per window, columns).
hist[0].shape

In [None]:
# Build sliding-window inputs and targets with a 5-step lookback; to_supervised reads column 14 of each window via to_series.
train_x , train_y = to_supervised(hist , 5)

In [None]:
# First input row: the first 5 consecutive values of the series.
train_x[0]

In [None]:
# Target paired with the first input row: the value immediately after it in the series.
train_y[0]

In [None]:
# Wrap the linear-regression estimator in the scaling pipeline (StandardScaler -> MinMaxScaler -> model).
pipeline = make_pipeline(models['lr'])

In [None]:
# Fit the scalers and the regressor on the supervised samples.
pipeline.fit(train_x,train_y)

In [None]:
# Seed for the manual recursive forecast: the last training input row.
# NOTE(review): this row ends one step before the final observation (train_y[-1]).
input_x = train_x[-1, : ]
input_x

In [None]:
# Copy the seed into a plain list so predictions can be appended during the recursive forecast.
input_data = [x for x in input_x]
input_data

In [None]:
# Manual walk-through of forecast(): five recursive one-step predictions.
yhat_sequence = []
for j in range(5):
    # prepare the input data
    X = array(input_data[-n_input:]).reshape(1, n_input)
    print(X)
    # make a one-step forecast with the pipeline fitted above; `model` was only
    # a loop variable left over from an earlier cell and bypasses the scalers
    yhat = pipeline.predict(X)[0]
    print("**")
    print(yhat)
    # add to the result
    yhat_sequence.append(yhat)
    # add the prediction to the input
    input_data.append(yhat)

In [None]:
# Dimensions of the held-out windows: rows 140:175 cut into windows of 5 give (7, 5, n_columns) when the CSV has at least 175 rows.
test.shape

In [None]:
# Manual walk-through of evaluate_model() for the linear-regression model.
# Bind the estimator explicitly instead of relying on the `model` loop
# variable left behind by an earlier cell.
model = models['lr']
# history is a list of weekly data
history = [x for x in train]
# walk-forward validation over each week
predictions = list()
for i in range(len(test)):
    # predict the week
    yhat_sequence = sklearn_predict(model, history, n_input)
    # store the predictions
    predictions.append(yhat_sequence)
    # get real observation and add to history for predicting the next week
    history.append(test[i, :])
predictions = array(predictions)

In [None]:
# Forecasts collected by the walk-forward loop: one row per test window, one column per forecast step.
predictions

In [None]:
# Score the forecasts against column 14 of the test windows; returns the overall and per-step mean absolute error.
score, scores = evaluate_forecasts(test[:, :, 14], predictions)

In [None]:
# Overall mean absolute error across all test windows and forecast steps.
score

In [None]:
# Mean absolute error for each forecast step.
scores

In [None]:
# Observed values the forecasts were scored against: column 14 of every test window.
test[:, :, 14]

In [None]:
# Full experiment on the raw CSV: unlike the earlier lag-feature frame, every
# CSV column is kept, and the helpers read column 14 as the series to forecast.
# NOTE(review): assumes 1.csv has at least 15 columns and 175 rows - confirm.
dataset = pd.read_csv('1.csv')
# split into train and test
train, test = split_dataset(dataset.values)
# prepare the models to evaluate
models = get_models()
# lookback: number of lag values fed to each model
n_input = 5
# evaluate each model
# x-axis labels, one per forecast step
days = ['one', 'two', 'three', 'four', 'five']
for name, model in models.items():
    # evaluate and get scores
    score, scores = evaluate_model(model, train, test, n_input)
    # summarize scores
    print("Score " , score)
    summarize_scores(name, score, scores)
    # plot scores: one line per model, per-step error on the y-axis
    pyplot.plot(days, scores, marker='o', label=name)
# show plot
pyplot.legend()
pyplot.show()