## Linear Regression 

In [31]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
# Notebook-wide matplotlib defaults, applied in a single call:
# white figure background, an 18x16 canvas, 60 DPI both on screen and for
# saved files, and a 14 pt base font.
plt.rcParams.update({
    'figure.facecolor': 'w',
    'figure.figsize': [18, 16],
    'figure.dpi': 60,
    'savefig.dpi': 60,
    'font.size': 14,
})

## Preprocessing data 

In [32]:
# Load the Google stock-price time series and turn it into a supervised
# dataset: a few engineered features per row plus a `label` column holding
# the adjusted close `no_test_data` rows later.
import math

FORECAST_FRACTION = 0.01   # label look-ahead, as a fraction of the row count (1%)
NAN_SENTINEL = -99999      # value written over missing entries before modelling

prices_raw = pd.read_csv('dataset/dataset-stock-prices-google.csv')

# Keep only the adjusted price/volume columns. The explicit .copy() makes
# `data` an independent frame, so the column assignments below never write
# into a selection of another frame (the notebook silences all warnings, which
# would otherwise hide pandas' SettingWithCopyWarning here).
data = prices_raw[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']].copy()

# Engineered features: relationships between the raw prices.
#   High-Low Percent diff -> intraday range relative to the close (volatility)
#   Daily Percent change  -> open-to-close move relative to the open
data['High-Low Percent diff'] = (data['Adj. High'] - data['Adj. Low']) / data['Adj. Close'] * 100.0
data['Daily Percent change'] = (data['Adj. Close'] - data['Adj. Open']) / data['Adj. Open'] * 100.0
data = data[['Adj. Close', 'High-Low Percent diff', 'Daily Percent change', 'Adj. Volume']].copy()

# Column the model is asked to forecast (the dependent variable).
y_forecast_col = 'Adj. Close'

# Replace missing values with the sentinel. This must happen before the label
# is created so that the dropna() below only removes rows without a label.
data = data.fillna(NAN_SENTINEL)

# Number of rows the label is shifted by: 1% of the dataset, rounded up.
no_test_data = int(math.ceil(FORECAST_FRACTION * len(data)))

# Label = adjusted close `no_test_data` rows ahead. The negative shift moves
# later values up, which leaves the last `no_test_data` rows without a label;
# those rows are dropped.
data['label'] = data[y_forecast_col].shift(periods=-no_test_data)
data = data.dropna()

### Training 

In [35]:
from sklearn import preprocessing, svm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Feature matrix (every column except the label) and target vector.
X = np.array(data.drop(['label'], axis=1))
y = np.array(data['label'])

# Hold out 20% of the rows for testing. random_state pins the shuffle so that
# Restart & Run All reproduces the same split and the same results.
# NOTE(review): the rows are a time series and the label is a future price, so
# a shuffled split mixes past and future rows - consider shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features (zero mean, unit variance) using statistics learned
# from the training rows only; scaling the full matrix before the split would
# leak test-set statistics into training.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# Keep `X` itself standardised as well, for any later cell that reads it.
X = scaler.transform(X)

# Model: support-vector regression with a polynomial kernel.
# Alternative: model = LinearRegression()
model = svm.SVR(kernel='poly')

# Training
model.fit(X_train, y_train)

# Testing
y_pred = model.predict(X_test)

print(y_pred.shape)

# Optional evaluation, disabled by default:
# R^2 on the held-out rows
#print(model.score(X_test, y_test))
# Fitted coefficients (LinearRegression only; a poly-kernel SVR exposes none)
#print('Coeff: {}'.format(model.coef_))
# The mean squared error
#print("MSE: {:05.2f}".format(mean_squared_error(y_test, y_pred)))
# R^2 score: 1 is perfect prediction
#print('Variance score: {:05.2f}'.format(r2_score(y_test, y_pred)))


(1119,)
