# Importing all the useful libraries 

In [5]:
#This program predicts stock prices using Machine Learning

import quandl #for dataset
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

# Fetching the dataset

In [6]:
# Download the daily price history for Amazon from Quandl's WIKI database.
# To analyse Facebook instead, replace the 'AMZN' ticker with 'FB'.
# NOTE(review): this call needs network access; confirm the WIKI dataset is
# still being served/updated before relying on it.
df = quandl.get("WIKI/AMZN")


# Peek at the first three rows of the raw data
print(df.head(n=3))


             Open   High    Low  Close     Volume  Ex-Dividend  Split Ratio  \
Date                                                                          
1997-05-16  22.38  23.75  20.50  20.75  1225000.0          0.0          1.0   
1997-05-19  20.50  21.25  19.50  20.50   508900.0          0.0          1.0   
1997-05-20  20.75  21.00  19.63  19.63   455600.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
1997-05-16   1.865000   1.979167  1.708333    1.729167   14700000.0  
1997-05-19   1.708333   1.770833  1.625000    1.708333    6106800.0  
1997-05-20   1.729167   1.750000  1.635833    1.635833    5467200.0  


In [7]:
# Keep only the Adjusted Close price of the stock.
# .copy() makes this an independent frame rather than a slice of the
# downloaded data, so columns can be added to it later without pandas
# raising SettingWithCopyWarning.
df = df[['Adj. Close']].copy()

# Peek at the first rows of the reduced frame
print(df.head())

            Adj. Close
Date                  
1997-05-16    1.729167
1997-05-19    1.708333
1997-05-20    1.635833
1997-05-21    1.427500
1997-05-22    1.395833


In [8]:
# Forecast horizon: how many rows ('n' trading days) ahead we want to predict
forecast_out = 30


# Create the target/dependent column: each row's 'Prediction' is the
# adjusted close found 'n' rows later. Shifting upwards by 'n' leaves the
# final 'n' rows of this column as NaN.
future_close = df['Adj. Close'].shift(periods=-forecast_out)
df['Prediction'] = future_close
print(df.head())

            Adj. Close  Prediction
Date                              
1997-05-16    1.729167    1.541667
1997-05-19    1.708333    1.515833
1997-05-20    1.635833    1.588333
1997-05-21    1.427500    1.911667
1997-05-22    1.395833    2.000000


# Creating the Independent Data set (X)

In [9]:
# Feature matrix: every column except the target, as a 2-D numpy array.
# `columns=` is used instead of a positional axis argument, which newer
# pandas releases no longer accept.
X = np.array(df.drop(columns=['Prediction']))

# Remove the last 'n' rows: their target value is NaN, so they cannot be
# used for training or testing.
X = X[:-forecast_out]

print(X)

[[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1350.47      ]
 [1338.99      ]
 [1386.23      ]]


# Create the dependent data set (Y)

In [10]:

# Target vector: the 'Prediction' column as a 1-D numpy array (this still
# includes the trailing NaN values), immediately trimmed so that the last
# 'n' entries are left out.
Y = np.array(df['Prediction'])[:-forecast_out]

print(Y)


[1.54166667e+00 1.51583333e+00 1.58833333e+00 ... 1.49556000e+03
 1.55586000e+03 1.49705000e+03]


#  Splitting the data set into TESTING and TRAINING Sets

#### Here we use an 80:20 ratio: 80% of the rows for training and 20% for testing.

In [11]:
# Split the data into 80% training and 20% testing.
# random_state pins the shuffle so the scores and predictions below are the
# same on every "Restart & Run All".
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows; consider a chronological split
# (shuffle=False) for a more realistic evaluation of a time series.

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Support Vector Machine Model

#### A “Support Vector Machine” (SVM) is a supervised machine learning algorithm that can be used for both classification and regression problems, although it is mostly used for classification. In this algorithm, we plot each data item as a point in n-dimensional space (where n is the number of features you have), with the value of each feature being the value of a particular coordinate. Classification is then performed by finding the hyperplane that best separates the two classes. Here we use the regression variant of the algorithm (Support Vector Regression, SVR).

In [12]:
# Build a Support Vector Regressor with an RBF kernel (C=1000, gamma=0.1)
# and fit it on the training split.

svr_rbf = SVR(C=1e3, gamma=0.1, kernel='rbf')
svr_rbf.fit(X=x_train, y=y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [13]:
# Evaluate the SVR on the held-out split. score() returns the coefficient
# of determination (R^2) of the prediction; the best possible value is 1.0.

svm_confidence = svr_rbf.score(X=x_test, y=y_test)

print("SVM Confidence", svm_confidence)

SVM Confidence 0.9190193993453613


# Linear Regression Model

### A model that assumes a linear relationship between the input variables (x) and the single output variable (y). More specifically, that y can be calculated from a linear combination of the input variables (x).

In [14]:
# Instantiate an ordinary least-squares Linear Regression model
lr = LinearRegression()

# Fit it on the same training split that the SVR used
lr.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [15]:
# Testing model accuracy: score() returns the coefficient of determination
# (R^2) of the prediction. The best possible score is 1.0.

lr_confidence = lr.score(x_test, y_test)

# Label the value as the Linear Regression score (it was previously printed
# under the "SVM Confidence" label, which made the two results easy to mix up).
print("LR Confidence", lr_confidence)

SVM Confidence 0.9894178024349112


Selecting the inputs for the forecast: the last n (= `forecast_out`) rows of adjusted close prices, i.e. the most recent days for which no target value exists yet.

In [16]:
# Set x_forecast to the last 'n' rows of the 'Adj. Close' column: these are
# the rows that were excluded from X because they have no target value.
# `columns=` is used instead of a positional axis argument, which newer
# pandas releases no longer accept.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
print(x_forecast)

[[1414.51]
 [1451.05]
 [1461.76]
 [1448.69]
 [1468.35]
 [1482.92]
 [1484.76]
 [1500.  ]
 [1521.95]
 [1511.98]
 [1512.45]
 [1493.45]
 [1500.25]
 [1523.61]
 [1537.64]
 [1545.  ]
 [1551.86]
 [1578.89]
 [1598.39]
 [1588.18]
 [1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


# Prediction 

The last remaining step is to predict the adjusted closing price of the stock for the next n days.

In [17]:
# Linear Regression forecast for the next 'n' days (30 in our case),
# produced from the most recent 'n' adjusted close prices.
lr_prediction = lr.predict(X=x_forecast)
print(lr_prediction)



[1500.02520904 1538.86132207 1550.24432072 1536.35302358 1557.24842539
 1572.73398004 1574.68960259 1590.88725893 1614.21656055 1603.62006227
 1604.11959629 1583.92566779 1591.15296852 1615.9808722  1630.89249414
 1638.71498434 1646.00605537 1674.73457576 1695.45992344 1684.60834397
 1687.60554809 1678.38011128 1667.07151132 1638.64058566 1682.83340393
 1677.89120564 1637.75842983 1586.16825669 1650.25740874 1587.75188582]


In [18]:
# SVM (SVR) forecast for the next 'n' days (30 in our case).
# NOTE(review): the recorded output is almost constant (~659) even though the
# inputs vary; the forecast inputs lie above the largest training price, so
# the RBF model is presumably unable to extrapolate there - confirm before
# trusting these numbers.
svm_prediction = svr_rbf.predict(X=x_forecast)
print(svm_prediction)


[1048.2888614   660.36811275  659.35973841  687.2062306   659.35973841
  659.35973841  659.35973841  659.35973841  659.35973841  659.35973841
  659.35973841  659.35973841  659.35973841  659.35973841  659.35973841
  659.35973841  659.35973841  659.35973841  659.35973841  659.35973841
  659.35973841  659.35973841  659.35973841  659.35973841  659.35973841
  659.35973841  659.35973841  659.35973841  659.35973841  659.35973841]
