# Predicting Stock Price of multiple oil & gas companies
First let's import the libraries and download the data needed.

In [1]:
import pandas as pd
import datetime
import pandas_datareader.data as web

start = datetime.datetime(2010, 1, 1)
end   = datetime.datetime(2017, 1, 11)

df = web.DataReader(['DVN', 'APA', 'EOG', 'COP', 'MRO'],'yahoo',start=start,end=end)['Adj Close']

In [2]:


#calculation of High Low Percentage and Percentage Change

dfreg = df.loc[:,['Adj Close','Volume']]
dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

#Pre-processing & Cross Validation
from sklearn import preprocessing
import numpy as np
import math

# Drop missing value
dfreg.fillna(value=-99999, inplace=True)
# We want to separate 1 percent of the data to forecast
forecast_out = int(math.ceil(0.01 * len(dfreg)))
# Separating the label here, we want to predict the AdjClose
forecast_col = 'Adj Close'
dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
X = np.array(dfreg.drop(['label'], 1))
# Scale the X so that everyone can have the same distribution for linear regression
X = preprocessing.scale(X)
# Finally We want to find Data Series of late X and early X (train) for model generation and evaluation
X_lately = X[-forecast_out:]
X = X[:-forecast_out]
# Separate label and identify it as y
y = np.array(dfreg['label'])
y = y[:-forecast_out]

#Model Generation
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline


# We will plug and play the existing Scikit-Learn library and train
# the model by selecting our X and y train sets. The code will be as following.

# Linear regression
clfreg = LinearRegression(n_jobs=-1)
clfreg.fit(X,y) # x_train, y_train

# Quadratic Regression 2
clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
clfpoly2.fit(X,y)

# Quadratic Regression 3
clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
clfpoly3.fit(X,y)

# KNN Regression
clfknn = KNeighborsRegressor(n_neighbors=2)
clfknn.fit(X,y)

#EVALUATION
confidencereg = clfreg.score(X,y) #x_test, y_test
confidencepoly2 = clfpoly2.score(X,y)
confidencepoly3 = clfpoly3.score(X,y)
confidenceknn = clfknn.score(X,y)

#print some of the stocks forecast
forecast_set = clfreg.predict(X_lately)
dfreg['Forecast'] = np.nan

#plotting the prediction
last_date = dfreg.iloc[-1].name
last_unix = last_date
next_unix = last_unix + datetime.timedelta(days=1)

for i in forecast_set:
    next_date = next_unix
    next_unix += datetime.timedelta(days=1)
    dfreg.loc[next_date] = [np.nan for _ in
range(len(dfreg.columns)-1)]+[i]

dfreg['Adj Close'].tail(500).plot()
dfreg['Forecast'].tail(500).plot()

import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib as mpl
mpl.rc('figure', figsize=(8,7))
mpl.__version__

#adjusting tje style of matplotlib
style.use('ggplot')

plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

KeyError: "None of [Index(['Adj Close', 'Volume'], dtype='object', name='Symbols')] are in the [columns]"