In [61]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import quandl

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

import matplotlib.pyplot as plt
%matplotlib inline

In [25]:
# Import API key from file: the key is read from the API_config module
# instead of being written into this notebook.
# NOTE(review): API_config is presumably a local, uncommitted module — confirm.
import API_config
# Register the key with the quandl client so later quandl.get() calls use it
quandl.ApiConfig.api_key = API_config.API_KEY

In [65]:
# Set start and end date for stock prices.
# NOTE: end_date follows the current day, so the downloaded range (and every
# result derived from it) changes when the notebook is re-run on a later date.
start_date = datetime.date(2009, 3, 8)
end_date = datetime.date.today()
# Load data from Quandl (dataset code 'FSE/SAP_X')
data = quandl.get('FSE/SAP_X', start_date=start_date, end_date=end_date)
# Save data to CSV file. DataFrame.to_csv does not create missing parent
# directories, so make sure 'data/' exists first; otherwise the cell fails
# on a fresh checkout.
csv_path = Path('data/sap_stock.csv')
csv_path.parent.mkdir(parents=True, exist_ok=True)
data.to_csv(csv_path)

In [69]:
# Keep only the closing price, then turn the date index into an ordinary
# column so that the frame ends up with a plain integer index
df = data.reindex(columns=['Close']).reset_index()

In [70]:
# Preview the first rows to confirm the date column and closing price are present
df.head()

Unnamed: 0,Date,Close
0,2009-03-09,25.59
1,2009-03-10,26.87
2,2009-03-11,26.64
3,2009-03-12,26.18
4,2009-03-13,25.73


In [71]:
# Check data types in columns (info() also reports the row count and the
# number of non-null values per column)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2549 entries, 0 to 2548
Data columns (total 2 columns):
Date     2549 non-null datetime64[ns]
Close    2549 non-null float64
dtypes: datetime64[ns](1), float64(1)
memory usage: 39.9 KB


In [54]:
# Count missing values in each relevant column, one count per line
for column_name in ('Close', 'Date'):
    print(df[column_name].isna().sum())

0
0


In [72]:
# Split data into train and test set: 80% / 20%
# train_test_split shuffles the rows by default; fixing random_state makes the
# split, and therefore the coefficients fitted from it, reproducible across
# kernel restarts.
# NOTE(review): because of the shuffle, test rows are interleaved in time with
# train rows; pass shuffle=False if a chronological hold-out is intended.
RANDOM_STATE = 42
train, test = train_test_split(df, test_size=0.20, random_state=RANDOM_STATE)

In [79]:
# Plot train set


In [88]:
# Target: the closing price of every training row
y_train = train.loc[:, 'Close']
# Feature: the row index as a single-column 2D array, the shape .fit() expects
X_train = np.array(train.index)[:, np.newaxis]

In [89]:
# Create the LinearRegression estimator
model = LinearRegression()
# Fit the linear model on the training data; the call is left as the cell's
# last expression so the fitted estimator is displayed as the cell output
model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [91]:
# Fitted coefficients: one entry per feature, so a single slope value here
model.coef_

array([0.02763423])

In [92]:
# Fitted intercept of the regression line
model.intercept_

25.703095708906183

In [90]:
# Pull the fitted line's parameters out of the model for reporting.
# np.asscalar was deprecated in NumPy 1.16 and removed in NumPy 1.23;
# ndarray.item() is its documented replacement and likewise converts the
# one-element coefficient array to a plain Python float.
slope = np.squeeze(model.coef_).item()
intercept = model.intercept_
print('slope: ', slope, 'intercept: ', intercept)

slope:  0.0276342327551606 intercept:  25.703095708906183
