# Step 1: Create a view on the data

Analyze the data from the ORACLE table and create a view on top of it. The users don't have to know the data comes from Oracle. It just looks like it comes from SQL Server.

In [6]:
DROP VIEW IF EXISTS rental_data;
GO
CREATE VIEW rental_data AS
SELECT * FROM oracle.rental_data;
GO

# Step 2: Train and generate a model

Create a table to store the model, create a procedure to generate the model based on prediction data, and then execute the procedure and store the results of the model (binary) into the table. The source of the data is the view that uses Oracle as a base.

In [7]:
USE TutorialDB
GO
DROP TABLE IF EXISTS rental_py_models;
GO
CREATE TABLE rental_py_models (
	model_name VARCHAR(30) NOT NULL DEFAULT('default model') PRIMARY KEY,
	model VARBINARY(MAX) NOT NULL
);
GO
-- Stored procedure that trains and generates a Python model using the rental_data and a decision tree algorithm
DROP PROCEDURE IF EXISTS generate_rental_py_model;
go
CREATE PROCEDURE generate_rental_py_model (@trained_model varbinary(max) OUTPUT)
AS
BEGIN
    EXECUTE sp_execute_external_script
      @language = N'Python'
    , @script = N'
from sklearn.linear_model import LinearRegression
import pickle

df = rental_train_data

# Get all the columns from the dataframe.
columns = df.columns.tolist()

# Store the variable well be predicting on.
target = "RentalCount"

# Initialize the model class.
lin_model = LinearRegression()

# Fit the model to the training data.
lin_model.fit(df[columns], df[target])

#Before saving the model to the DB table, we need to convert it to a binary object
trained_model = pickle.dumps(lin_model)'

, @input_data_1 = N'select "RentalCount", "Year", "Month", "Day", "WeekDay", "Snow", "Holiday" from dbo.rental_data where Year < 2015'
, @input_data_1_name = N'rental_train_data'
, @params = N'@trained_model varbinary(max) OUTPUT'
, @trained_model = @trained_model OUTPUT;
END;
GO
TRUNCATE TABLE rental_py_models;
GO
DECLARE @model VARBINARY(MAX);
EXEC generate_rental_py_model @model OUTPUT;
INSERT INTO rental_py_models (model_name, model) VALUES('linear_model', @model);
GO

 # Step 3: Create objects to run the prediction model and store the results

 Create a procedure and table to store the results of the prediction

In [8]:
DROP PROCEDURE IF EXISTS py_predict_rentalcount;
GO
CREATE PROCEDURE py_predict_rentalcount (@model varchar(100))
AS
BEGIN
	DECLARE @py_model varbinary(max) = (select model from rental_py_models where model_name = @model);

	EXEC sp_execute_external_script
				@language = N'Python',
				@script = N'

# Import the scikit-learn function to compute error.
from sklearn.metrics import mean_squared_error
import pickle
import pandas as pd

rental_model = pickle.loads(py_model)

df = rental_score_data

# Get all the columns from the dataframe.
columns = df.columns.tolist()

# variable we will be predicting on.
target = "RentalCount"

# Generate our predictions for the test set.
lin_predictions = rental_model.predict(df[columns])
print(lin_predictions)

# Compute error between our test predictions and the actual values.
lin_mse = mean_squared_error(lin_predictions, df[target])
#print(lin_mse)

predictions_df = pd.DataFrame(lin_predictions)

OutputDataSet = pd.concat([predictions_df, df["RentalCount"], df["Month"], df["Day"], df["WeekDay"], df["Snow"], df["Holiday"], df["Year"]], axis=1)
'
, @input_data_1 = N'Select "RentalCount", "Year" ,"Month", "Day", "WeekDay", "Snow", "Holiday"  from rental_data where Year = 2015'
, @input_data_1_name = N'rental_score_data'
, @params = N'@py_model varbinary(max)'
, @py_model = @py_model
with result sets (("RentalCount_Predicted" float, "RentalCount" float, "Month" float,"Day" float,"WeekDay" float,"Snow" float,"Holiday" float, "Year" float));

END;
GO
DROP TABLE IF EXISTS [dbo].[py_rental_predictions];
GO
--Create a table to store the predictions in
CREATE TABLE [dbo].[py_rental_predictions](
 [RentalCount_Predicted] [int] NULL,
 [RentalCount_Actual] [int] NULL,
 [Month] [int] NULL,
 [Day] [int] NULL,
 [WeekDay] [int] NULL,
 [Snow] [int] NULL,
 [Holiday] [int] NULL,
 [Year] [int] NULL
) ON [PRIMARY];
GO

# Step 4: Run the prediction model, store and display the results

Run the procedure to run the prediction model and store the results in a table. Query out the results.

In [9]:
USE TutorialDB;
GO
TRUNCATE TABLE py_rental_predictions;
--Insert the results of the predictions for test set into a table
INSERT INTO py_rental_predictions
EXEC py_predict_rentalcount 'linear_model';
GO
-- Select contents of the table
SELECT * FROM py_rental_predictions;
GO

RentalCount_Predicted,RentalCount_Actual,Month,Day,WeekDay,Snow,Holiday,Year
42,42,2,11,4,0,0,2015
360,360,3,29,1,0,0,2015
20,20,4,22,4,0,0,2015
42,42,3,6,6,0,0,2015
405,405,2,28,7,1,0,2015
38,38,1,12,2,1,0,2015
327,327,1,24,7,0,0,2015
34,34,4,10,6,0,0,2015
37,37,4,16,5,1,0,2015
514,514,1,18,1,0,0,2015
