From b1c26888f156ecb4eb7e83e3fb5c7ec045995b6b Mon Sep 17 00:00:00 2001
From: Brett Cannon <brcan@microsoft.com>
Date: Mon, 5 Jun 2017 14:51:42 -0700
Subject: [PATCH 1/3] Collect import statements

Along the way, switch to importing modules instead of classes
---
 .../rental-prediction/rental_prediction.sql   | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql
index bd36334fd3..b2f61709db 100644
--- a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql
+++ b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql
@@ -27,23 +27,24 @@ BEGIN
       @language = N'Python'
     , @script = N'
 
+from sklearn import linear_model
+
+import pickle
+
+
 df = rental_train_data
 
 # Get all the columns from the dataframe.
 columns = df.columns.tolist()
 
-
 # Store the variable well be predicting on.
 target = "RentalCount"
 
-from sklearn.linear_model import LinearRegression
-
 # Initialize the model class.
-lin_model = LinearRegression()
+lin_model = linear_model.LinearRegression()
 # Fit the model to the training data.
 lin_model.fit(df[columns], df[target])
 
-import pickle
 #Before saving the model to the DB table, we need to convert it to a binary object
 trained_model = pickle.dumps(lin_model)
 '
@@ -75,7 +76,7 @@ AS
 BEGIN
 	DECLARE @py_model varbinary(max) = (select model from rental_py_models where model_name = @model);
 
-	EXEC sp_execute_external_script 
+	EXEC sp_execute_external_script
 					@language = N'Python'
 				  , @script = N'
 
@@ -83,7 +84,7 @@ BEGIN
 import pickle
 rental_model = pickle.loads(py_model)
 
-  
+
 df = rental_score_data
 #print(df)
 
@@ -106,7 +107,7 @@ lin_mse = mean_squared_error(linpredictions, df[target])
 #print(lin_mse)
 
 import pandas as pd
-predictions_df = pd.DataFrame(lin_predictions)  
+predictions_df = pd.DataFrame(lin_predictions)
 OutputDataSet = pd.concat([predictions_df, df["RentalCount"], df["Month"], df["Day"], df["WeekDay"], df["Snow"], df["Holiday"], df["Year"]], axis=1)
 '
 	, @input_data_1 = N'Select "RentalCount", "Year" ,"Month", "Day", "WeekDay", "Snow", "Holiday"  from rental_data where Year = 2015'
@@ -114,7 +115,7 @@ OutputDataSet = pd.concat([predictions_df, df["RentalCount"], df["Month"], df["D
 	, @params = N'@py_model varbinary(max)'
 	, @py_model = @py_model
 	with result sets (("RentalCount_Predicted" float, "RentalCount" float, "Month" float,"Day" float,"WeekDay" float,"Snow" float,"Holiday" float, "Year" float));
-			  
+
 END;
 GO
 

From b586643c475f69d8e1fedf0008305ae0e77f96a8 Mon Sep 17 00:00:00 2001
From: Brett Cannon <brcan@microsoft.com>
Date: Mon, 5 Jun 2017 15:18:12 -0700
Subject: [PATCH 2/3] Touch up Python code

---
 .../rental-prediction/rental_prediction.py    | 69 +++++++++----------
 1 file changed, 32 insertions(+), 37 deletions(-)

diff --git a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py
index 9e5b6e1e2d..f424e8da28 100644
--- a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py
+++ b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py
@@ -1,6 +1,6 @@
-import pandas as pd
-from sklearn.linear_model import LinearRegression
-from sklearn.metrics import mean_squared_error
+import pandas
+import sklearn.linear_model
+import sklearn.metrics
 
 from revoscalepy.computecontext.RxInSqlServer import RxInSqlServer
 from revoscalepy.computecontext.RxInSqlServer import RxSqlServerData
@@ -9,43 +9,36 @@
 
 def get_rental_predictions():
     conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
-    column_info = { 
-            "Year" : { "type" : "integer" },
-            "Month" : { "type" : "integer" }, 
-            "Day" : { "type" : "integer" }, 
-            "RentalCount" : { "type" : "integer" }, 
-            "WeekDay" : { 
-                "type" : "factor", 
-                "levels" : ["1", "2", "3", "4", "5", "6", "7"]
-            },
-            "Holiday" : { 
-                "type" : "factor", 
-                "levels" : ["1", "0"]
-            },
-            "Snow" : { 
-                "type" : "factor", 
-                "levels" : ["1", "0"]
-            }
+    column_info = {
+        "Year": {"type": "integer"},
+        "Month": {"type": "integer"},
+        "Day": {"type": "integer"},
+        "RentalCount": {"type": "integer"},
+        "WeekDay": {
+            "type": "factor",
+            "levels": ["1", "2", "3", "4", "5", "6", "7"],
+        },
+        "Holiday": {
+            "type": "factor",
+            "levels": ["1", "0"],
+        },
+        "Snow": {
+            "type": "factor",
+            "levels": ["1", "0"],
         }
+    }
 
     data_source = RxSqlServerData(table="dbo.rental_data",
-                                  connectionString=conn_str, colInfo=column_info)
-    computeContext = RxInSqlServer(
-        connectionString = conn_str,
-        numTasks = 1,
-        autoCleanup = False
-        )
-     
-    
+                                  connectionString=conn_str,
+                                  colInfo=column_info)
     RxInSqlServer(connectionString=conn_str, numTasks=1, autoCleanup=False)
-    
+
     # import data source and convert to pandas dataframe
-    df = pd.DataFrame(rx_import_datasource(data_source))
+    df = pandas.DataFrame(rx_import_datasource(data_source))
     print("Data frame:", df)
-    # Get all the columns from the dataframe.
-    columns = df.columns.tolist()
-    # Filter the columns to remove ones we don't want.
-    columns = [c for c in columns if c not in ["Year"]]
+    # Get all the columns from the dataframe and filter out the ones we don't
+    # want.
+    columns = [x for x in df.columns if x == "Year"]
     # Store the variable we'll be predicting on.
     target = "RentalCount"
     # Generate the training set.  Set random_state to be able to replicate results.
@@ -56,14 +49,16 @@ def get_rental_predictions():
     print("Training set shape:", train.shape)
     print("Testing set shape:", test.shape)
     # Initialize the model class.
-    lin_model = LinearRegression()
+    lin_model = sklearn.linear_model.LinearRegression()
     # Fit the model to the training data.
     lin_model.fit(train[columns], train[target])
     # Generate our predictions for the test set.
     lin_predictions = lin_model.predict(test[columns])
     print("Predictions:", lin_predictions)
     # Compute error between our test predictions and the actual values.
-    lin_mse = mean_squared_error(lin_predictions, test[target])
+    lin_mse = sklearn.metrics.mean_squared_error(lin_predictions, test[target])
     print("Computed error:", lin_mse)
 
-get_rental_predictions()
+
+if __name__ == "__main__":
+    get_rental_predictions()

From 2dda902ab4bd6901a1847861b65b4cc0e19724b8 Mon Sep 17 00:00:00 2001
From: NelGson <negust@microsoft.com>
Date: Mon, 14 Aug 2017 12:02:31 -0700
Subject: [PATCH 3/3] Update Python imports to reflect revoscalepy API changes

---
 .../rental-prediction/rental_prediction.py    | 123 ++++++++++--------
 1 file changed, 66 insertions(+), 57 deletions(-)

diff --git a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py
index f424e8da28..4488ef3227 100644
--- a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py
+++ b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py
@@ -1,64 +1,73 @@
-import pandas
-import sklearn.linear_model
-import sklearn.metrics
-
-from revoscalepy.computecontext.RxInSqlServer import RxInSqlServer
-from revoscalepy.computecontext.RxInSqlServer import RxSqlServerData
-from revoscalepy.etl.RxImport import rx_import_datasource
+import pandas as pd
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error
 
+# The package-level revoscalepy imports below apply to SQL Server 2017 RC1 and above:
+from revoscalepy import RxComputeContext, RxInSqlServer, RxSqlServerData
+from revoscalepy import rx_import
 
 def get_rental_predictions():
-    conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
-    column_info = {
-        "Year": {"type": "integer"},
-        "Month": {"type": "integer"},
-        "Day": {"type": "integer"},
-        "RentalCount": {"type": "integer"},
-        "WeekDay": {
-            "type": "factor",
-            "levels": ["1", "2", "3", "4", "5", "6", "7"],
-        },
-        "Holiday": {
-            "type": "factor",
-            "levels": ["1", "0"],
-        },
-        "Snow": {
-            "type": "factor",
-            "levels": ["1", "0"],
-        }
-    }
+ # Connection string for the SQL Server instance that hosts TutorialDB
+ conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
+
+ # Define the columns we wish to import, along with their types
+ column_info = {
+         "Year" : { "type" : "integer" },
+         "Month" : { "type" : "integer" },
+         "Day" : { "type" : "integer" },
+         "RentalCount" : { "type" : "integer" },
+         "WeekDay" : {
+             "type" : "factor",
+             "levels" : ["1", "2", "3", "4", "5", "6", "7"]
+         },
+         "Holiday" : {
+             "type" : "factor",
+             "levels" : ["1", "0"]
+         },
+         "Snow" : {
+             "type" : "factor",
+             "levels" : ["1", "0"]
+         }
+     }
+
+ # Describe the SQL Server table to read the data from
+ data_source = RxSqlServerData(table="dbo.rental_data",
+                              connection_string=conn_str, column_info=column_info)
+ computeContext = RxInSqlServer(
+     connection_string = conn_str,
+     num_tasks = 1,
+     auto_cleanup = False
+)
+
 
-    data_source = RxSqlServerData(table="dbo.rental_data",
-                                  connectionString=conn_str,
-                                  colInfo=column_info)
-    RxInSqlServer(connectionString=conn_str, numTasks=1, autoCleanup=False)
+ RxInSqlServer(connection_string=conn_str, num_tasks=1, auto_cleanup=False)
 
-    # import data source and convert to pandas dataframe
-    df = pandas.DataFrame(rx_import_datasource(data_source))
-    print("Data frame:", df)
-    # Get all the columns from the dataframe and filter out the ones we don't
-    # want.
-    columns = [x for x in df.columns if x == "Year"]
-    # Store the variable we'll be predicting on.
-    target = "RentalCount"
-    # Generate the training set.  Set random_state to be able to replicate results.
-    train = df.sample(frac=0.8, random_state=1)
-    # Select anything not in the training set and put it in the testing set.
-    test = df.loc[~df.index.isin(train.index)]
-    # Print the shapes of both sets.
-    print("Training set shape:", train.shape)
-    print("Testing set shape:", test.shape)
-    # Initialize the model class.
-    lin_model = sklearn.linear_model.LinearRegression()
-    # Fit the model to the training data.
-    lin_model.fit(train[columns], train[target])
-    # Generate our predictions for the test set.
-    lin_predictions = lin_model.predict(test[columns])
-    print("Predictions:", lin_predictions)
-    # Compute error between our test predictions and the actual values.
-    lin_mse = sklearn.metrics.mean_squared_error(lin_predictions, test[target])
-    print("Computed error:", lin_mse)
+ # Import the data source and convert it to a pandas DataFrame.
+ df = pd.DataFrame(rx_import(input_data = data_source))
+ print("Data frame:", df)
+ # Get all the columns from the dataframe.
+ columns = df.columns.tolist()
+ # Drop the columns we don't want to train on. NOTE(review): the target column is still in this list - confirm that is intended.
+ columns = [c for c in columns if c not in ["Year"]]
+ # Store the name of the variable we'll be predicting.
+ target = "RentalCount"
+ # Generate the training set.  Set random_state to be able to replicate results.
+ train = df.sample(frac=0.8, random_state=1)
+ # Select anything not in the training set and put it in the testing set.
+ test = df.loc[~df.index.isin(train.index)]
+ # Print the shapes of both sets.
+ print("Training set shape:", train.shape)
+ print("Testing set shape:", test.shape)
+ # Initialize the model class.
+ lin_model = LinearRegression()
+ # Fit the model to the training data.
+ lin_model.fit(train[columns], train[target])
 
+ # Generate our predictions for the test set.
+ lin_predictions = lin_model.predict(test[columns])
+ print("Predictions:", lin_predictions)
+ # Compute error between our test predictions and the actual values.
+ lin_mse = mean_squared_error(lin_predictions, test[target])
+ print("Computed error:", lin_mse)
 
-if __name__ == "__main__":
-    get_rental_predictions()
+get_rental_predictions()