From b1c26888f156ecb4eb7e83e3fb5c7ec045995b6b Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Mon, 5 Jun 2017 14:51:42 -0700 Subject: [PATCH 1/3] Collect import statements Along the way, switch to importing modules instead of classes --- .../rental-prediction/rental_prediction.sql | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql index bd36334fd3..b2f61709db 100644 --- a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql +++ b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql @@ -27,23 +27,24 @@ BEGIN @language = N'Python' , @script = N' +from sklearn import linear_model + +import pickle + + df = rental_train_data # Get all the columns from the dataframe. columns = df.columns.tolist() - # Store the variable well be predicting on. target = "RentalCount" -from sklearn.linear_model import LinearRegression - # Initialize the model class. -lin_model = LinearRegression() +lin_model = linear_model.LinearRegression() # Fit the model to the training data. lin_model.fit(df[columns], df[target]) -import pickle #Before saving the model to the DB table, we need to convert it to a binary object trained_model = pickle.dumps(lin_model) ' @@ -75,7 +76,7 @@ AS BEGIN DECLARE @py_model varbinary(max) = (select model from rental_py_models where model_name = @model); - EXEC sp_execute_external_script + EXEC sp_execute_external_script @language = N'Python' , @script = N' @@ -83,7 +84,7 @@ BEGIN import pickle rental_model = pickle.loads(py_model) - + df = rental_score_data #print(df) @@ -106,7 +107,7 @@ lin_mse = mean_squared_error(linpredictions, df[target]) #print(lin_mse) import pandas as pd -predictions_df = pd.DataFrame(lin_predictions) +predictions_df = pd.DataFrame(lin_predictions) OutputDataSet = pd.concat([predictions_df, df["RentalCount"], df["Month"], df["Day"], df["WeekDay"], df["Snow"], df["Holiday"], df["Year"]], axis=1) ' , @input_data_1 = N'Select "RentalCount", "Year" ,"Month", "Day", "WeekDay", "Snow", "Holiday" from rental_data where Year = 2015' @@ -114,7 +115,7 @@ OutputDataSet = pd.concat([predictions_df, df["RentalCount"], df["Month"], df["D , @params = N'@py_model varbinary(max)' , @py_model = @py_model with result sets (("RentalCount_Predicted" float, "RentalCount" float, "Month" float,"Day" float,"WeekDay" float,"Snow" float,"Holiday" float, "Year" float)); - + END; GO From b586643c475f69d8e1fedf0008305ae0e77f96a8 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Mon, 5 Jun 2017 15:18:12 -0700 Subject: [PATCH 2/3] Touch up Python code --- .../rental-prediction/rental_prediction.py | 69 +++++++++---------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py index 9e5b6e1e2d..f424e8da28 100644 --- a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py +++ b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py @@ -1,6 +1,6 @@ -import pandas as pd -from sklearn.linear_model import LinearRegression -from sklearn.metrics import mean_squared_error +import pandas +import sklearn.linear_model +import sklearn.metrics from revoscalepy.computecontext.RxInSqlServer import RxInSqlServer from revoscalepy.computecontext.RxInSqlServer import RxSqlServerData @@ -9,43 +9,36 @@ def get_rental_predictions(): conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;' - column_info = { - "Year" : { "type" : "integer" }, - "Month" : { "type" : "integer" }, - "Day" : { "type" : "integer" }, - "RentalCount" : { "type" : "integer" }, - "WeekDay" : { - "type" : "factor", - "levels" : ["1", "2", "3", "4", "5", "6", "7"] - }, - "Holiday" : { - "type" : "factor", - "levels" : ["1", "0"] - }, - "Snow" : { - "type" : "factor", - "levels" : ["1", "0"] - } + column_info = { + "Year": {"type": "integer"}, + "Month": {"type": "integer"}, + "Day": {"type": "integer"}, + "RentalCount": {"type": "integer"}, + "WeekDay": { + "type": "factor", + "levels": ["1", "2", "3", "4", "5", "6", "7"], + }, + "Holiday": { + "type": "factor", + "levels": ["1", "0"], + }, + "Snow": { + "type": "factor", + "levels": ["1", "0"], } + } data_source = RxSqlServerData(table="dbo.rental_data", - connectionString=conn_str, colInfo=column_info) - computeContext = RxInSqlServer( - connectionString = conn_str, - numTasks = 1, - autoCleanup = False - ) - - + connectionString=conn_str, + colInfo=column_info) RxInSqlServer(connectionString=conn_str, numTasks=1, autoCleanup=False) - + # import data source and convert to pandas dataframe - df = pd.DataFrame(rx_import_datasource(data_source)) + df = pandas.DataFrame(rx_import_datasource(data_source)) print("Data frame:", df) - # Get all the columns from the dataframe. - columns = df.columns.tolist() - # Filter the columns to remove ones we don't want. - columns = [c for c in columns if c not in ["Year"]] + # Get all the columns from the dataframe and filter out the ones we don't + # want. + columns = [x for x in df.columns if x == "Year"] # Store the variable we'll be predicting on. target = "RentalCount" # Generate the training set. Set random_state to be able to replicate results. @@ -56,14 +49,16 @@ def get_rental_predictions(): print("Training set shape:", train.shape) print("Testing set shape:", test.shape) # Initialize the model class. - lin_model = LinearRegression() + lin_model = sklearn.linear_model.LinearRegression() # Fit the model to the training data. lin_model.fit(train[columns], train[target]) # Generate our predictions for the test set. lin_predictions = lin_model.predict(test[columns]) print("Predictions:", lin_predictions) # Compute error between our test predictions and the actual values. - lin_mse = mean_squared_error(lin_predictions, test[target]) + lin_mse = sklearn.metrics.mean_squared_error(lin_predictions, test[target]) print("Computed error:", lin_mse) -get_rental_predictions() + +if __name__ == "__main__": + get_rental_predictions() From 2dda902ab4bd6901a1847861b65b4cc0e19724b8 Mon Sep 17 00:00:00 2001 From: NelGson Date: Mon, 14 Aug 2017 12:02:31 -0700 Subject: [PATCH 3/3] update to python imports to reflect revoscalepy updates --- .../rental-prediction/rental_prediction.py | 123 ++++++++++-------- 1 file changed, 66 insertions(+), 57 deletions(-) diff --git a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py index f424e8da28..4488ef3227 100644 --- a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py +++ b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py @@ -1,64 +1,73 @@ -import pandas -import sklearn.linear_model -import sklearn.metrics - -from revoscalepy.computecontext.RxInSqlServer import RxInSqlServer -from revoscalepy.computecontext.RxInSqlServer import RxSqlServerData -from revoscalepy.etl.RxImport import rx_import_datasource +import pandas as pd +from sklearn.linear_model import LinearRegression +from sklearn.metrics import mean_squared_error +#If you are running SQL Server 2017 RC1 and above: +from revoscalepy import RxComputeContext, RxInSqlServer, RxSqlServerData +from revoscalepy import rx_import def get_rental_predictions(): - conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;' - column_info = { - "Year": {"type": "integer"}, - "Month": {"type": "integer"}, - "Day": {"type": "integer"}, - "RentalCount": {"type": "integer"}, - "WeekDay": { - "type": "factor", - "levels": ["1", "2", "3", "4", "5", "6", "7"], - }, - "Holiday": { - "type": "factor", - "levels": ["1", "0"], - }, - "Snow": { - "type": "factor", - "levels": ["1", "0"], - } - } +#Connection string to connect to SQL Server named instance + conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;' + +#Define the columns we wish to import + column_info = { + "Year" : { "type" : "integer" }, + "Month" : { "type" : "integer" }, + "Day" : { "type" : "integer" }, + "RentalCount" : { "type" : "integer" }, + "WeekDay" : { + "type" : "factor", + "levels" : ["1", "2", "3", "4", "5", "6", "7"] + }, + "Holiday" : { + "type" : "factor", + "levels" : ["1", "0"] + }, + "Snow" : { + "type" : "factor", + "levels" : ["1", "0"] + } + } + + #Get the data from SQL Server Table + data_source = RxSqlServerData(table="dbo.rental_data", + connection_string=conn_str, column_info=column_info) + computeContext = RxInSqlServer( + connection_string = conn_str, + num_tasks = 1, + auto_cleanup = False +) + - data_source = RxSqlServerData(table="dbo.rental_data", - connectionString=conn_str, - colInfo=column_info) - RxInSqlServer(connectionString=conn_str, numTasks=1, autoCleanup=False) + RxInSqlServer(connection_string=conn_str, num_tasks=1, auto_cleanup=False) - # import data source and convert to pandas dataframe - df = pandas.DataFrame(rx_import_datasource(data_source)) - print("Data frame:", df) - # Get all the columns from the dataframe and filter out the ones we don't - # want. - columns = [x for x in df.columns if x == "Year"] - # Store the variable we'll be predicting on. - target = "RentalCount" - # Generate the training set. Set random_state to be able to replicate results. - train = df.sample(frac=0.8, random_state=1) - # Select anything not in the training set and put it in the testing set. - test = df.loc[~df.index.isin(train.index)] - # Print the shapes of both sets. - print("Training set shape:", train.shape) - print("Testing set shape:", test.shape) - # Initialize the model class. - lin_model = sklearn.linear_model.LinearRegression() - # Fit the model to the training data. - lin_model.fit(train[columns], train[target]) - # Generate our predictions for the test set. - lin_predictions = lin_model.predict(test[columns]) - print("Predictions:", lin_predictions) - # Compute error between our test predictions and the actual values. - lin_mse = sklearn.metrics.mean_squared_error(lin_predictions, test[target]) - print("Computed error:", lin_mse) + # import data source and convert to pandas dataframe + df = pd.DataFrame(rx_import(input_data = data_source)) + print("Data frame:", df) + # Get all the columns from the dataframe. + columns = df.columns.tolist() + # Filter the columns to remove ones we don't want to use in the training + columns = [c for c in columns if c not in ["Year"]] + # Store the variable we'll be predicting on. + target = "RentalCount" + # Generate the training set. Set random_state to be able to replicate results. + train = df.sample(frac=0.8, random_state=1) + # Select anything not in the training set and put it in the testing set. + test = df.loc[~df.index.isin(train.index)] + # Print the shapes of both sets. + print("Training set shape:", train.shape) + print("Testing set shape:", test.shape) + # Initialize the model class. + lin_model = LinearRegression() + # Fit the model to the training data. + lin_model.fit(train[columns], train[target]) + # Generate our predictions for the test set. + lin_predictions = lin_model.predict(test[columns]) + print("Predictions:", lin_predictions) + # Compute error between our test predictions and the actual values. + lin_mse = mean_squared_error(lin_predictions, test[target]) + print("Computed error:", lin_mse) -if __name__ == "__main__": - get_rental_predictions() +get_rental_predictions()