In [1]:

import azureml.core

# Check core SDK version number.
print('SDK version:', azureml.core.VERSION)

SDK version: 1.34.0


In [2]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.34.0 to work with aml-walkthrough-ws


In [3]:
#load the dependencies

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt 
from matplotlib import style
plt.style.use( 'bmh')

In [4]:
DATA_DIR = "./data"
data_df = pd.read_csv(os.path.join(DATA_DIR, 'stock_data.csv'))
print(data_df.shape)
data_df.head()

(1949, 7)


Unnamed: 0,Datetime,Open,High,Low,Close,Adj Close,Volume
0,2022-08-01 09:30:00-04:00,277.820007,277.939911,277.070007,277.679993,277.679993,731044
1,2022-08-01 09:31:00-04:00,277.589996,277.649994,276.743103,277.279999,277.279999,185477
2,2022-08-01 09:32:00-04:00,277.339996,277.665009,277.220093,277.470001,277.470001,100953
3,2022-08-01 09:33:00-04:00,277.440002,278.25,277.261414,278.0,278.0,112216
4,2022-08-01 09:34:00-04:00,277.5,277.73999,277.436707,277.640015,277.640015,106432


In [5]:
import os, shutil

# Create a folder for the experiment files
training_folder = 'stock-prediction-training'
os.makedirs(training_folder, exist_ok=True)

# Copy the data file into the experiment folder
shutil.copy('data/stock_data.csv', os.path.join(training_folder, "stock_data.csv"))

'stock-prediction-training/stock_data.csv'

In [6]:
%%writefile $training_folder/stock_training.py
# Import libraries
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

# Get the experiment run context
run = Run.get_context()

# load the diabetes dataset
print("Loading Data...")
stock_data = pd.read_csv('stock_data.csv')

x= stock_data[['Open','High','Low']].values
y= stock_data[['Close']].values

x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2,random_state=0)

model= LinearRegression()
model.fit(x_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

# y_pred= model.predict(x_test)

# calculate accuracy
y_hat= model.predict(x_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)
run.log('Accuracy', np.float(acc))

# # calculate AUC
# y_scores = model.predict_proba(x_test)
# auc = roc_auc_score(y_test,y_scores[:,1])
# print('AUC: ' + str(auc))
# run.log('AUC', np.float(auc))

# Save the trained model in the outputs folder
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/stock_model.pkl')

run.complete()

Overwriting stock-prediction-training/stock_training.py


In [7]:
%%writefile $training_folder/conda_dependencies.yml
name: scikit-learn-env
channels:
    - defaults
dependencies:
  - python=3.6.2
  - pip
  - pip:
    - azureml-defaults
    - azureml-widgets
    - azureml-sdk
    - joblib
    - lightgbm
    - inference-schema[numpy-support]
    - imblearn
    - imbalanced-learn
  - numpy
  - pandas
  - scikit-learn

Overwriting stock-prediction-training/conda_dependencies.yml


In [None]:
%%writefile $experiment_folder/conda_dependencies.yml
name: scikit-learn-env
channels:
    - defaults
dependencies:
  - python=3.6.2
  - pip
  - pip:
    - azureml-defaults
    - azureml-widgets
    - azureml-sdk
    - joblib
    - lightgbm
    - inference-schema[numpy-support]
    - imblearn
    - imbalanced-learn
  - numpy
  - pandas
  - scikit-learn

In [8]:
from azureml.core import Experiment, ScriptRunConfig
from azureml.widgets import RunDetails
from azureml.core import Environment
# env = Environment.get(workspace=ws, name="AzureML-sklearn-0.24.1-ubuntu18.04-py37-cpu-inference")"
# env = Environment.get(workspace=ws, name="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu")
sklearn_env = Environment.from_conda_specification(name='stock-predict-env', file_path=os.path.join(training_folder,'conda_dependencies.yml'))

script_config = ScriptRunConfig(source_directory=training_folder,
                      script='stock_training.py',
                      arguments=['--kernel', 'linear', '--penalty'],
                      environment=sklearn_env)

In [9]:
from azureml.core import Experiment, ScriptRunConfig
from azureml.core import Environment

# submit the experiment
experiment = Experiment(workspace = ws, name = 'stock-prediction-experiment-hack')
run = experiment.submit(config=script_config)

In [10]:
%%time
# Shows output of the run on stdout.
run.wait_for_completion(show_output=True)

RunId: stock-prediction-experiment-hack_1660757346_dc168c8d
Web View: https://ml.azure.com/runs/stock-prediction-experiment-hack_1660757346_dc168c8d?wsid=/subscriptions/b30d9dbd-c0f7-405f-902c-3eabd080eb00/resourcegroups/aml-walkthrough-rg/workspaces/aml-walkthrough-ws&tid=72f988bf-86f1-41af-91ab-2d7cd011db47

Streaming azureml-logs/70_driver_log.txt

[2022-08-17T17:29:09.886419] Entering context manager injector.
  from cryptography.hazmat.backends import default_backend
[2022-08-17T17:29:10.670870] context_manager_injector.py Command line Options: Namespace(inject=['ProjectPythonPath:context_managers.ProjectPythonPath', 'RunHistory:context_managers.RunHistory', 'TrackUserError:context_managers.TrackUserError', 'UserExceptions:context_managers.UserExceptions'], invocation=['stock_training.py', '--kernel', 'linear', '--penalty'])
Script type = None
[2022-08-17T17:29:10.675829] Entering Run History Context Manager.
[2022-08-17T17:29:12.286242] Current directory: /tmp/azureml_runs/stock-

{'runId': 'stock-prediction-experiment-hack_1660757346_dc168c8d',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2022-08-17T17:29:08.78345Z',
 'endTimeUtc': '2022-08-17T17:29:22.514034Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': 'cbf86181-729a-43a9-bdf7-2c9413281637'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'stock_training.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--kernel', 'linear', '--penalty'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'instanceTypes': [],
  'priority': None,
  'credentialPassthrough': False,
  'identity': None,
  'environment': {'name': 'stock-predict-env',
   'version': 'Autosave_2022-08-17T17:02:26Z_ff97a478',
   'assetI

In [11]:
from azureml.core import Model

# Register the model
run.register_model(model_path='outputs/stock_model.pkl', model_name='stock_model',
                   tags={'Training context':'ScriptRunConfig'},
                   properties={'Accuracy': run.get_metrics()['Accuracy']})

Model(workspace=Workspace.create(name='aml-walkthrough-ws', subscription_id='b30d9dbd-c0f7-405f-902c-3eabd080eb00', resource_group='aml-walkthrough-rg'), name=stock_model, id=stock_model:3, version=3, tags={'Training context': 'ScriptRunConfig'}, properties={'Accuracy': '0.0'})

In [12]:
from azureml.core import Model

for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print ('\t',tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print ('\t',prop_name, ':', prop)
    print('\n')

stock_model version: 3
	 Training context : ScriptRunConfig
	 Accuracy : 0.0


stock_model version: 2
	 Training context : ScriptRunConfig
	 Accuracy : 0.0


AutoML16dd11da30 version: 1


diabetes_model version: 12
	 Training context : Pipeline


diabetes_model version: 11
	 Training context : Pipeline


amlstudio-realtimestockpred version: 1
	 CreatedByAMLStudio : true


amlstudio-stockpricepredictor version: 1
	 CreatedByAMLStudio : true


stock_model version: 1
	 Training context : ScriptRunConfig
	 Accuracy : 0.0


AutoML703a6658842 version: 1


credit_defaults_model version: 10
	 flavors.python_function : {
  "model_path": "model.pkl",
  "loader_module": "mlflow.sklearn",
  "python_version": "3.7.11",
  "env": "conda.yaml"
}
	 flavors.sklearn : {
  "pickled_model": "model.pkl",
  "sklearn_version": "0.24.1",
  "serialization_format": "cloudpickle"
}
	 flavors : python_function,sklearn
	 azureml.artifactPrefix : ExperimentRun/dcid.536455f9-8445-4b07-9466-6af120a5f0a4/credit_default

In [45]:
import joblib
# load the diabetes dataset
print("Loading Data...")
stock_data = pd.read_csv('stock-prediction-training/stock_data.csv')
#Users/meeral/Realtime Stock Price Prediction/yahoofinance/stock-prediction-training

x= stock_data[['Open','High','Low']].values
y= stock_data[['Close']].values

#print(x)

x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2,random_state=0)


# model_path = Model.get_model_path(model_name="stock_model")
model_obj = Model(ws, 'stock_model' )
model_path = model_obj.download(exist_ok = True)
model = joblib.load(model_path)

# Example when the model is a file
# model_path = os.path.join(os.getenv(''), 'stock_model')

# Example when the model is a folder containing a file
# file_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'my_model_folder', 'sklearn_regression_model.pkl')

    #with open(model_path, 'rb') as file:
    #    model = pickle.load(file)
# model = joblib.load(model_path)

print('x_test: ')
print (x_test)

y_pred= model.predict(x_test)
result= pd.DataFrame({'Actual':y_test.flatten(),'Predicted':y_pred.flatten()})
result.head(25)


Loading Data...




Unnamed: 0,Actual,Predicted
0,278.48999,278.463653
1,283.355408,283.365573
2,282.640015,282.594862
3,278.850006,278.930075
4,277.967194,277.930056
5,280.279999,280.310254
6,282.693604,282.726585
7,281.23999,281.28803
8,282.519989,282.518176
9,273.329987,273.260686


x_test: 
[[278.42999268 278.48999023 278.42999268]
 [283.5        283.51998901 283.30999756]
 [282.60998535 282.66000366 282.54998779]
 ...
 [280.20999146 280.26000977 280.17001343]
 [282.92999268 283.         282.82998657]
 [276.65499878 276.66000366 276.42999268]]


In [66]:
%%writefile $training_folder/score.py 
#$training_folder/score.py
# %%writefile $script_file

import numpy
import joblib
import os
from azureml.core.model import Model
import json
import numpy as np
import pickle
from sklearn.linear_model import Ridge
from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType



def init():
    # load the model from file into a global object
    global model   
    model_path = Model.get_model_path(model_name="stock_model")
    #with open(model_path, 'rb') as file:
    #    model = pickle.load(file)
    model = joblib.load(model_path)

input_sample = numpy.array([
    [514.390015,515.630005,505.369995]
    ,
    [513.000000,517.979980,510.369995]])
output_sample = numpy.array([[
    510.820007],
    [517.349976
    ]])

# Inference_schema generates a schema for your web service
# It then creates an OpenAPI (Swagger) specification for the web service
# at http://<scoring_base_url>/swagger.json
@input_schema('data', NumpyParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))

def run(raw_data):
    data = np.array(json.loads(raw_data)['data'])
    # result = model.predict(data)
    # Get a prediction from the model
    predictions = model.predict(data)
    return json.dumps(predictions)


if __name__ == "__main__":
    # Test scoring
    init()
    test_row = '{"data":[[514.390015,515.630005,505.369995],[513.000000,517.979980,510.369995]]}'
    prediction = run(test_row)
    print("Test result: ", prediction)

Overwriting stock-prediction-training/score.py


In [57]:
%%writefile $training_folder/score.py
import json
import joblib
import pickle
import numpy as np
import pandas as pd
import azureml.train.automl
from azureml.core.model import Model

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType


# Called when the service is loaded
def init():
    global model
    # Get the path to the deployed model file and load it
    model_path = Model.get_model_path('stock_model')
    model = joblib.load(model_path)

# providing 3 sample inputs for schema generation
numpy_sample_input = NumpyParameterType(np.array([[514.390015,515.630005,505.369995], [513.000000,517.979980,510.369995]],dtype='float64'))

# This is a nested input sample, any item wrapped by `ParameterType` will be described by schema
sample_input = StandardPythonParameterType({'data': numpy_sample_input})
sample_output = StandardPythonParameterType([[509.12305715658124], [515.6020041775826]])
outputs = StandardPythonParameterType({'Results':sample_output}) # 'Results' is case sensitive

@input_schema('Inputs', sample_input) 
# 'Inputs' is case sensitive

@output_schema(outputs)

# Called when a request is received
def run(Inputs):
    # Get the input data as a numpy array
    inputData = Inputs['data']
    # data = np.array(json.loads(inputData)['data'])
    # data = json.loads(inputData)
    print(inputData)
    # Get a prediction from the model
    predictions = model.predict(inputData)
    print(predictions)
    # result= pd.DataFrame({'Actual':inputData.flatten(),'Predicted':predictions.flatten()})
    return json.dumps(predictions.tolist())
    

Overwriting stock-prediction-training/score.py


In [58]:
# Conda Dependency File
env_file = os.path.join(training_folder,"conda_dependencies.yml")
# Print the .yml file
# with open(env_file,"r") as f:
#    print(f.read())
# Set path for scoring script
script_file = os.path.join(training_folder,"score.py")

In [59]:
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from inference_schema.schema_decorators import input_schema, output_schema
from azureml.core.model import Model
# Configure the scoring environment



inference_config = InferenceConfig(runtime= "python",
                                   entry_script=script_file,
                                   conda_file=env_file)

deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

service_name = "predictstockclosingprice"
ws = Workspace.from_config()
model = Model(ws,"stock_model")

service = Model.deploy(ws, service_name, [model], inference_config, deployment_config)

service.wait_for_deployment(True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-08-17 19:24:11+00:00 Creating Container Registry if not exists.
2022-08-17 19:24:11+00:00 Use the existing image.
2022-08-17 19:24:12+00:00 Submitting deployment to compute.
2022-08-17 19:24:17+00:00 Checking the status of deployment predictstockclosingprice..
2022-08-17 19:26:21+00:00 Checking the status of inference endpoint predictstockclosingprice.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [28]:
print(service.state)

Healthy


In [52]:
# model_path = Model.get_model_path(model_name="stock_model")
model_obj = Model(ws, 'stock_model' )
model_path = model_obj.download(exist_ok = True)
model = joblib.load(model_path)

predict = model.predict([[514.390015,515.630005,505.369995],[513.000000,517.979980,510.369995]])
print (type(predict))
predict.tolist()

<class 'numpy.ndarray'>


[[509.12305715658124], [515.6020041775826]]

In [61]:
import json

x_new = [[514.390015,515.630005,505.369995],[513.000000,517.979980,510.369995]]
print ('Patients: {}'.format(x_new[0]))

# Convert the array to a serializable list in a JSON document
input_json = json.dumps({"Inputs": {"data": x_new}})

# input_json = json.dumps({
#   "Inputs": {
#     "data": [
#       [
#         514.390015,
#         515.630005,
#         505.369995
#       ],
#       [
#         513,
#         517.97998,
#         510.369995
#       ]
#     ]
#   }
# })

# Call the web service, passing the input data (the web service will also accept the data in binary format)
predictions = service.run(input_data = input_json)
print(predictions)



Patients: [514.390015, 515.630005, 505.369995]
[[509.12305715658124], [515.6020041775826]]


In [None]:
import os
import json

from pathlib import Path

from collections import defaultdict




model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'trained_model_outputs')
schema_file_path = Path(model_path) / '_schema.json'
with open(schema_file_path) as fp:
    schema_data = json.load(fp)


def init():
    global model
    model = ModelDirectory.load(model_path).model


def run(data):
    data = json.loads(data)
    input_entry = defaultdict(list)
    for row in data:
        for key, val in row.items():
            input_entry[key].append(decode_nan(val))

    data_frame_directory = create_dfd_from_dict(input_entry, schema_data)
    score_module = ScoreModelModule()
    result, = score_module.run(
        learner=model,
        test_data=DataTable.from_dfd(data_frame_directory),
        append_or_result_only=True)
    return json.dumps({"result": result.data_frame.values.tolist()})
