## Example Linear Regression
This is a simple linear regression example that predicts the temperature from the rate of cricket chirps, based on The Song of Insects, by Dr. G. W. Pierce, Harvard College Press.

### Import the Azure ML library and check the SDK version

In [None]:
import azureml
from azureml.core import Workspace, Run, Experiment

# Report the installed core SDK version
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

### Connect to workspace

Create a workspace object from the existing workspace. `Workspace.from_config()` reads the file **config.json** and loads the details into an object named `ws`.

In [None]:
# Build the workspace handle from config.json; later cells reuse `ws`
ws = Workspace.from_config()

### Create a Run Configuration with Azure ML Compute 

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.runconfig import RunConfiguration

# Start from a fresh run configuration and mark AmlCompute as the place
# where the training script will execute
run_AML_compute = RunConfiguration()
run_AML_compute.target = "amlcompute"

# The AmlCompute cluster is created in the same region as the workspace;
# the VM size must be one of the sizes listed in supported_vmsizes
run_AML_compute.amlcompute.vm_size = "STANDARD_D2_V2"


### Define the necessary dependencies

In [None]:
from azureml.core.conda_dependencies import CondaDependencies

# pip packages for the training environment, registered in this order;
# azureml-dataprep has to be added alongside the ML libraries
dependencies = CondaDependencies()
for package_name in ("scikit-learn", "pandas", "azureml-dataprep"):
    dependencies.add_pip_package(package_name)

# Attach the package list to the run configuration's Python environment
run_AML_compute.environment.python.conda_dependencies = dependencies

### Create a script folder

In [None]:
import os

# Local directory that will hold the training script
script_folder = './MyScriptFolder'

# exist_ok=True makes re-running this cell harmless when the folder is already there
os.makedirs(name=script_folder, exist_ok=True)

### Create the training script and write it to script folder

Before running the cell, replace the placeholder values for the workspace name, subscription ID, and resource group name in the `Workspace.get(...)` call.

In [None]:
%%writefile $script_folder/train.py
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
from azureml.core import Run
from sklearn.metrics import mean_squared_error
from math import sqrt
from azureml.core.dataset import Dataset
from azureml.core import Workspace
import os

#We need the workspace to retrieve the dataset, default authentication is interactive
ws = Workspace.get(name="<Azure ML workspace name>", subscription_id="<Azure subscription ID>", resource_group="Resource Group Name for the Azure ML workspace")
#Get the data set that is already registered with the workspace
data_set =Dataset.get(ws,'CricketChirps')
#Use the dataset
dataset=data_set.to_pandas_dataframe()
X = dataset.iloc[:, :-1].values  #  independent variable 
y = dataset.iloc[:, 1].values    #  dependent variable 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0)
# get hold of the current run
run = Run.get_context()
print('Train a linear regression model')
regressor = LinearRegression()    # This object is the regressor, that does the regression
regressor.fit(X_train, y_train)   # Provide training data so the machine can learn to predict using a learned model.

print('Predict the test set')
y_pred = regressor.predict(X_test)
print(y_pred)

#Calculate and log the root mean square error
rmse = sqrt(mean_squared_error(y_test, y_pred))
run.log('RMSE', rmse)

#Calculate and log Mean Absolute Square Error and Model Accuracy
sum_actuals = sum_errors = 0
for actual_val, predict_val in zip(y_test, y_pred):
    abs_error = actual_val - predict_val
    if abs_error < 0:
        abs_error = abs_error * -1
    
    sum_errors = sum_errors + abs_error
    sum_actuals = sum_actuals + actual_val   
mean_abs_percent_error = sum_errors / sum_actuals
run.log('MAPE', mean_abs_percent_error)
accuracy = 1 - mean_abs_percent_error
run.log('Model Accuracy', accuracy)

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=regressor, filename='outputs/model.pkl')

### Create experiment

Create an experiment to track the runs in your workspace. A workspace can have multiple experiments.

In [None]:
# Name of the experiment that will track this notebook's runs in the workspace
experiment_name = "myExperiment"
exp = Experiment(name=experiment_name, workspace=ws)

### Submit the experiment for training on AML Compute using the run configuration we defined earlier

In [None]:
from azureml.core import ScriptRunConfig

# Bundle the script folder, the entry script and the AmlCompute run
# configuration into a single submission
src = ScriptRunConfig(
    source_directory=script_folder,
    script="train.py",
    run_config=run_AML_compute,
)

# Submit to the experiment, then wait for the run to complete while showing its output
run = exp.submit(src)
run.wait_for_completion(show_output=True)
