In [1]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.26.0 to work with 429test


In [2]:
from azureml.core import Experiment
from azureml.core import Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import joblib
import os

In [3]:
# Create a folder for the experiment files
experiment_folder = 'zombies_inline'
os.makedirs(experiment_folder, exist_ok=True)
print(experiment_folder, 'folder created')

# Create an Azure ML experiment in your workspace
experiment = Experiment(workspace=ws, name="429test")

# Start logging data from the experiment, obtaining a reference to the experiment run
run = experiment.start_logging()
print("Starting experiment:", experiment.name)


zombies_inline folder created
Starting experiment: 429test


In [4]:
ws.get_default_datastore()

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-c1d2c1ad-a4f6-4d8d-b0d7-83a3ed52b912",
  "account_name": "429test0160411344",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [5]:
dataset = Dataset.get_by_name(ws, name='zombies')
zombies = dataset.to_pandas_dataframe()

In [6]:
zombies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 838 entries, 0 to 837
Data columns (total 8 columns):
Age         838 non-null int64
Num_Fam     838 non-null int64
Ammo        838 non-null int64
Bounty      838 non-null float64
Human_1     838 non-null int64
Zone_2      838 non-null int64
Zone_3      838 non-null int64
Sex_male    838 non-null int64
dtypes: float64(1), int64(7)
memory usage: 52.5 KB


In [7]:
zom = zombies[['Human_1','Zone_2']]

zombies['Human_1'] = zombies['Human_1'].astype('category')
zombies['Zone_2'] = zombies['Zone_2'].astype('category')
zombies['Zone_3'] = zombies['Zone_3'].astype('category')
zombies['Sex_male'] = zombies['Sex_male'].astype('category')

zombies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 838 entries, 0 to 837
Data columns (total 8 columns):
Age         838 non-null int64
Num_Fam     838 non-null int64
Ammo        838 non-null int64
Bounty      838 non-null float64
Human_1     838 non-null category
Zone_2      838 non-null category
Zone_3      838 non-null category
Sex_male    838 non-null category
dtypes: category(4), float64(1), int64(3)
memory usage: 30.0 KB


In [8]:
zom_log = zombies.copy()
for col in zom_log:
    if zom_log[col].dtype.name != 'category':
        zom_log[col] = zom_log[col].astype('float64').replace(0.0, 0.01)
        zom_log[col] = np.log(zom_log[col])
        
zom_log.head()

# zom_log.hist(figsize = (12,8),color="green");

# **Yikes! Since we are not able to coerce our data into a normal distribution, we'll just try to scale it instead.**

## Splitting our dataset into a training set and a test set.

X = zom_log[['Age','Num_Fam','Ammo','Human_1','Zone_2','Zone_3','Sex_male']]

y = zom_log['Bounty']

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)



## Build a model with each categorical variable as a predictor

import statsmodels.api as sm

ols_model = sm.OLS(y,X.astype(float)).fit()
ols_model.summary()



0,1,2,3
Dep. Variable:,Bounty,R-squared (uncentered):,0.868
Model:,OLS,Adj. R-squared (uncentered):,0.867
Method:,Least Squares,F-statistic:,779.6
Date:,"Wed, 05 May 2021",Prob (F-statistic):,0.0
Time:,09:05:29,Log-Likelihood:,-1246.2
No. Observations:,838,AIC:,2506.0
Df Residuals:,831,BIC:,2539.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Age,0.1115,0.044,2.536,0.011,0.025,0.198
Num_Fam,0.1861,0.016,11.574,0.000,0.155,0.218
Ammo,0.3990,0.024,16.598,0.000,0.352,0.446
Human_1,0.3238,0.096,3.386,0.001,0.136,0.512
Zone_2,-0.7708,0.117,-6.584,0.000,-1.001,-0.541
Zone_3,-0.9744,0.105,-9.267,0.000,-1.181,-0.768
Sex_male,0.0462,0.095,0.484,0.628,-0.141,0.233

0,1,2,3
Omnibus:,867.454,Durbin-Watson:,2.04
Prob(Omnibus):,0.0,Jarque-Bera (JB):,37600.281
Skew:,-4.98,Prob(JB):,0.0
Kurtosis:,34.268,Cond. No.,38.2


In [9]:
r2 = ols_model.rsquared
run.log("r2",r2)

In [10]:
r2_adj = ols_model.rsquared_adj
run.log("r2_adj",r2_adj)

In [11]:
metrics = run.get_metrics()
for key in metrics.keys():
        print(key, metrics.get(key))

In [12]:
print(metrics)

{}


In [13]:
preds = ols_model.predict()

In [None]:
# Save the trained model in the outputs folder
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=ols_model, filename='outputs/zombies.pkl')

run.complete()

In [None]:
from azureml.core import Model

# Register the model
run.register_model(model_path='outputs/zombies.pkl', model_name='zombies_model',properties={'r2': run.get_metrics()['r2'], 'r2_adj': run.get_metrics()['r2_adj']})

# List registered models
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
#     for tag_name in model.tags:
#         tag = model.tags[tag_name]
#         print ('\t',tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print ('\t',prop_name, ':', prop)
    print('\n')

In [None]:
preds

In [None]:
print(X_test)

In [None]:

dataset = dataset.register(workspace = ws,
                           name = 'x_test_dataset',
                           create_new_version=True)

In [None]:
X_test.to_csv('xtest.csv',index=True)