In [1]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.27.0 to work with avz0521


In [5]:
from azureml.core import Experiment
from azureml.core import Datastore
from azureml.core import Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import joblib
import os

In [6]:
# Create a folder for the experiment files
experiment_folder = 'zombies_inline'
os.makedirs(experiment_folder, exist_ok=True)
print(experiment_folder, 'folder created')

# Create an Azure ML experiment in your workspace
experiment = Experiment(workspace=ws, name="zombies_test")

# Start logging data from the experiment, obtaining a reference to the experiment run
run = experiment.start_logging()
print("Starting experiment:", experiment.name)


zombies_inline folder created
Starting experiment: zombies_test


In [10]:
# dsr = ws.set_as_default(ws,zombies)

In [12]:
dataset = Dataset.get_by_name(ws, name='zombiesavz') #I changed the dataset name to what was in my Datasets
                                                    #Yours will be different I'm assuming
zombies = dataset.to_pandas_dataframe()

In [28]:
# zombies = pd.read_csv('bounty_pred.csv')[['Human','Zone','Age','Num_Fam','Bounty']]
# zombies.info()

In [14]:
zombies['Human'] = zombies['Human'].astype('category')
zombies['Zone'] = zombies['Zone'].astype('category')
zombies = pd.get_dummies(zombies) 

In [15]:
# zom = zombies[['Human_1','Zone_2']]

# zombies['Human'] = zombies['Human_1'].astype('category')
# zombies['Zone_2'] = zombies['Zone_2'].astype('category')
# zombies['Zone_3'] = zombies['Zone_3'].astype('category')
# zombies['Sex_male'] = zombies['Sex_male'].astype('category')

zombies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 11 columns):
Age           891 non-null int64
Num_Fam       891 non-null int64
Ammo          891 non-null int64
Bounty        891 non-null float64
Human_0       891 non-null uint8
Human_1       891 non-null uint8
Zone_1        891 non-null uint8
Zone_2        891 non-null uint8
Zone_3        891 non-null uint8
Sex_female    891 non-null uint8
Sex_male      891 non-null uint8
dtypes: float64(1), int64(3), uint8(7)
memory usage: 34.1 KB


In [16]:
zom_log = zombies.copy()
# for col in zom_log:
#     if zom_log[col].dtype.name != 'category':
#         zom_log[col] = zom_log[col].astype('float64').replace(0.0, 0.01)
#         zom_log[col] = np.log(zom_log[col])
        
zom_log.head()

# zom_log.hist(figsize = (12,8),color="green");

# **Yikes! Since we are not able to coerce our data into a normal distribution, we'll just try to scale it instead.**

## Splitting our dataset into a training set and a test set.

X = zom_log[['Age','Num_Fam','Human_0','Human_1','Zone_1','Zone_2','Zone_3']]

y = zom_log['Bounty']

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)



## Build a model with each categorical variable as a predictor

import statsmodels.api as sm

ols_model = sm.OLS(y,X.astype(float)).fit()
ols_model.summary()



0,1,2,3
Dep. Variable:,Bounty,R-squared:,0.421
Model:,OLS,Adj. R-squared:,0.418
Method:,Least Squares,F-statistic:,128.7
Date:,"Thu, 06 May 2021",Prob (F-statistic):,1.94e-102
Time:,01:50:10,Log-Likelihood:,-4500.5
No. Observations:,891,AIC:,9013.0
Df Residuals:,885,BIC:,9042.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Age,-0.1383,0.108,-1.282,0.200,-0.350,0.073
Num_Fam,7.3280,0.811,9.031,0.000,5.735,8.921
Human_0,19.0727,2.796,6.820,0.000,13.584,24.561
Human_1,26.2670,2.487,10.562,0.000,21.386,31.148
Zone_1,59.9016,3.050,19.641,0.000,53.916,65.887
Zone_2,-3.4652,2.659,-1.303,0.193,-8.684,1.754
Zone_3,-11.0966,1.871,-5.931,0.000,-14.768,-7.425

0,1,2,3
Omnibus:,1059.127,Durbin-Watson:,2.048
Prob(Omnibus):,0.0,Jarque-Bera (JB):,127386.91
Skew:,5.852,Prob(JB):,0.0
Kurtosis:,60.396,Cond. No.,7.88e+16


In [17]:
r2 = ols_model.rsquared
run.log("r2",r2)

In [18]:
r2_adj = ols_model.rsquared_adj
run.log("r2_adj",r2_adj)

In [19]:
metrics = run.get_metrics()
for key in metrics.keys():
        print(key, metrics.get(key))

r2 0.4209509153405405
r2_adj 0.4176794515854023


In [20]:
print(metrics)

{'r2': 0.4209509153405405, 'r2_adj': 0.4176794515854023}


In [21]:
preds = ols_model.predict()

In [22]:
# Save the trained model in the outputs folder
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=ols_model, filename='outputs/zombies.pkl')

run.complete()

In [23]:
from azureml.core import Model

# Register the model
run.register_model(model_path='outputs/zombies.pkl', model_name='zombies_model',properties={'r2': run.get_metrics()['r2']})

# List registered models
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
#     for tag_name in model.tags:
#         tag = model.tags[tag_name]
#         print ('\t',tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print ('\t',prop_name, ':', prop)
    print('\n')

zombies_model version: 8
	 r2 : 0.4209509153405405


zombies_model version: 7
	 AUC : 0.7075929447249361
	 Accuracy : 0.7261904761904762


zombies_model version: 6
	 Accuracy : 0.7738095238095238
	 r2_adj : 0.813383152173913


zombies_model version: 5
	 r2 : 0.8678496063600782
	 r2_adj : 0.8667364261489117


zombies_model version: 4
	 AUC : 0.7295587174611501
	 Accuracy : 0.746031746031746


zombies_model version: 3
	 AUC : 0.7257556881515966
	 Accuracy : 0.7420634920634921


zombies_model version: 2
	 AUC : 0.7191659563307324
	 Accuracy : 0.7380952380952381


zombies_model version: 1
	 AUC : 0.7212641793980723
	 Accuracy : 0.7380952380952381




In [24]:
preds

array([ 12.26049486,  88.23950554,  11.57338747,  88.65454104,
         3.13399755,   4.65579439,  71.50368931,  37.0114286 ,
        26.09106256,  28.19296934,  29.27300141,  78.14459205,
         5.20917506,  46.54867766,   6.03924607,  15.19280734,
        44.33943873,  19.48150754,  11.01538835,  11.8500778 ,
        10.76542728,  18.09805586,  13.09518431,  82.29494708,
        36.18135759,  53.88130623,   4.65579439, 112.98582083,
        11.8500778 ,   4.65579439,  73.44052166,  90.17633788,
        11.8500778 ,   6.47672709,  82.4286738 ,  80.49184146,
        11.8500778 ,   5.07082989,  20.14188566,  20.56153961,
         9.77028184,  19.20019875,   4.65579439,  44.37078645,
        12.54180364,   4.65579439,  11.98380452,  11.8500778 ,
        19.31181465,  12.81387553,  43.64771289,   5.07082989,
        86.71770869,  26.11779183,  77.3099026 ,  82.84832775,
        19.89654304,   4.10241372,  44.09409611,  57.75035248,
         4.93248473,  80.91149541,  80.07680595,  44.06

In [25]:
print(X_test)

     Age  Num_Fam  Human_0  Human_1  Zone_1  Zone_2  Zone_3
331   45        0        1        0       1       0       0
700   18        1        0        1       1       0       0
748   19        1        1        0       1       0       0
751    6        1        0        1       0       0       1
481   24        0        1        0       0       1       0
..   ...      ...      ...      ...     ...     ...     ...
388   24        0        1        0       0       0       1
416   34        2        0        1       0       1       0
407    3        2        0        1       0       1       0
482   50        0        1        0       0       0       1
829   62        0        0        1       1       0       0

[268 rows x 7 columns]


In [26]:

dataset = dataset.register(workspace = ws,
                           name = 'x_test_dataset',
                           create_new_version=True)

In [27]:
X_test.to_csv('xtest.csv',index=True)

In [34]:
default_ds = ws.get_default_datastore()
for ds_name in ws.datastores:
    print(ds_name, "- Default =", ds_name == default_ds.name)

zombies - Default = True
azureml_globaldatasets - Default = False
workspaceblobstore - Default = False
workspacefilestore - Default = False


In [35]:
ws.set_default_datastore('zombies')