# Automated ML


In [1]:
!pip install xgboost==0.90



In [2]:
import json
import sys
import os
import numpy as np
import pandas as pd
import shutil
import joblib
import requests

from sklearn.model_selection import train_test_split

from TrainCovid19Infections import clean_data

from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig, Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.run import Run
from azureml.core.compute_target import ComputeTargetException

from azureml.widgets import RunDetails

from azureml.train.automl import AutoMLConfig

from azureml.data.dataset_factory import TabularDatasetFactory

from azureml.train.automl import constants

## Initialize Workspace

In [3]:
# Get current workspace from config
ws = Workspace.from_config()
    
ws.get_details()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

project_folder = './capstone-project'
# choose a name for experiment
experiment_name = 'Covid19VaccinationExperiment'
experiment=Experiment(ws, experiment_name)
experiment

Workspace name: wsptest
Azure region: eastus2
Subscription id: c04b3d3f-4994-454d-96ff-aa3f2050b57f
Resource group: testingMLFunctionnalities


Name,Workspace,Report Page,Docs Page
Covid19VaccinationExperiment,wsptest,Link to Azure Machine Learning studio,Link to Documentation


## Create Cluster

Get cluster if it exists else create one

In [4]:
# Create compute cluster
cpu_cluster_name = "Covid19Cluster"
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('A cluster with the same name already exists. If you are trying to create a new one please use a new cluster name')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',max_nodes=4,identity_type="SystemAssigned")
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
cpu_cluster.wait_for_completion(show_output=True)
# Get a detailed status for the current cluster. 
print(cpu_cluster.get_status().serialize())

A cluster with the same name already exists. If you are trying to create a new one please use a new cluster name
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-03-21T16:22:03.834000+00:00', 'errors': None, 'creationTime': '2021-03-21T03:05:29.360274+00:00', 'modifiedTime': '2021-03-21T03:05:45.676021+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


## Dataset

### Overview

I Chose a COVID-19 World Vaccination Dataset that holds a track of the world vaccination including the name of the country, Which vaccines have been used by country, and how many have been vaccinated by Country.

Since the covid-19 vaccination is among the hottest subjects in the world, and as a member of the society being interested in such statistic calculations can help further scientists or even regular people to better understand the global effect of this vaccine all over the world.

I used Kaggle's API to download the Dataset.

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [5]:
# Try to load the dataset from the Workspace. Otherwise, create it from the file
found = False
key = "Covid19InfectionsDataset2"
description_text = "Covid19 Vaccination DataSet from Github"
datastore = ws.get_default_datastore()
datastore.upload_files(files = ['./github/owid-covid-data.csv'],
                       target_path ='train-dataset/tabular/',
                       overwrite = True,
                       show_progress = True)
if key in ws.datasets.keys(): 
    found = True
    dataset = ws.datasets[key] 

if not found:
    original_path = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
    ds = TabularDatasetFactory.from_delimited_files(original_path, infer_column_types=True, separator=',', header=True)
    
    
    #ds = Dataset.Tabular.from_delimited_files(path = [(datastore, 'train-dataset/tabular/country_vaccinations.csv')])
    
    #Register Dataset in Workspace
    dataset = ds.register(workspace=ws,
                          name=key,
                          description=description_text)

df = dataset.to_pandas_dataframe().fillna(0)
df.describe()

Uploading an estimated of 1 files
Uploading ./github/owid-covid-data.csv
Uploaded ./github/owid-covid-data.csv, 1 files out of an estimated total of 1
Uploaded 1 files


Unnamed: 0,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,...,median_age,aged_65_older,aged_70_older,gdp_per_capita,cardiovasc_death_rate,diabetes_prevalence,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
count,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,...,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0,76215.0
mean,664429.5,5129.566568,5063.11263,17076.1,114.393531,113.178646,7873.626153,66.505456,65.390387,160.78255,...,27.700474,7.880594,5.023743,17411.789829,236.141847,7.253194,23.489918,2.548185,69.505214,0.667534
std,4698197.0,32191.869926,31633.387699,108136.1,674.709265,652.256822,15662.046239,168.372816,141.179154,315.4216,...,12.419133,6.482936,4.370634,19609.485356,133.738715,4.294111,33.329193,2.519296,17.534691,0.246698
min,0.0,-74347.0,-6223.0,0.0,-1918.0,-232.143,0.0,-2153.437,-276.825,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,673.5,1.0,4.429,10.0,0.0,0.0,123.578,0.061,0.741,1.603,...,19.6,3.008,1.783,2896.913,140.448,4.61,0.0,0.7,66.47,0.555
50%,7821.0,55.0,63.0,130.0,1.0,0.857,1036.897,5.861,7.536,19.611,...,29.0,5.44,3.212,10727.146,233.07,6.93,0.0,2.0,74.16,0.737
75%,87043.0,662.0,666.7855,1710.0,11.0,11.571,7189.7665,55.94,61.936,139.17,...,38.0,13.26,8.353,25063.846,318.949,9.75,47.782,3.6,78.49,0.828
max,122813800.0,880902.0,739564.429,2709639.0,17895.0,14424.0,148592.506,8652.658,2648.773,2327.774,...,48.2,27.049,18.493,116935.6,724.417,30.53,98.999,13.8,86.75,0.957


In [6]:
# preview the first 10 rows of the dataset
df.head(10)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,0.0,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,0.0,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,0.0,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,0.0,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,0.0,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
5,AFG,Asia,Afghanistan,2020-02-29,1.0,0.0,0.143,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
6,AFG,Asia,Afghanistan,2020-03-01,1.0,0.0,0.143,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
7,AFG,Asia,Afghanistan,2020-03-02,1.0,0.0,0.0,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
8,AFG,Asia,Afghanistan,2020-03-03,2.0,1.0,0.143,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511
9,AFG,Asia,Afghanistan,2020-03-04,4.0,2.0,0.429,0.0,0.0,0.0,...,1803.987,0,597.029,9.59,0,0,37.746,0.5,64.83,0.511


In [7]:
# Use the clean_data function to clean your data.
x, y = clean_data(df)
data = pd.concat([x,y],axis=1)
data.head()

Unnamed: 0,date,total_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,...,iso_code_ZMB,iso_code_ZWE,continent_0,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America,new_cases
0,737479,1.0,0.0,0.0,0.0,0.0,0.026,0.026,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
1,737480,1.0,0.0,0.0,0.0,0.0,0.026,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
2,737481,1.0,0.0,0.0,0.0,0.0,0.026,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
3,737482,1.0,0.0,0.0,0.0,0.0,0.026,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
4,737483,1.0,0.0,0.0,0.0,0.0,0.026,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0


In [8]:
# Split data into train and test sets.
training_data,validation_data = train_test_split(data,test_size = 0.3,random_state = 42,shuffle=True)

In [9]:
# Create necessary folders
if "automl_training" not in os.listdir():
    os.mkdir("./automl_training")
if "data" not in os.listdir():
    os.mkdir("./data")
if "outputs" not in os.listdir():
    os.mkdir("./outputs")
if "training" not in os.listdir():
    os.mkdir("./training")
# store training_dataset into it using datastore
script_folder = './automl_training/'    
os.makedirs(script_folder, exist_ok=True)
shutil.copy('TrainCovid19Infections.py', script_folder)
project_folder = './pipeline-project'

## AutoML Configuration
TODO: Explain why you chose the automl settings and cofiguration you used below.
The settings used below refers to a classification task within a number of settings chosen based on the existing workspace and cluster configuration restrictions 

In [10]:
#convert the training dataset to a CSV file and store it under the training folder
training_data.to_csv('training/training_data.csv')
#Create an experiment for the AutoML testing script
exp = Experiment(workspace=ws, name="Covid19AutoMlExperiment")

# Get the dataset from the data folder
datastore.upload_files(files = ['training/training_data.csv'],
                       target_path ='./data/',
                       overwrite = True,
                       show_progress = True)
training_dataset = TabularDatasetFactory.from_delimited_files(path=[(datastore,('./data/training_data.csv'))])
#training_dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, 'train-dataset/tabular/country_vaccinations.csv')])
automl_settings = {
    "n_cross_validations": 5,
    "primary_metric": 'accuracy',
    "enable_early_stopping": True,
    "experiment_timeout_hours": 1.0,
    "max_concurrent_iterations": 3,
}
automl_config = AutoMLConfig(task = 'classification',
                             compute_target = cpu_cluster,
                             training_data = training_dataset,
                             label_column_name = 'new_cases',
                             featurization= 'auto',
                             path=project_folder,
                              model_explainability=True,
                             debug_log = "Covid_automl_errors.log",
                             **automl_settings)

Uploading an estimated of 1 files
Uploading training/training_data.csv
Uploaded training/training_data.csv, 1 files out of an estimated total of 1
Uploaded 1 files


In [11]:
# Experiment Submission
tag = {"Covid19Infections": "Capstone project: Covid19 AutoML Experiment"}
remote_run = experiment.submit(automl_config,tags=tag, show_output=True)

Running on remote.
No run_configuration provided, running on Covid19Cluster with default configuration
Running on remote compute: Covid19Cluster
Parent Run ID: AutoML_4156e39b-3d8d-4306-aa43-4829b9e31daa

Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

************************************************

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?
Supervised Learning in general is based on labled 

In [12]:
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…



****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and no high cardinality features were detected.
              Learn more abo

{'runId': 'AutoML_4156e39b-3d8d-4306-aa43-4829b9e31daa',
 'target': 'Covid19Cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-03-21T20:21:16.947822Z',
 'endTimeUtc': '2021-03-21T21:52:32.30952Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'Covid19Cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"749083bc-e829-4d34-a67b-46f648331105\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"./data/training_data.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"testingMLFunctionnalities\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"c04b3d3f-4994-454d-96ff-aa3f2050b57f\\\\\\"

In [13]:
# List best models of HyperDrive Run and AutoML Run to compare the accuracy of the models and choose the best among them to deploy
for model in Model.list(ws):
    print(model.name)
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print('\t',tag_name,':',tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print('\t',prop_name,':',prop)
    print("\n")

## Best Model

explaining_model_run_id = remote_run.id
print(explaining_model_run_id)
explaining_model_run = Run(experiment=experiment, run_id=explaining_model_run_id)
explaining_model_run.wait_for_completion()

In [14]:
# Retrieve and save best model.
best_automl_run, best_automl_model = remote_run.get_output()

In [15]:
# Get the metrics of the best selected run
best_run_metrics = best_automl_run.get_metrics()

# Print all metrics of the best run model
for metric_name in best_run_metrics:
    metric = best_run_metrics[metric_name]
    print(metric_name, metric)
    
print(best_automl_model._final_estimator)

norm_macro_recall 0.9974493746686296
average_precision_score_macro 0.9999723432258383
average_precision_score_weighted 0.9999827239436186
f1_score_weighted 0.9990440422376144
matthews_correlation 0.9975225530390672
AUC_micro 0.9999864943798314
precision_score_micro 0.9990440487347705
recall_score_weighted 0.9990440487347705
accuracy 0.9990440487347705
precision_score_macro 0.998797941330779
weighted_accuracy 0.9992440229190299
AUC_weighted 0.9999830165697053
average_precision_score_micro 0.9999865950831733
log_loss 0.02218153679203686
recall_score_micro 0.9990440487347705
balanced_accuracy 0.9987246873343147
recall_score_macro 0.9987246873343147
f1_score_macro 0.9987611131021031
f1_score_micro 0.9990440487347705
precision_score_weighted 0.9990442868982194
AUC_macro 0.9999830165697052
accuracy_table aml://artifactId/ExperimentRun/dcid.AutoML_4156e39b-3d8d-4306-aa43-4829b9e31daa_21/accuracy_table
confusion_matrix aml://artifactId/ExperimentRun/dcid.AutoML_4156e39b-3d8d-4306-aa43-4829b9e3

In [16]:
best_automl_run

Experiment,Id,Type,Status,Details Page,Docs Page
Covid19VaccinationExperiment,AutoML_4156e39b-3d8d-4306-aa43-4829b9e31daa_21,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [17]:
joblib.dump(best_automl_model, filename="./outputs/automl-model.pkl")

['./outputs/automl-model.pkl']

In [18]:
arr = os.listdir('./outputs/')
print(arr)

['automl-model.pkl', 'model.joblib']


In [19]:
from azureml.core.model import Model
model = Model.list(ws)
print (model)

[]


In [20]:
# register the best model
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration
best_model_registered = remote_run.register_model(
                                           
                                            model_name = "Covid19-automl-model",
                                            tags={'Area': "Pandemic", 'Type': "Classification",'Method of execution':'Auto ML'},
                                            )
#print(best_model.name, best_model.id, sep='\t')
print(best_model_registered)

Model(workspace=Workspace.create(name='wsptest', subscription_id='c04b3d3f-4994-454d-96ff-aa3f2050b57f', resource_group='testingMLFunctionnalities'), name=Covid19-automl-model, id=Covid19-automl-model:2, version=2, tags={'Area': 'Pandemic', 'Type': 'Classification', 'Method of execution': 'Auto ML'}, properties={})


## Model Deployment

As the best model coming from AutoML run has better accuracy than the one coming from the HyperDrive run, I deploy it in the cell below, register it, create an inference config and deploy the model as a web service.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [21]:
with open('score.py') as f:
    print(f.read())

# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import json
import logging
import os
import pickle
import numpy as np
import pandas as pd
import joblib

import azureml.automl.core
from azureml.automl.core.shared import logging_utilities, log_server
from azureml.telemetry import INSTRUMENTATION_KEY

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType


input_sample = pd.DataFrame({"Column1": pd.Series([0], dtype="int64"), "date": pd.Series([0], dtype="int64"), "total_cases": pd.Series([0.0], dtype="float64"), "new_cases_smoothed": pd.Series([0.0], dtype="float64"), "total_deaths": pd.Series([0.0], dtype="float64"), "new_deaths": pd.Series([0.0], dtype="float64"), "n

In [22]:
from azureml.automl.core.shared import constants

model = Model(ws, 'Covid19-automl-model')

environment = best_automl_run.get_environment()
best_automl_run.download_file('outputs/scoring_file_v_1_0_0.py', 'score.py')
best_automl_run.download_file(constants.CONDA_ENV_FILE_PATH, 'environment.yml')


inference_config = InferenceConfig(entry_script='score.py', environment=environment)

deployment_config = AciWebservice.deploy_configuration(
                                                       cpu_cores=1,
                                                       memory_gb=1,
                                                       description='Covid19 new cases prediction',
                                                       auth_enabled=True,
                                                       enable_app_insights= True,
                                                       collect_model_data = True)

service = Model.deploy(workspace=ws,
                       name='aci-covid19-service',
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=deployment_config,
                       overwrite=True)

service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-03-21 21:57:00+00:00 Creating Container Registry if not exists.
2021-03-21 21:57:00+00:00 Registering the environment.
2021-03-21 21:57:01+00:00 Use the existing image.
2021-03-21 21:57:01+00:00 Generating deployment configuration.
2021-03-21 21:57:02+00:00 Submitting deployment to compute..
2021-03-21 21:57:05+00:00 Checking the status of deployment aci-covid19-service..
2021-03-21 22:00:21+00:00 Checking the status of inference endpoint aci-covid19-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [23]:
print("Key " + service.get_keys()[0])
print("Swagger URI : "+service.swagger_uri)
print("Scoring URI : "+service.scoring_uri)

Key egdA59jBSgeCRegEldYCdVKYpShm66qH
Swagger URI : http://e6396f0e-30ea-47fa-99bc-76dc924015a3.eastus2.azurecontainer.io/swagger.json
Scoring URI : http://e6396f0e-30ea-47fa-99bc-76dc924015a3.eastus2.azurecontainer.io/score


TODO: In the cell below, send a request to the web service you deployed to test it.

In [24]:
#Importing the dataset for testing 
x_df = validation_data.to_pandas_dataframe()
x_df.head(10)

Unnamed: 0,date,total_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,...,iso_code_ZMB,iso_code_ZWE,continent_0,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America,new_cases
0,737479,1.0,0.0,0.0,0.0,0.0,0.03,0.03,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
1,737480,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
2,737481,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
3,737482,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
4,737483,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
5,737484,1.0,0.14,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
6,737485,1.0,0.14,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
7,737486,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
8,737487,2.0,0.14,0.0,0.0,0.0,0.05,0.03,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
9,737488,4.0,0.43,0.0,0.0,0.0,0.1,0.05,0.01,0.0,...,0,0,0,0,1,0,0,0,0,1


In [None]:
y_df = x_df["new_cases"]
y_df.head(10)

In [25]:
x_df.drop(['new_cases','date'], inplace=True, axis=1)
x_df.head(10)

Unnamed: 0,total_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,...,iso_code_ZAF,iso_code_ZMB,iso_code_ZWE,continent_0,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America
0,1.0,0.0,0.0,0.0,0.0,0.03,0.03,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
1,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
2,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
3,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
4,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
5,1.0,0.14,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
6,1.0,0.14,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
7,1.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
8,2.0,0.14,0.0,0.0,0.0,0.05,0.03,0.0,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0
9,4.0,0.43,0.0,0.0,0.0,0.1,0.05,0.01,0.0,0.0,...,0,0,0,0,0,1,0,0,0,0


In [26]:
Covid19DataTesting= json.dumps({'data': x_df.to_dict(orient='records')})
print(Covid19DataTesting)

In [None]:
headers = {'Content-type': 'application/json'}
headers['Authorization'] = f'Bearer {service.get_keys()[0]}'
# Make the request and display the response
response = requests.post(service.scoring_uri, Covid19DataTesting, headers=headers)
print('Prediction :', response.text)

# Print original labels
print('True Values :', y_df.values)

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
print(service.get_logs())

In [None]:
#service.delete()
#cpu_cluster.delete()