# Sample Jupyter notebook

### Essential imports

In [1]:
import requests
import pandas as pd
from time import sleep
import json

### Prepare the request _headers_ (with your credentials) and the base URLs for the API calls


In [10]:
# Service endpoints: every API call in this notebook is built on top of
# autoai4text_api_url.
base_url = "https://api.ibm.com"
autoai4text_api_url = base_url + "/autoai4text/run"

# Headers sent with every request. Fill in the two credential values before
# running the notebook; "Accept" asks the service for JSON responses.
headers = dict(
    [
        ("X-IBM-Client-Id", ""),
        ("X-IBM-Client-Secret", ""),
        ("Accept", "application/json"),
    ]
)

### Preliminary health check to make sure your credentials are OK and you can access the service!

You will get a "Welcome to the AutoAI for Text service!" message. If you receive an "Authorize" message, please check your credentials.

In [11]:
# Ping the service's health-check endpoint and show the raw reply so that
# credential problems are visible before any experiment is submitted.
health_check_url = f"{autoai4text_api_url}/health_check"
print(health_check_url)
# requests has no default timeout; without one an unreachable endpoint would
# block this cell forever instead of failing fast.
response = requests.get(health_check_url, headers=headers, timeout=30)
print(response.text)

https://dev.api.ibm.com/autoai4text/test/health_check
"Welcome to the AutoAI for Text service!"



### Inspect the data

In [12]:
# Load both CSV splits from the working directory (train first, then holdout)
# and report their (rows, columns) shapes.
trec_train, trec_holdout = (
    pd.read_csv(csv_name) for csv_name in ("trec_train.csv", "trec_holdout.csv")
)
print(f"Train dataset shape: {trec_train.shape}")
print(f"Holdout dataset shape: {trec_holdout.shape}")


Train dataset shape: (5452, 2)
Holdout dataset shape: (500, 2)


In [13]:
# Caption first; the bare last expression then renders the leading rows of
# the training frame as a table.
print("Train dataset's first rows")
trec_train.head(n=5)

Train dataset's first rows


Unnamed: 0,Sentence,Label
0,How did serfdom develop in and then leave Russ...,1
1,What films featured the character Popeye Doyle ?,2
2,How can I find a list of celebrities ' real na...,1
3,What fowl grabs the spotlight after the Chines...,2
4,What is the full form of .com ?,0


In [14]:
# Caption first; the bare last expression then renders the leading rows of
# the holdout frame as a table.
print("Holdout dataset's first rows")
trec_holdout.head(n=5)

Holdout dataset's first rows


Unnamed: 0,Sentence,Label
0,How far is it from Denver to Aspen ?,5
1,"What county is Modesto , California in ?",4
2,Who was Galileo ?,3
3,What is an atom ?,1
4,When did Hawaii become a state ?,5


### Preparing to run an experiment: set the parameters

The only required parameter is the train_file, but here we are setting all of them.

In [15]:
# Experiment settings, passed as query parameters when the experiment is
# submitted. Only the training file is mandatory (see the note above this
# cell); everything here is set explicitly for illustration.
params = {
    "scorer_for_ranking": "accuracy",
    "time_to_train": 600,
    "models_to_include": 'cnn,bert,lstm',
    "max_evals_count": 30,
    "ranked_pipelines_count": 4,
    "text_column_index": 0,
    "label_column_index": 1,
    "label_type": 'int',
    "file_delimiter": ',',
    "validation_ratio": .1,
}

# Multipart upload payload: (filename, content) per form field.
# The content is read eagerly as bytes inside a `with` block so that
#   * the file handles are closed immediately (no leaked descriptors), and
#   * the submit cell can be re-run safely -- an open handle would already be
#     at end-of-file on a second run and upload an empty file.
# Binary mode sends the bytes exactly as stored on disk.
with open('trec_train.csv', 'rb') as train_fh, open('trec_holdout.csv', 'rb') as holdout_fh:
    files = {
        "train_file": ('trec_train.csv', train_fh.read()),
        "holdout_file": ('trec_holdout.csv', holdout_fh.read()),
    }

### Ready to run the experiment

In [16]:
# Submit the experiment: the datasets go as a multipart upload and the
# settings as query parameters.
run_experiment_url = f"{autoai4text_api_url}/experiment"
print(run_experiment_url)
response = requests.post(
    run_experiment_url,
    headers=headers,
    files=files,
    params=params
)
# Surface HTTP errors (4xx/5xx, e.g. rejected credentials) right here rather
# than as an opaque KeyError / JSON decoding error on the next line.
response.raise_for_status()
# The experiment id is needed afterwards to query status and results.
exp_id = response.json()['exp_id']
f'The experiment ID is : {exp_id}'

https://dev.api.ibm.com/autoai4text/test/experiment


'The experiment ID is : b5d3db6b1a0f4be09136b61a5f3c13c4'

### Check the experiment status using the _exp\_id_ returned above

The code snippet below polls the status endpoint until the experiment results are available, i.e. until the status code is 200.

In [17]:
experiment_status_url = f"{autoai4text_api_url}/experiment/{exp_id}"
print(experiment_status_url)

# Polling schedule. The initial wait mirrors the 600 s training budget
# requested in `params`; after that the status is re-checked every 30 s.
# MAX_POLLS bounds the loop (about one extra hour) so that a persistent error
# status cannot keep this cell spinning forever.
INITIAL_WAIT_SECONDS = 600
POLL_INTERVAL_SECONDS = 30
MAX_POLLS = 120

def get_results():
    """Query the status endpoint of the current experiment.

    Returns the raw `requests` response; per this notebook, a status code of
    200 means the results are available.
    """
    return requests.get(experiment_status_url, headers=headers)

response = get_results()
status_code = response.status_code

# Only wait when the results are not there yet (e.g. this cell is re-run after
# the experiment has already finished).
if status_code != 200:
    sleep(INITIAL_WAIT_SECONDS)
    for _ in range(MAX_POLLS):
        response = get_results()
        status_code = response.status_code
        if status_code == 200:
            break
        sleep(POLL_INTERVAL_SECONDS)

if status_code != 200:
    # Reached when the polling budget is exhausted without a 200 response.
    print('Unexpected error')
    print(response.text)
else:
    print(f"The experiment has finished and {response.json()['message']}")

https://dev.api.ibm.com/autoai4text/test/experiment/b5d3db6b1a0f4be09136b61a5f3c13c4
The experiment has finished and optimizer has learnt 4 pipelines.


### Inspect the results

#### _See the pipeline nodes_

In [18]:
# The 'report' entry of the final status response holds one report per
# learnt pipeline; this notebook treats the first entry as the best one.
results = response.json()['report']
print(f'results is a list containing {len(results)} pipeline reports')
print('The nodes of the best pipeline are:')
best_nodes = results[0]['pipeline_nodes']
print(json.dumps(best_nodes, indent=4))


results is a list containing 4 pipeline reports
The nodes of the best pipeline are:
{
    "TransformerStylePreprocessor": {
        "op": "TransformerStylePreprocessor",
        "hyperparameters": {
            "model_name": "BERT",
            "max_length": 200
        }
    },
    "TransformerStyleEmbedding": {
        "op": "TransformerStyleEmbedding"
    },
    "MLPClassifier": {
        "op": "MLPClassifier"
    }
}


#### _See the metric scores for validation and holdout:_

In [19]:
# Pretty-print both score reports of the first (best) pipeline, validation
# before holdout, each preceded by its caption.
for caption, report_key in (
    ('The validation metric scores of the best pipeline are:', 'validation_scores_report'),
    ('The holdout metric scores of the best pipeline are: ', 'holdout_scores_report'),
):
    print(caption)
    print(json.dumps(results[0][report_key], indent=4))

The validation metric scores of the best pipeline are:
{
    "recall_weighted": 0.9597069597069597,
    "recall_macro": 0.9512714596569989,
    "recall_micro": 0.9597069597069597,
    "precision_weighted": 0.9614308020022307,
    "precision_macro": 0.9682677757971875,
    "precision_micro": 0.9597069597069597,
    "f1_weighted": 0.9596575093845483,
    "f1_macro": 0.9586368015924234,
    "f1_micro": 0.9597069597069597,
    "accuracy": 0.9597069597069597,
    "runtime_per_row_predict": 3.2076241769196785
}
The holdout metric scores of the best pipeline are: 
{
    "recall_weighted": 0.966,
    "recall_macro": 0.9188442563586395,
    "recall_micro": 0.966,
    "precision_weighted": 0.9674115730162726,
    "precision_macro": 0.9743734908443432,
    "precision_micro": 0.966,
    "f1_weighted": 0.9651073636775687,
    "f1_macro": 0.9402398821776453,
    "f1_micro": 0.966,
    "accuracy": 0.966,
    "runtime_per_row_predict": 1.9571967124938963
}


#### _See the predictions made by the best pipeline on the holdout dataset_

In [23]:
# Show the values the first (best) pipeline predicted for the holdout rows.
print('The predictions on the holdout dataset of the best pipeline are:')
holdout_predictions = results[0]['holdout_predicted_values']
print(holdout_predictions)

The predictions on the holdout dataset of the best pipeline are:
[5.0, 4.0, 3.0, 1.0, 5.0, 5.0, 3.0, 2.0, 1.0, 1.0, 4.0, 3.0, 5.0, 3.0, 5.0, 5.0, 2.0, 3.0, 1.0, 5.0, 3.0, 1.0, 4.0, 1.0, 1.0, 3.0, 1.0, 4.0, 4.0, 4.0, 5.0, 4.0, 1.0, 5.0, 5.0, 5.0, 4.0, 5.0, 5.0, 5.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 5.0, 1.0, 5.0, 3.0, 1.0, 3.0, 3.0, 1.0, 1.0, 1.0, 5.0, 4.0, 4.0, 5.0, 4.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 1.0, 5.0, 4.0, 5.0, 5.0, 4.0, 3.0, 4.0, 1.0, 2.0, 5.0, 5.0, 3.0, 1.0, 5.0, 3.0, 5.0, 5.0, 1.0, 1.0, 3.0, 1.0, 4.0, 2.0, 1.0, 5.0, 5.0, 4.0, 4.0, 5.0, 1.0, 1.0, 5.0, 1.0, 3.0, 1.0, 3.0, 4.0, 1.0, 5.0, 2.0, 5.0, 4.0, 2.0, 1.0, 5.0, 3.0, 2.0, 3.0, 5.0, 2.0, 5.0, 4.0, 5.0, 5.0, 1.0, 3.0, 5.0, 3.0, 5.0, 5.0, 1.0, 5.0, 5.0, 3.0, 1.0, 2.0, 1.0, 4.0, 4.0, 3.0, 3.0, 1.0, 1.0, 2.0, 4.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 1.0, 5.0, 3.0, 4.0, 4.0, 1.0, 4.0, 4.0, 1.0, 1.0, 5.0, 4.0, 2.0, 4.0, 5.0, 1.0, 4.0, 3.0, 5.0, 5.0, 5.0, 1.0, 4.0, 4.0, 4.0, 5.0, 2.0, 5.0, 4.0, 1.0, 4.0, 1.0, 2.0, 3.0, 3.0,