This notebook loads the results of various Iris dataset experiments, calculates GPT-3 and kNN/logistic regression accuracies and compares them.

In [11]:
import json
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from collections import defaultdict

# GPT-3 engines whose stored classifications are scored in the cells below.
engines = ['ada', 'babbage', 'curie', 'davinci']

iris = datasets.load_iris()

# The raw measurements are rescaled affinely and rounded element-wise.
# NOTE(review): the factor 14 and offset 6 presumably mirror the transformation
# used when the GPT-3 prompts were built — confirm against the experiment code.
SCALE, OFFSET = 14, 6
round_elementwise = np.vectorize(round)
transformed = round_elementwise(SCALE * iris['data'] + OFFSET)

# Class labels, used both for stratified splitting and for scoring.
y = iris.target

# Basic experiment

In [None]:
# For each random state: load the stored GPT-3 classifications, rebuild the
# matching train/test split, and record the test accuracy of kNN (k = 3, 5, 7),
# logistic regression and every GPT-3 engine in `scores`.
random_states = [88, 91, 93, 95, 97]
scores = defaultdict(list)
for state in random_states:
    with open(f'iris_results/basic experiment, no preamble, input-output'
                f' terminology, random_state {state}.json', 'r') as file:
        results = json.load(file)

    # The split depends only on `state`, so it is computed once per state
    # rather than once per value of k.
    x_train, x_test, y_train, y_test = train_test_split(
        transformed, y, test_size=0.5, stratify=y, random_state=state)

    # Labels are passed as 1-D arrays: a column vector makes scikit-learn emit
    # a DataConversionWarning on every fit.
    for k in [3, 5, 7]:
        neigh = KNeighborsClassifier(n_neighbors=k)
        neigh.fit(x_train, y_train)
        scores[f'knn_{k}'].append(neigh.score(x_test, y_test))

    # Logistic regression is trained on standardised features; the scaler is
    # fitted on the training half only.
    # NOTE: x_train / x_test are rebound to the scaled arrays on purpose —
    # later cells in this notebook read these names.
    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)

    classifier = LogisticRegression(random_state = 0, solver='lbfgs',
                                    multi_class='auto')
    classifier.fit(x_train, y_train)

    scores['lr'].append(classifier.score(x_test, y_test))

    # Plain Python ints for the element-wise comparison with the values
    # loaded from JSON (y_test stays a list after this cell).
    y_test = [int(label) for label in y_test]

    for engine in engines:
        accurate = [1 if predicted == actual else 0
                    for predicted, actual in zip(results[engine]['gpt_classification'],
                                                 y_test)]
        scores[engine].append(sum(accurate)/len(accurate))


In [17]:
# Report the mean accuracy of each classifier / engine over all recorded runs.
for name, run_scores in scores.items():
    print(name, np.mean(run_scores))

knn_3 0.9653333333333334
knn_5 0.9573333333333333
knn_7 0.96
lr 0.9626666666666667
ada 0.8986666666666666
babbage 0.9306666666666666
curie 0.952
davinci 0.9573333333333334


In [25]:
# Single 3-nearest-neighbour run on the split currently held in the kernel.
# NOTE(review): relies on x_train / x_test / y_train / y_test defined by
# earlier cells.
neigh = KNeighborsClassifier(n_neighbors=3)
# Labels are passed 1-D: a column vector triggers a DataConversionWarning in
# fit(), and y_test may already be a plain list, which has no .reshape().
neigh.fit(x_train, y_train)

print(neigh.score(x_test, y_test))


0.9733333333333334


  neigh.fit(x_train, y_train.reshape(-1, 1))


In [28]:
# Single logistic-regression run on standardised features.
# NOTE(review): x_train / x_test are rebound to their scaled versions, so this
# cell depends on (and changes) state left by earlier cells.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

classifier = LogisticRegression(random_state = 0, solver='lbfgs', multi_class='auto')
classifier.fit(x_train, y_train)

# y_test is passed as-is: it may already be a plain list (which has no
# .reshape()), and score() accepts 1-D labels directly.
classifier.score(x_test, y_test)

0.9733333333333334

In [5]:
# Convert the test labels to a list of plain Python ints for comparison with
# the classifications loaded from JSON.
y_test = list(map(int, y_test))


In [22]:
# Accuracy of each GPT-3 engine: share of stored classifications that equal
# the corresponding test label.
for engine in engines:
    predictions = results[engine]['gpt_classification']
    hits = [int(predicted == actual)
            for predicted, actual in zip(predictions, y_test)]
    print(engine, sum(hits)/len(hits))

ada 0.9066666666666666
babbage 0.9333333333333333
curie 0.9333333333333333
davinci 0.9733333333333334


## Basic experiment, no preamble and bare numbers, random_state 88

In [6]:
# Ground-truth labels are rebuilt here for random_state 88 instead of reusing
# whatever `y_test` the kernel currently holds (after a full re-run that would
# be the split of the last state processed by the basic-experiment loop).
# NOTE(review): assumes this result file was produced from the same 50/50
# stratified split used by the basic experiment — confirm.
labels_88 = train_test_split(transformed, y, test_size=0.5, stratify=y,
                             random_state=88)[-1].tolist()

with open('iris_results/basic experiment, no preamble and bare numbers, random_state 88.json', 'r') as file:
    results = json.load(file)

# Accuracy of each GPT-3 engine against the state-88 test labels.
for engine in engines:
    accurate = [1 if predicted == actual else 0
                for predicted, actual in zip(results[engine]['gpt_classification'],
                                             labels_88)]
    print(engine, sum(accurate)/len(accurate))

ada 0.8933333333333333
babbage 0.92
curie 0.92
davinci 0.9733333333333334


## Basic experiment, no preamble and bare numbers, two orders of magnitude bigger, random_state 88

In [7]:
# Ground-truth labels are rebuilt here for random_state 88 instead of reusing
# whatever `y_test` the kernel currently holds (after a full re-run that would
# be the split of the last state processed by the basic-experiment loop).
# NOTE(review): assumes this result file was produced from the same 50/50
# stratified split used by the basic experiment — confirm.
labels_88 = train_test_split(transformed, y, test_size=0.5, stratify=y,
                             random_state=88)[-1].tolist()

# The misspelling in the file name is part of the path on disk; keep it.
with open('iris_results/basic experiment, no preamble and bare numbers, two orders of magnitutde bigger, random_state 88.json', 'r') as file:
    results = json.load(file)

# Accuracy of each GPT-3 engine against the state-88 test labels.
for engine in engines:
    accurate = [1 if predicted == actual else 0
                for predicted, actual in zip(results[engine]['gpt_classification'],
                                             labels_88)]
    print(engine, sum(accurate)/len(accurate))

ada 0.84
babbage 0.8666666666666667
curie 0.9733333333333334
davinci 0.9066666666666666
