In [1]:
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.datasets import load_boston
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_iris
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns


In [2]:
dataset = load_iris()
data = dataset['data']
target = dataset['target']

lb = LabelBinarizer()
target = lb.fit_transform(target)

X_train, X_test, y_train, y_test = \
    train_test_split(data, target, test_size=.25, random_state=13)

X_train.shape, y_train.shape

((112, 4), (112, 3))

In [3]:
n = X_train.shape[1]
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train).astype(np.float32)

In [4]:
X_train_scaled[0]

array([ 0.46113643, -2.02367187,  0.45434329,  0.45153624], dtype=float32)

In [5]:
n_features = X_train.shape[1]
n_labels = y_train.shape[1]
n_features, n_labels

(4, 3)

In [6]:
tf.reset_default_graph()

with tf.name_scope("input"):
    # define variables
    X = tf.placeholder(tf.float32, shape=[None, n_features], name='x')
    y = tf.placeholder(tf.float32, shape=[None, n_labels], name='y')

with tf.name_scope("regression"):
    # define variables
    W = tf.Variable(tf.truncated_normal([n_features,n_labels], dtype=tf.float32, stddev=.01), name='weights')
    b = tf.Variable(tf.zeros([n_labels], dtype=tf.float32), name='biases')
    
with tf.name_scope("operations"):
    # regular calculation of pred, similar to linear regression
    logits = tf.add(tf.matmul(X ,W), b)
    
    # softmax will convert to a probability distribution
    softmax = tf.nn.softmax(logits)
        
    # compute regular error functions
    # cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    # loss = tf.losses.log_loss(softmax, y)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(cross_entropy)
        
    # result, needs to be 0 or 1
    # correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(softmax,1))
    # y_pred = tf.argmax(softmax, 1)
    
    # define optimization
    learning_rate = tf.placeholder(tf.float32)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss)


In [7]:
# tensorboard
# creat a summary for x and y
tf.summary.scalar("loss", loss)
summary_op = tf.summary.merge_all()

# no need to specify graph
writer = tf.summary.FileWriter('./example', graph=tf.get_default_graph()) 

In [8]:
# run it
epochs=20000
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
losses = []
for epoch in range(1, epochs):
    # loss, summary  = sess.run([train_op, summary_op], feed_dict)
    feed_dict = {
        X: X_train_scaled,
        y: y_train,
        learning_rate: .02
    }
    # run
    _, summary, loss_ = sess.run([train_op, summary_op, loss], feed_dict)

    losses.append(loss_)
    
    # log results
    writer.add_summary(summary)
    
    # log to console
    if epoch%(epochs/10) == 0:
        curr_loss, curr_W, curr_b, pred = sess.run([loss, W, b, softmax], feed_dict)
        # print(curr_loss)
        acc = accuracy_score(np.argmax(y_train, 1), np.argmax(pred, 1))
        print("Acc: {:.2f}, loss: {:.2f} ".format(acc, curr_loss))


Acc: 0.96, loss: 0.19 
Acc: 0.96, loss: 0.14 
Acc: 0.98, loss: 0.11 
Acc: 0.98, loss: 0.10 
Acc: 0.98, loss: 0.09 
Acc: 0.98, loss: 0.08 
Acc: 0.98, loss: 0.07 
Acc: 0.98, loss: 0.07 
Acc: 0.98, loss: 0.07 


In [9]:
np.argmax(pred, 1), np.argmax(y_train, 1)

(array([1, 0, 0, 2, 1, 2, 2, 2, 0, 1, 0, 1, 0, 1, 2, 2, 0, 1, 0, 1, 2, 2, 0,
        0, 2, 1, 2, 2, 0, 2, 1, 1, 0, 0, 0, 1, 1, 1, 2, 2, 1, 2, 2, 2, 0, 2,
        1, 0, 1, 2, 1, 0, 0, 1, 1, 0, 1, 2, 0, 2, 1, 0, 2, 0, 0, 0, 1, 0, 1,
        1, 2, 2, 0, 2, 0, 1, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0, 1, 1, 2, 1, 2, 2,
        0, 0, 1, 1, 0, 0, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 1, 0, 1, 1]),
 array([1, 0, 0, 2, 1, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1, 2, 0, 1, 0, 1, 2, 2, 0,
        0, 2, 1, 2, 2, 0, 2, 1, 1, 0, 0, 0, 1, 1, 1, 2, 2, 1, 2, 2, 2, 0, 2,
        1, 0, 1, 2, 1, 0, 0, 1, 1, 0, 1, 2, 0, 2, 1, 0, 2, 0, 0, 0, 1, 0, 1,
        1, 2, 2, 0, 1, 0, 1, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0, 1, 1, 2, 1, 2, 2,
        0, 0, 1, 1, 0, 0, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 1, 0, 1, 1]))

In [10]:
curr_W, curr_b

(array([[-1.85281742,  1.35762477,  0.4971602 ],
        [ 1.71766376, -0.45415202, -1.2657181 ],
        [-3.12245345, -0.97211236,  4.0975709 ],
        [-2.97668934, -1.01792395,  4.00237417]], dtype=float32),
 array([-0.39082167,  3.61333537, -3.22252488], dtype=float32))

In [11]:
np.bincount(pred.astype(np.int32).reshape(-1))
pred[:10], y_train[:10]

(array([[  5.85038151e-06,   7.34984934e-01,   2.65009165e-01],
        [  9.94616210e-01,   5.38385287e-03,   1.94919619e-11],
        [  9.97449815e-01,   2.55013467e-03,   5.94321120e-12],
        [  1.01948153e-05,   3.11950892e-01,   6.88038945e-01],
        [  5.10007283e-03,   9.78581131e-01,   1.63187236e-02],
        [  2.39182896e-07,   9.06806588e-02,   9.09319103e-01],
        [  9.80332970e-06,   1.47161067e-01,   8.52829099e-01],
        [  4.87663776e-09,   1.55191869e-03,   9.98448014e-01],
        [  9.97036219e-01,   2.96377414e-03,   9.31677478e-12],
        [  1.97491180e-02,   9.79146302e-01,   1.10456068e-03]], dtype=float32),
 array([[0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 0],
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0]]))

In [12]:
feed_dict = {
    X: scaler.transform(X_train),
    y: y_train
}

curr_loss, pred = sess.run([loss, softmax], feed_dict)
acc = accuracy_score(np.argmax(y_train, 1), np.argmax(pred, 1))
print("Acc: {:.4f}, loss: {:.2f} ".format(acc, curr_loss))

feed_dict = {
    X: scaler.transform(X_test),
    y: y_test
}
curr_loss, pred = sess.run([loss, softmax], feed_dict)
acc = accuracy_score(np.argmax(y_test, 1), np.argmax(pred, 1))
print("Acc: {:.4f}, loss: {:.2f} ".format(acc, curr_loss))


Acc: 0.9821, loss: 0.07 
Acc: 0.9737, loss: 0.09 


# In scikit-learn

In [13]:
dataset = load_iris()
data = dataset['data']
target = dataset['target']

X_train, X_test, y_train, y_test = \
    train_test_split(data, target, test_size=.25, random_state=13)

X_train.shape, y_train.shape

((112, 4), (112,))

In [36]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [37]:
pred = clf.predict(X_train)
acc = accuracy_score(y_train, pred)
print("Acc: {:.4f}".format(acc))


Acc: 0.9821


In [38]:
y_train, pred

(array([1, 0, 0, 2, 1, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1, 2, 0, 1, 0, 1, 2, 2, 0,
        0, 2, 1, 2, 2, 0, 2, 1, 1, 0, 0, 0, 1, 1, 1, 2, 2, 1, 2, 2, 2, 0, 2,
        1, 0, 1, 2, 1, 0, 0, 1, 1, 0, 1, 2, 0, 2, 1, 0, 2, 0, 0, 0, 1, 0, 1,
        1, 2, 2, 0, 1, 0, 1, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0, 1, 1, 2, 1, 2, 2,
        0, 0, 1, 1, 0, 0, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 1, 0, 1, 1]),
 array([1, 0, 0, 2, 1, 2, 2, 2, 0, 1, 0, 1, 0, 1, 2, 2, 0, 1, 0, 1, 2, 2, 0,
        0, 2, 1, 2, 2, 0, 2, 1, 1, 0, 0, 0, 1, 1, 1, 2, 2, 1, 2, 2, 2, 0, 2,
        1, 0, 1, 2, 1, 0, 0, 1, 1, 0, 1, 2, 0, 2, 1, 0, 2, 0, 0, 0, 1, 0, 1,
        1, 2, 2, 0, 2, 0, 1, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0, 1, 1, 2, 1, 2, 2,
        0, 0, 1, 1, 0, 0, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 1, 0, 1, 1]))

In [39]:
pred = clf.predict(X_test)
acc = accuracy_score(y_test, pred)
print("Acc: {:.4f}".format(acc))


Acc: 1.0000


In [40]:
X_train[0], clf.coef_, clf.intercept_

(array([ 6.2,  2.2,  4.5,  1.5]),
 array([[-0.25163786,  0.47811307, -0.85347203, -0.61651333],
        [-0.02034588,  0.18311289, -0.52899281, -0.30518816],
        [ 0.78613759,  0.46053208, -2.02000442, -1.50339089]]),
 array([ 2.32700222,  1.54120035,  6.21300813]))

In [41]:
list(zip(curr_W[:, 0], clf.coef_[0, :])), list(zip(curr_b, clf.intercept_))

([(-1.8528174, -0.25163786049925418),
  (1.7176638, 0.4781130713458126),
  (-3.1224535, -0.8534720276259451),
  (-2.9766893, -0.61651332642178791)],
 [(-0.39082167, 2.3270022150828358),
  (3.6133354, 1.5412003479828156),
  (-3.2225249, 6.2130081288127048)])