# Logistic Regression

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import time

In [6]:
# Ładowanie zbioru danych
iris = load_iris()
iris_X, iris_y = iris.data[:-1, :], iris.target[:-1]
# Funkcja get_dummies() daje nam 
iris_y = pd.get_dummies(iris_y).values

# Podział na zbiór testowy i treningowy
trainX, testX, trainY, testY = train_test_split(iris_X, iris_y, test_size=0.33, random_state=42)


In [7]:
# Liczba ficzerów
n_features = trainX.shape[1]

# Liczba mozliwych klas
n_labels = trainY.shape[1]

# Placeholdery. Będą przyjmować wejście o stałej liczbie ficzerów, 
# lecz o nieznanej liczbie przykładów

X =tf.placeholder(tf.float32, [None, n_features])
yTruth = tf.placeholder(tf.float32, [None, n_labels])

In [8]:
# Zmienne - wagi modelu i bias
W = tf.Variable(tf.zeros([4, 3])) # Liczba ficzerów i liczba klas
b = tf.Variable(tf.zeros([3])) # Liczba klas

# Inicjalizujemy rozkładem normalnym początkowe wartości
weights = tf.Variable(tf.random_normal([n_features, n_labels],
                                      mean=0,
                                      stddev=0.01,
                                      name='weights'))

bias = tf.Variable(tf.random_normal([1, n_labels],
                                   mean=0,
                                   stddev=0.01,
                                   name='bias'))

In [9]:
# Budowanie modelu regresji logistycznej
# Są to kolejno 3 operacje:
# 1. "Regersja liniowa" tj. iloczyn skalarny
apply_weights_OP = tf.matmul(X, weights, name='apply_weights')
# 2. Dodanie biasu
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias")
# 3. Zaaplikowanie funkcji sigmoid
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")


## Funkcja kosztu

In [11]:
# Liczba epok
n_epochs = 700
learning_rate = tf.train.exponential_decay(learning_rate=0.0008,
                                          global_step=1,
                                          decay_steps=trainX.shape[0],
                                          decay_rate=0.95,
                                          staircase=True) # Sprawdzić, co to


# Funkcja kosztu
cost_OP = tf.nn.l2_loss(activation_OP-yTruth, name="mse")

# Optimajzer
training_OP = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_OP)

In [12]:
# Tworzę nową sesję
session = tf.Session()

init_OP = tf.global_variables_initializer()

# inicjalizacja zmiennych
session.run(init_OP)

In [20]:
# Definiujemy dodatkowe operacje aby móc śledzić przebieg treningu
# argmax(activation_OP, 1) returns the label with the most probability
# argmax(yGold, 1) is the correct label
correct_predictions_OP = tf.equal(tf.argmax(activation_OP,1),tf.argmax(yTruth,1))

# If every false prediction is 0 and every true prediction is 1, the average returns us the accuracy
accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float"))

# Summary op for regression output
activation_summary_OP = tf.summary.histogram("output", activation_OP)

# Summary op for accuracy
accuracy_summary_OP = tf.summary.scalar("accuracy", accuracy_OP)

# Summary op for cost
cost_summary_OP = tf.summary.scalar("cost", cost_OP)

# Summary ops to check how variables (W, b) are updating after each iteration
weightSummary = tf.summary.histogram("weights", weights.eval(session=session))
biasSummary = tf.summary.histogram("biases", bias.eval(session=session))

# Merge all summaries
merged = tf.summary.merge([activation_summary_OP, accuracy_summary_OP, cost_summary_OP, weightSummary, biasSummary])

# Summary writer
writer = tf.summary.FileWriter("summary_logs", session.graph)

In [21]:
# Uczenie klasyfikatora

cost = 0
diff =1
epoch_values = []
accuracy_values = []
cost_values = []

# Trening
for i in range(n_epochs):
    if i > 1 and diff < .0001:
        print "Zmiana", diff
        break
    else:
        step = session.run(training_OP, feed_dict={X: trainX,
                                                  yTruth: trainY})
        # Co 10. epokę
        if i % 10 == 0:
               # Add epoch to epoch_values
            epoch_values.append(i)
            # Generate accuracy stats on test data
            train_accuracy, newCost = session.run([accuracy_OP, cost_OP], feed_dict={X: trainX, yTruth: trainY})
            # Add accuracy to live graphing variable
            accuracy_values.append(train_accuracy)
            # Add cost to live graphing variable
            cost_values.append(newCost)
            # Re-assign values for variables
            diff = abs(newCost - cost)
            cost = newCost

            #generate print statements
            print("step %d, training accuracy %g, cost %g, change in cost %g"%(i, train_accuracy, newCost, diff))


# How well do we perform on held-out test data?
print("final accuracy on test set: %s" %str(session.run(accuracy_OP, 
                                                     feed_dict={X: testX, 
                                                                yTruth: testY})))

step 0, training accuracy 0.909091, cost 14.8549, change in cost 14.8549
step 10, training accuracy 0.909091, cost 14.8229, change in cost 0.0319614
step 20, training accuracy 0.909091, cost 14.7915, change in cost 0.0314722
step 30, training accuracy 0.909091, cost 14.7605, change in cost 0.0309973
step 40, training accuracy 0.909091, cost 14.7299, change in cost 0.0305347
step 50, training accuracy 0.909091, cost 14.6999, change in cost 0.0300837
step 60, training accuracy 0.909091, cost 14.6702, change in cost 0.0296469
step 70, training accuracy 0.909091, cost 14.641, change in cost 0.0292215
step 80, training accuracy 0.909091, cost 14.6122, change in cost 0.0288038
step 90, training accuracy 0.909091, cost 14.5838, change in cost 0.0283995
step 100, training accuracy 0.909091, cost 14.5558, change in cost 0.0280056
step 110, training accuracy 0.909091, cost 14.5282, change in cost 0.0276213
step 120, training accuracy 0.909091, cost 14.5009, change in cost 0.0272446
step 130, tra