# How to spot a Bear - Intro to Machine Learning

* How would you spot a bear if you saw one in the woods?
* Machine Learning is the field of programming that tries to make computers mimic the human range of learning, reasoning, and decision making
* What are the features you would look for?

In [None]:
import pandas as pd
import numpy as np
import warnings

from IPython.core.display import Image, HTML
from helpers import get_image, get_images, get_pca
from plot import plot2D, plot3D, animate2D, plot_decision_regions
import matplotlib.pyplot as plt

# Silence RuntimeWarning messages for the rest of the notebook so they do not
# clutter the cell outputs.
warnings.simplefilter(action='ignore', category=RuntimeWarning)

In [None]:
# Display the bear photo from the local images folder, 500 pixels wide.
Image(width=500, filename='images/bear.jpg')

# What features would you select?

In [None]:
# Seed the table with a single positive example: a grizzly bear described
# only by its picture and its leg count.
df = pd.DataFrame(
    data=[[get_image('bear'), 4, 'Yes']],
    index=['Grizzly bear'],
    columns=['Image', 'Number of legs', 'Bear?'],
)
# escape=False leaves the value returned by get_image unescaped in the table
# (presumably an <img> tag -- confirm in helpers).
HTML(df.to_html(escape=False))

In [None]:
# Add a negative example that matches the bear on the only feature so far
# (four legs).
# DataFrame.append was removed in pandas 2.0, so the row is built as a one-row
# frame and concatenated; this also lets pandas infer a dtype per column.
table_row = pd.DataFrame(
    [[get_image('table'), 4, 'No']],
    columns=df.columns,
    index=['Table'],
)
df = pd.concat([df, table_row])
HTML(df.to_html(escape=False))

In [None]:
# Leg count alone cannot tell the bear from the table, so add an eye count
# (bear: 2, table: 0) just ahead of the label column.
df.insert(loc=2, column='Number of eyes', value=[2, 0])
HTML(df.to_html(escape=False))

In [None]:
# Add weight in kilograms as a third feature (bear: 300, table: 5), again
# placed ahead of the label column.
df.insert(loc=3, column='Weight (kg)', value=[300, 5])
HTML(df.to_html(escape=False))

In [None]:
# Add a zebra: a non-bear with the same leg count, eye count and weight as
# the grizzly.
# DataFrame.append was removed in pandas 2.0, so the row is built as a one-row
# frame and concatenated instead.
zebra_row = pd.DataFrame(
    [[get_image('zebra'), 4, 2, 300, 'No']],
    columns=df.columns,
    index=['Zebra'],
)
df = pd.concat([df, zebra_row])
HTML(df.to_html(escape=False))

In [None]:
# Add a color feature; values are listed in row order
# (Grizzly bear, Table, Zebra).
df.insert(loc=4, column='Color', value=['Brown', 'Brown', 'B&W'])
HTML(df.to_html(escape=False))

In [None]:
# Add a walrus: a brown, heavy, four-limbed non-bear.
# DataFrame.append was removed in pandas 2.0, so the row is built as a one-row
# frame and concatenated instead.
walrus_row = pd.DataFrame(
    [[get_image('walrus'), 4, 2, 350, 'Brown', 'No']],
    columns=df.columns,
    index=['Walrus'],
)
df = pd.concat([df, walrus_row])
HTML(df.to_html(escape=False))

In [None]:
# Add two more positive examples whose colors differ from the grizzly's.
# DataFrame.append was removed in pandas 2.0; both rows are collected in one
# frame and added with a single concat.
more_bears = pd.DataFrame(
    [
        [get_image('panda'), 4, 2, 100, 'B&W', 'Yes'],
        [get_image('polar_bear'), 4, 2, 300, 'White', 'Yes'],
    ],
    columns=df.columns,
    index=['Panda', 'Polar bear'],
)
df = pd.concat([df, more_bears])
HTML(df.to_html(escape=False))

In [None]:
# Add two descriptive features ahead of the label column. Values follow the
# current row order: Grizzly bear, Table, Zebra, Walrus, Panda, Polar bear.
descriptive_features = [
    (5, 'Texture', ['Long fur', 'Wood', 'Short fur', 'Leather', 'Long fur', 'Long fur']),
    (6, 'Sound', ['Roar', 'None', 'Grunt', 'Moan', 'Roar', 'Roar']),
]
for position, feature, values in descriptive_features:
    df.insert(position, feature, values)
HTML(df.to_html(escape=False))

In [None]:
# Add two big cats that roar like bears but are not bears.
# DataFrame.append was removed in pandas 2.0; both rows are collected in one
# frame and added with a single concat.
# The colors were previously swapped (tiger 'Light brown', lion 'Striped');
# the tiger is the striped one.
big_cats = pd.DataFrame(
    [
        [get_image('tiger'), 4, 2, 300, 'Striped', 'Short fur', 'Roar', 'No'],
        [get_image('lion'), 4, 2, 200, 'Light brown', 'Short fur', 'Roar', 'No'],
    ],
    columns=df.columns,
    index=['Tiger', 'Lion'],
)
df = pd.concat([df, big_cats])
HTML(df.to_html(escape=False))

In [None]:
# Add two numeric features ahead of the label column. Values follow the
# current row order: Grizzly bear, Table, Zebra, Walrus, Panda, Polar bear,
# Tiger, Lion.
numeric_features = [
    (7, 'Lifespan', [25, 0, 30, 30, 20, 20, 22, 12]),
    (8, 'Claw length (cm)', [15, 0, 0, 0, 10, 20, 5, 5]),
]
for position, feature, values in numeric_features:
    df.insert(position, feature, values)
HTML(df.to_html(escape=False))

## Feature engineering

* Choose independent features

In [None]:
# Remove the leg and eye counts from the feature set: the leg count is 4 on
# every row and the eye count differs only for the table.
df = df.drop(columns=['Number of legs', 'Number of eyes'])
HTML(df.to_html(escape=False))

* Normalize values
* Convert non-numerical features to numeric values

In [None]:
# Rename the three descriptive columns to the numeric quantities that will
# stand in for them.
df = df.rename(columns={
    'Color': 'Pixel intensity',
    'Texture': 'Fur length (cm)',
    'Sound': 'Loudness (db)',
})

# Overwrite the text values with numbers, and the 'Yes'/'No' label with 1/0.
# Values follow the row order: Grizzly bear, Table, Zebra, Walrus, Panda,
# Polar bear, Tiger, Lion.
numeric_encodings = [
    ('Pixel intensity', [190, 220, 140, 150, 200, 40, 150, 120]),
    ('Fur length (cm)', [17, 0, 5, 0, 13, 16, 6, 4]),
    ('Loudness (db)', [120, 0, 30, 50, 70, 130, 90, 100]),
    ('Bear?', [1, 0, 0, 0, 1, 1, 0, 0]),
]
for column_name, encoded in numeric_encodings:
    df[column_name] = encoded
HTML(df.to_html(escape=False))

## Add more data!

* Allows model to generalize better
* Removes biases that are inherent in small datasets

### Both positive

In [None]:
# Add four more positive examples (label 1). Each row is: image, weight,
# pixel intensity, fur length, loudness, lifespan, claw length, label.
# DataFrame.append was removed in pandas 2.0; the rows are collected in one
# frame and added with a single concat.
extra_bears = pd.DataFrame(
    [
        [get_image('babybear'), 100, 165, 13, 100, 23, 9, 1],
        [get_image('blackbear'), 220, 235, 15, 90, 17, 10, 1],
        [get_image('mamabear'), 320, 190, 18, 120, 27, 15, 1],
        [get_image('papabear'), 350, 215, 19, 120, 22, 16, 1],
    ],
    columns=df.columns,
    index=['Baby bear', 'Black bear', 'Mother bear', 'Father bear'],
)
df = pd.concat([df, extra_bears])
HTML(df.to_html(escape=False))

### And negative

In [None]:
# Add five more negative examples (label 0). Each row is: image, weight,
# pixel intensity, fur length, loudness, lifespan, claw length, label.
# DataFrame.append was removed in pandas 2.0; the rows are collected in one
# frame and added with a single concat.
extra_non_bears = pd.DataFrame(
    [
        [get_image('monkey'), 40, 165, 4, 60, 18, 3, 0],
        [get_image('goose'), 10, 125, 0, 65, 16, 0, 0],
        [get_image('duck'), 5, 140, 0, 45, 7, 0, 0],
        [get_image('fox'), 30, 160, 5, 45, 4, 3, 0],
        [get_image('moose'), 500, 170, 3, 55, 20, 0, 0],
    ],
    columns=df.columns,
    index=['Monkey', 'Goose', 'Duck', 'Fox', 'Moose'],
)
df = pd.concat([df, extra_non_bears])
HTML(df.to_html(escape=False))

### Visualizing correlations

In [None]:
# Feature columns: everything between the leading 'Image' column and the
# trailing 'Bear?' label (positions 1 through 6).
columns = list(df.columns[1:7])

# One subplot per ordered pair of features.
n_features = len(columns)
fig, sub = plt.subplots(n_features, n_features, figsize=(15, 15))

# Split the rows by class so each class can be drawn in its own color.
yes = df['Bear?'] == 1
dfs = [df[yes], df[~yes]]
for row, x in zip(sub, columns):
    for ax, y in zip(row, columns):
        for _df, color in zip(dfs, ['r', 'b']):
            # The color was previously paired up but never passed to scatter,
            # so the two classes were not drawn in red and blue as intended.
            ax.scatter(_df[x], _df[y], color=color)

plt.show()

### Principal Components

* Collapses many dimensions into a projection onto n axes that preserve the most variation in the data

### PCA in 2 dimensions

In [None]:
# Reduce the feature columns with get_pca(2, ...) and re-attach the labels so
# the result can be split by class.
# NOTE(review): the label assignment aligns on index, so this presumably
# relies on get_pca keeping df's index -- confirm in helpers.
df_pca2d = get_pca(2, df[columns])
df_pca2d['Bear?'] = df['Bear?']
yes = df_pca2d['Bear?'] == 1

# Plot bears and non-bears as separate groups over the first two columns.
bears_2d = df_pca2d[yes]
others_2d = df_pca2d[~yes]
plot2D(
    bears_2d,
    others_2d,
    columns=list(df_pca2d.columns[:2]),
    figsize=(10, 10),
    plot_titles=True,
)

### PCA in 3 dimensions

In [None]:
# Reduce the feature columns with get_pca(3, ...) and re-attach the labels so
# the result can be split by class.
# NOTE(review): the label assignment aligns on index, so this presumably
# relies on get_pca keeping df's index -- confirm in helpers.
df_pca3d = get_pca(3, df[columns])
df_pca3d['Bear?'] = df['Bear?']
yes = df_pca3d['Bear?'] == 1

# Plot bears and non-bears as separate groups over the first three columns.
bears_3d = df_pca3d[yes]
others_3d = df_pca3d[~yes]
plot3D(
    bears_3d,
    others_3d,
    columns=list(df_pca3d.columns[:3]),
    figsize=(10, 10),
    plot_titles=True,
)

## SVM Demo

In [None]:
# Build the class mask from df_pca2d itself, so this cell does not depend on
# the `yes` mask left behind by whichever PCA cell happened to run last.
is_bear = df_pca2d['Bear?'] == 1
df_yes = df_pca2d[is_bear].reset_index(drop=True)
df_no = df_pca2d[~is_bear].reset_index(drop=True)

# Both halves are renumbered from 0, so sorting the concatenation by index
# interleaves them. A stable sort keeps each bear ahead of the non-bear that
# shares its position; the default sort makes no such guarantee for ties.
df_zipped = pd.concat([df_yes, df_no]).sort_index(kind='mergesort')
anim = animate2D(df_zipped, factor=100.0)
HTML(anim.to_html5_video())

There are other Gaussian models as well, but all of them pale in comparison to the power of neural nets.

## Neural Network Demo

In [None]:
import neural_network as nn

SEED = 20170914
ITERATION_COUNT = 1500
DELTA = 0.001

# Feature vectors to classify after each training run.
# NOTE(review): presumably on the same divided-by-100 scale as the training
# input below -- confirm.
TEST_SAMPLES = [
    [2.26, 1.78, .09, .78, .19, .09],
    [.26, .38, .10, .56, .20, .07],
    [3, 1.9, .17, 1.2, .25, .15],
    [3, 1.4, 0.05, 0.3, 0.3, 0],
]


def report_predictions(network):
    """Print the network's prediction for every entry in TEST_SAMPLES."""
    print("Considering new situation [2.26, 1.78, .09, .78, .19, .09] -> ?: ")
    for sample in TEST_SAMPLES:
        print(network.predict(np.array(sample)))


# Columns 1-6 of the table hold the features and column 7 the 0/1 label.
# Cast explicitly so both arrays are numeric even if the row additions earlier
# in the notebook left some columns with object dtype.
training_input = (df.iloc[:, 1:7].astype(float) / 100.0).values
print("Training input: ")
print(training_input)
training_output = df.iloc[:, 7].astype(int).values
print("Training output: ")
print(training_output)

# The labels are passed to the network as a single column (one row per sample).
training_target = training_output.reshape(-1, 1)

my_nn = nn.NeuralNetwork(6, 1, SEED)
print("Random starting synaptic weights: ")
print(my_nn.synaptic_weights)

print("Training by iterations: ")
# `errors` is kept for the error-curve plot in the next cell.
errors = my_nn.train(training_input, training_target, ITERATION_COUNT)
print("New synaptic weights after training: ")
print(my_nn.synaptic_weights)

# Test the neural network with new situations.
report_predictions(my_nn)

print("Untrain neural network: ")
my_nn.untrain()
print(my_nn.synaptic_weights)

print("Training until fit: ")
my_nn.train_until_fit(training_input, training_target, error_delta=DELTA)
print("New synaptic weights after training: ")
print(my_nn.synaptic_weights)

# Test the neural network again with the same samples.
report_predictions(my_nn)

In [None]:
# Discard the current figure, then chart the `errors` returned by
# my_nn.train with one x position per training iteration.
plt.close()

error_fig, ax = plt.subplots()
ax.plot(range(ITERATION_COUNT), errors)
ax.set_xlabel('times')
ax.set_ylabel('error')
ax.set_title('Error Curve')

plt.show()