In [None]:
# Pull the pylab (numpy + matplotlib) names into the notebook namespace and render figures inline.
# Later cells rely on this for bare names such as random, argmax, cumsum, imshow and figure.
%pylab inline
rcParams['figure.figsize'] = (10, 4) #wide graphs by default
# Python 2 compatibility: print() as a function and true division for `/`.
from __future__ import print_function
from __future__ import division

import os     
# Append /usr/local/bin to this process's PATH.
# NOTE(review): presumably so the Graphviz executables needed to render the tree further down are found — confirm.
os.environ["PATH"] += os.pathsep + '/usr/local/bin'


# A lot of the code below is borrowed from the scikit-learn docs

http://peekaboo-vision.blogspot.com/2013/01/machine-learning-cheat-sheet-for-scikit.html

![http://1.bp.blogspot.com/-ME24ePzpzIM/UQLWTwurfXI/AAAAAAAAANw/W3EETIroA80/s1600/drop_shadows_background.png](http://1.bp.blogspot.com/-ME24ePzpzIM/UQLWTwurfXI/AAAAAAAAANw/W3EETIroA80/s1600/drop_shadows_background.png)

In [None]:
from sklearn.datasets import load_iris
# Load the iris dataset; later cells read its .data, .target and .target_names attributes.
data = load_iris()

# Decision Trees

http://scikit-learn.org/stable/modules/tree.html

In [None]:
from sklearn import tree

In [None]:
# Build a decision tree classifier with default settings and fit it on all iris samples and labels.
clf = tree.DecisionTreeClassifier()
clf.fit(data.data, data.target)

In [None]:
# Predict the class of sample 0; it is wrapped in a list so predict receives a one-row batch.
clf.predict([data.data[0]])

In [None]:
# Same single-sample prediction, this time for sample 10.
clf.predict([data.data[10]])

In [None]:
# Predict every sample at once; these are the same samples the tree was fitted on.
clf.predict(data.data)

In [None]:
from IPython.display import Image #needed to render in notebook
import io, pydot  #needed to convert dot format to png

# Export the fitted tree in Graphviz dot format into an in-memory text buffer.
dot_data = io.StringIO()
tree.export_graphviz(clf, out_file=dot_data)

# graph is indexed below, so graph_from_dot_data is expected to return a sequence of graphs;
# the first one is rendered to PNG and displayed as the cell output.
graph = pydot.graph_from_dot_data(dot_data.getvalue())
Image(graph[0].create_png())

# Cross-validation

http://scikit-learn.org/stable/modules/classes.html#module-sklearn.cross_validation

In [None]:
from sklearn.naive_bayes import GaussianNB

# Second classifier: Gaussian Naive Bayes, fitted here on the complete dataset.
clf2 = GaussianNB()

clf2.fit(data.data, data.target)


In [None]:
from sklearn import cross_validation

In [None]:
# Build a 10-fold splitter over the sample indices of the dataset and show how many folds it holds.
# NOTE(review): `sklearn.cross_validation` is gone from newer scikit-learn releases, where KFold lives in
# `sklearn.model_selection` with a different call signature — confirm the targeted version.
kf = cross_validation.KFold(len(data.data), n_folds=10)
len(kf)

In [None]:
# Fold-by-fold evaluation of Gaussian Naive Bayes: refit on each training split
# and record the fraction of held-out samples that were classified correctly.
pc_folds = []
for train_index, test_index in kf:
    train_features = data.data[train_index]
    train_labels = data.target[train_index]
    clf2 = GaussianNB()
    clf2.fit(train_features, train_labels)

    predictions = clf2.predict(data.data[test_index])
    hits = predictions == data.target[test_index]
    pc_right = sum(hits)/float(len(test_index))
    pc_folds.append(pc_right)
    

In [None]:
# Per-fold fraction of correct predictions collected by the cross-validation loop.
pc_folds

In [None]:
# Average of the per-fold scores.
mean(pc_folds)

In [None]:
# Score of the worst fold.
min(pc_folds)

## Metrics

In [None]:
from sklearn import metrics

In [None]:
# Predict labels for the whole dataset.
# NOTE: clf2 is whichever model was fitted last; when the cells run in order that is the one
# trained inside the cross-validation loop on the final fold's training split.
predictions = clf2.predict(data.data)
predictions

In [None]:
# Accuracy of the predictions against the true labels.
metrics.accuracy_score(data.target, predictions)

In [None]:
# Plain-text report comparing true labels with predictions (printed so the line breaks render).
print(metrics.classification_report(data.target, predictions))

<http://www.dataschool.io/simple-guide-to-confusion-matrix-terminology/>

In [None]:
# Confusion matrix of true labels (first argument) against predicted labels (second argument).
metrics.confusion_matrix(data.target, predictions)

In [None]:
# Draw the confusion matrix as an image: reversed gray colormap, nearest-neighbour interpolation.
confmat = metrics.confusion_matrix(data.target, predictions)
imshow(confmat,interpolation='nearest', cmap=cm.gray_r)

In [None]:
# Annotated confusion-matrix plot: each cell is labelled with its count divided by
# the number of samples whose true label equals the row index.
confmat = metrics.confusion_matrix(data.target, predictions)
largest = confmat.max()
half_largest = confmat.max()/2

# Plot (max - count) with the plain gray colormap, so the largest counts map to the low end of the scale.
imshow(largest - confmat,interpolation='nearest', cmap=cm.gray)

for rownum, row in enumerate(confmat):
    # Number of samples whose true label is this row's class (loop-invariant across columns).
    class_total = sum(data.target == rownum).astype(float)
    for colnum, val in enumerate(row):
        # Black text for counts below half the maximum, white text otherwise.
        label_color = 'white'
        if val < half_largest:
            label_color = 'black'
        text(colnum, rownum, str(val/class_total),
             fontsize=24, color=label_color, ha='center', va='center')

tick_positions = arange(len(data.target_names))
xticks(tick_positions, data.target_names)
yticks(tick_positions, data.target_names);

http://media.aau.dk/null_space_pursuits/2012/01/frustrations-with-music-genre.html

# Dimensionality reduction

http://nbviewer.ipython.org/github/rasbt/pattern_classification/blob/master/dimensionality_reduction/projection/principal_component_analysis.ipynb

http://gael-varoquaux.info/science/ica_vs_pca.html

From: http://nbviewer.ipython.org/github/temporaer/tutorial_ml_gkbionics/blob/master/1%20-%20PCA.ipynb

In [None]:
# Draw 200 points from a 2-D normal distribution with mean [1, 2] and covariance matrix Cov,
# then scatter-plot them. No seed is set in the visible cells, so the points differ on every run.
Cov = array([[2.9, -2.2], [-2.2, 6.5]])
X = random.multivariate_normal([1,2], Cov, size=200)
figure(figsize=(4,4))
scatter(X[:,0], X[:,1])
axis('equal') # equal scaling on both axes;

http://en.wikipedia.org/wiki/Multivariate_normal_distribution

In [None]:
# Covariance estimated from the drawn points; rowvar=False treats each column of X as a variable.
print(cov(X,rowvar=False))

In [None]:
from sklearn.decomposition import PCA
# PCA estimator created with default arguments; it is fitted in the next cell.
pca = PCA()

In [None]:
# Fit the PCA on X and keep the transformed coordinates.
X_pca = pca.fit_transform(X)

In [None]:
# Inspect what the fit stored: the component vectors and the mean.
pca.components_, pca.mean_

In [None]:

# Scatter-plot the PCA-transformed points (first transformed axis against the second).
figure(figsize=(4,4))
scatter(X_pca[:,0], X_pca[:,1])
axis('equal') # equal scaling on both axes;

In [None]:
# Variance of the transformed data along each of its two axes.
var(X_pca[:,0]), var(X_pca[:,1])

## Hidden Markov Models

In [None]:
# Labels for the four states (index i of `notes` corresponds to state i).
notes = ["A", "B", "C", "D"]

# Transition table: row i is read as the probabilities of moving from state i to each state
# (the following cells take markov_model[state] and sample the next state from it).
markov_model = [ [ 0.5,  0.3, 0.2, 0],
                [0.2, 0.1, 0.6, 0.1],
                [0.1, 0, 0.2, 0.7],
                [0, 0, 0.9, 0.1] ]
# Pick a random starting state.
# NOTE: assumes `random` is numpy.random (from %pylab), whose randint excludes the upper bound,
# giving 0..3; the stdlib random.randint would include 4 and index past the table.
state = random.randint(0, 4)
print(state)

In [None]:
# Draw the random number that will decide the next state.
throw = random.random()
print(throw)

In [None]:
# Row of the transition table belonging to the current state.
probabilities = markov_model[state]

In [None]:
# Running total of that row.
cumsum(probabilities)

In [None]:
# Boolean mask: True wherever the running total exceeds the throw.
cumsum(probabilities) > throw

In [None]:
# Index of the first True in the mask, i.e. the state selected by this throw.
argmax(cumsum(probabilities) > throw)

In [None]:
# Take one step of the chain: the first state whose cumulative probability exceeds the throw.
state = argmax(cumsum(probabilities) > throw)
# Load the row of the state just entered. Without this, `probabilities` would still hold the
# previous state's row and the next step would be sampled from the wrong distribution.
probabilities = markov_model[state]
print(state)

In [None]:
# One full step of the chain in a single cell: new throw, pick the next state from the
# current row, then load the row of the state just entered for the following step.
throw = random.random()
state = argmax(cumsum(probabilities) > throw)
probabilities = markov_model[state]
print(state)

In [None]:
# The same step repeated once more; the markov() function defined below wraps this in a loop.
throw = random.random()
state = argmax(cumsum(probabilities) > throw)
probabilities = markov_model[state]
print(state)

In [None]:
def markov(matrix, state, num_iter=10):
    """Simulate a Markov chain and return the sequence of visited states.

    Parameters
    ----------
    matrix : sequence of sequences of float
        Transition table; ``matrix[i]`` holds the probabilities of moving
        from state ``i`` to each state.
    state : int
        Index of the starting state.
    num_iter : int, optional
        Number of transitions to simulate (default 10).

    Returns
    -------
    list
        The starting state followed by ``num_iter`` sampled states,
        i.e. ``num_iter + 1`` entries.
    """
    out = [state]
    # Use the table passed in by the caller rather than a notebook-level global.
    probabilities = matrix[state]
    for _ in range(num_iter):
        throw = random.random()
        # First state whose cumulative probability exceeds the throw.
        state = argmax(cumsum(probabilities) > throw)
        probabilities = matrix[state]
        out.append(state)
    return out

In [None]:
# Simulate the chain from state 0 with the default number of steps.
markov(markov_model, 0)

In [None]:
# Display the transition table again for reference.
markov_model

In [None]:
# Simulate the chain again, this time starting from state 3.
markov(markov_model, 3)

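A Hidden Markov Model has a "hidden" internal state that evolves as a Markov chain; that state is not observed directly, but it determines the probabilities of the output that is observed.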

In [None]:
# Symbols that can be emitted.
possible_outputs = ['M', 'N', 'O']
# Emission table: one row per hidden state, one column per entry of possible_outputs
# (the cells below index it by state and sample an output index from the row).
output_probabilites = [ [ 0.1, 0.9, 0],
                       [0.8, 0.1, 0.1],
                       [0.1, 0, 0.9],
                       [0.2, 0.3, 0.5]]

In [None]:
# Walk a freshly simulated state sequence (starting at state 3) and print one
# emitted symbol per visited state.
states = markov(markov_model, 3)
for state in states:
    throw = random.random()
    # Cumulative emission probabilities for this state; the first entry above the throw wins.
    emission_cdf = cumsum(output_probabilites[state])
    out_index = argmax(emission_cdf > throw)
    print(possible_outputs[out_index])

In [None]:
def genHMM(markov_model, initial_state, possible_outputs, output_probabilites, num_iter=10):
    """Generate an output sequence from a hidden Markov model.

    Parameters
    ----------
    markov_model : sequence of sequences of float
        Transition table between hidden states, passed on to ``markov``.
    initial_state : int
        Index of the hidden state to start from.
    possible_outputs : sequence
        Symbols that can be emitted.
    output_probabilites : sequence of sequences of float
        ``output_probabilites[s]`` holds, for hidden state ``s``, the
        probability of emitting each entry of ``possible_outputs``.
    num_iter : int, optional
        Number of hidden-state transitions to simulate (default 10).

    Returns
    -------
    tuple
        ``(out, states)``: the emitted symbols and the hidden states that
        produced them, one emission per state returned by ``markov``.
    """
    out = []
    # Forward num_iter so the requested length is honoured instead of markov's default.
    states = markov(markov_model, initial_state, num_iter=num_iter)
    for state in states:
        throw = random.random()
        # First output whose cumulative probability exceeds the throw.
        out_index = argmax(cumsum(output_probabilites[state]) > throw)
        out.append(possible_outputs[out_index])
    return out, states

In [None]:
# Generate emissions starting from hidden state 3; the result is the pair (outputs, hidden states).
genHMM(markov_model, 3, possible_outputs, output_probabilites)

By Andrés Cabrera mantaraya36@gmail.com

For course MAT 201A at UCSB

This ipython notebook is licensed under the CC-BY-NC-SA license: http://creativecommons.org/licenses/by-nc-sa/4.0/

![http://i.creativecommons.org/l/by-nc-sa/3.0/88x31.png](http://i.creativecommons.org/l/by-nc-sa/3.0/88x31.png)