# The Iris Dataset
Author: Pierre Nugues, using data from the scikit-learn documentation

## Importing the modules and loading the dataset

In [None]:
# Load the Iris dataset that ships with scikit-learn.
from sklearn import datasets
iris = datasets.load_iris()
# Display the loaded object; its 'data' and 'target' entries are used below.
iris

### The $\mathbf{X}$ matrix

The $\mathbf{X}$ matrix stores the predictors, also called features.

In [None]:
# First ten rows of the feature matrix (attribute access, as in the later cells).
iris.data[:10]

### The $\mathbf{y}$ vector

The $\mathbf{y}$ vector stores the responses, also called classes or targets.

In [None]:
# Target vector: one class value per sample (attribute access, as in the later cells).
iris.target

### Selecting the classifier

In [None]:
# Logistic regression classifier; fit_intercept=False fits the model
# without a bias (intercept) term.
from sklearn import linear_model
clf = linear_model.LogisticRegression(fit_intercept=False)
# Display the estimator (not fitted yet at this point).
clf

## Training and evaluating on the training set

### Fitting the model

In [None]:
# Fit on every sample except the last one, which is kept aside
# for the prediction example that follows.
clf.fit(iris.data[:-1], iris.target[:-1])

In [None]:
# Coefficients (feature weights) learned by the fit above.
clf.coef_

### Predicting class probabilities

In [None]:
# The last sample was left out of the fit; a slice (rather than an index)
# keeps it as a one-row 2-D array, which is what predict_proba expects.
last_sample = iris.data[-1:]
# Estimated probability of each class for that sample.
y_hat = clf.predict_proba(last_sample)
y_hat

### Evaluating the score on the training set

In [None]:
# Accuracy on the data the model was actually fitted on: the fit above used
# every sample except the last, so the same slice is scored here. Scoring the
# full dataset would mix the held-out sample into the "training" score.
clf.score(iris.data[:-1], iris.target[:-1])

## Using a training set and a test set

In [None]:
from sklearn.model_selection import train_test_split

# Keep 20% of the samples aside as a test set; the fixed random_state makes
# the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=0,
)

### Fitting the training set

In [None]:
# Refit the classifier using only the training split.
clf.fit(X_train, y_train)

### Evaluation on the test set

In [None]:
# Accuracy on the test split, which was not used for fitting.
clf.score(X_test, y_test)

## Using cross-validation

In [None]:
# 10-fold cross-validation over the whole dataset: the classifier is
# fitted and scored once per fold, giving one score per fold.
from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf, iris.data, iris.target, cv=10)
# Display the per-fold scores.
scores

### Mean of the accuracies

In [None]:
# Report the mean of the fold scores with a spread of two standard deviations.
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))