### Loading data in scikit-learn

In [1]:
from sklearn.datasets import load_iris

# Load the Iris dataset and unpack the pieces used in this notebook.
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

print("Feature names:", feature_names)
print("Target names:", target_names)

# Preview the first ten feature rows and their integer class labels.
preview_rows = 10
print("\nFirst 10 rows of X:\n", X[:preview_rows])
print("\n", y[:preview_rows])

Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target names: ['setosa' 'versicolor' 'virginica']

First 10 rows of X:
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]]

 [0 0 0 0 0 0 0 0 0 0]


### Splitting the data

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing. The data is shuffled before
# splitting, and the fixed random_state keeps the split the same on every run.
split = train_test_split(X, y, test_size=0.3, random_state=1, shuffle=True)
X_train, X_test, y_train, y_test = split

### Training a classifier

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Reload Iris here so this cell brings in everything it needs.
iris = load_iris()
X, y = iris.data, iris.target

# 60/40 train/test split with a fixed seed.
# NOTE(review): this rebinds X_train / X_test / y_train / y_test, so any
# split created earlier in the session is replaced by this one.
split = train_test_split(X, y, test_size=0.4, random_state=1)
X_train, X_test, y_train, y_test = split

In [6]:
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with k = 3, trained on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
classifier_knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predicted class labels for the held-out test samples.
y_pred = classifier_knn.predict(X_test)

In [7]:
# Compare the predicted test labels against the true ones.
accuracy = metrics.accuracy_score(y_test, y_pred)
print('Accuracy: ', accuracy)

Accuracy:  0.9833333333333333


In [9]:
# Two hand-written samples, each with four feature values.
sample = [[5, 5, 3, 2], [2, 4, 3, 5]]
preds = classifier_knn.predict(sample)

# Map the predicted class indices to species names in one indexing step.
# .tolist() converts the NumPy string scalars to plain Python str, so the
# printed list reads ['versicolor', ...] regardless of the NumPy version
# (NumPy >= 2 would otherwise show np.str_('versicolor')).
pred_species = iris.target_names[preds].tolist()

print('Predictions: ', pred_species)


Predicions:  ['versicolor', 'virginica']


### Saving and loading the model

In [10]:
import joblib

# Serialise the fitted KNN classifier to disk. joblib.dump returns the list
# of file names it wrote, which is what the cell displays.
# NOTE(review): the file name spelling ('clssifier') is kept as-is because the
# load cell reads the same literal.
model_file = 'iris_clssifier_knn.joblib'
joblib.dump(classifier_knn, model_file)

['iris_clssifier_knn.joblib']

In [11]:
# Read the persisted classifier back and keep a reference to it so the
# restored model can actually be used, instead of discarding the result.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
loaded_knn = joblib.load('iris_clssifier_knn.joblib')
loaded_knn

### Preprocessing data using scikit-learn

In [12]:
import numpy as np
from sklearn import preprocessing

# Four samples (rows) with three features (columns) each.
input_data = np.array([
    [2.1, -1.9, 5.5],
    [-1.5, 2.4, 3.5],
    [0.5, -7.9, 5.6],
    [5.9, 2.3, -5.8],
])

# Learn each column's min and max, then rescale every column to [0, 1].
data_scalar_minmax = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scalar_minmax.fit(input_data)
data_scaled_minmax = data_scalar_minmax.transform(input_data)
print("\nMin max scaled data : ", data_scaled_minmax)


Min max scaled data :  [[0.48648649 0.58252427 0.99122807]
 [0.         1.         0.81578947]
 [0.27027027 0.         1.        ]
 [1.         0.99029126 0.        ]]


### SVM

In [15]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
clf = SVC()

# First pass: linear kernel. This prediction is not shown because it is not
# the last expression of the cell.
clf.set_params(kernel='linear')
clf.fit(X, y)
clf.predict(X[:5])

# Second pass: reconfigure the same estimator with an RBF kernel and refit.
# The prediction below is the cell's displayed output.
clf.set_params(kernel='rbf', gamma='scale')
clf.fit(X, y)
clf.predict(X[:5])

array([0, 0, 0, 0, 0])

### Decision Tree

In [21]:
from sklearn import tree
from sklearn.model_selection import train_test_split

# Each sample is [height, length of hair]; Y holds the matching class label.
X = [
    [165, 19], [175, 32], [136, 35], [174, 65], [141, 28],
    [176, 15], [131, 32], [166, 6], [128, 32], [179, 10],
    [136, 34], [186, 2], [126, 25], [176, 28], [112, 38],
    [169, 9], [171, 36], [116, 25], [196, 25], [196, 38],
    [126, 40], [197, 20], [150, 25], [140, 32], [136, 35],
]

Y = [
    'Man', 'Woman', 'Woman', 'Man', 'Woman',
    'Man', 'Woman', 'Man', 'Woman', 'Man',
    'Woman', 'Man', 'Woman', 'Woman', 'Woman',
    'Man', 'Woman', 'Woman', 'Man', 'Woman',
    'Woman', 'Man', 'Man', 'Woman', 'Woman',
]

data_feature_names = ['height', 'length of hair']
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

# Fit the decision tree itself. Calling `clf.fit` here would refit the SVC
# left over from the SVM cell and bind that to DTclf, so no tree would ever
# be trained. random_state makes tie-breaking between splits reproducible.
# NOTE(review): the tree is still fit on the full X / Y, as before; the
# train/test split above is not used in this cell.
DTclf = tree.DecisionTreeClassifier(random_state=1)
DTclf = DTclf.fit(X, Y)
prediction = DTclf.predict([[135, 29]])
print(prediction)

['Woman']


In [29]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits

digits = load_digits()
digits.data.shape
kmeans = KMeans(n_clusters = 10, random_state = 0)
clusters = kmeans.fit_predict(digits.data)
kmeans.cluster_centers_.shape




(10, 64)