In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sat Oct  8 17:46:12 2022

@author: Kuba

This example uses code from the University of Warsaw educational materials:
https://colab.research.google.com/drive/1QTOK6B8jPrP3J7NW2I93q3WZJ1CAWN_H

"""

from sklearn.datasets import make_circles
import matplotlib.pyplot as plt
from sklearn.svm import SVC 
import numpy as np

# Generate a 'circularly distributed' dataset (two concentric rings) and its
# target class labels. A fixed random_state makes the sample - and therefore
# the fitted boundary and the figure - identical on every run.
X, y = make_circles(500, factor=.1, noise=.1, random_state=0)

# Scatter plot of the training data, coloured by class. An explicit Axes
# object is created so that all drawing calls target the same axes instead of
# relying on the implicit pyplot "current axes".
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Set1, edgecolor="k")
ax.set_xlabel('Some feature 1')
ax.set_ylabel('Some feature 2')

# Support vector machine classifier with a radial basis function (nonlinear)
# kernel. Regularization strength is inversely proportional to C, so the very
# large C used here means almost no regularization.
svm = SVC(kernel='rbf', C=1E10)

# training
svm.fit(X, y)

# Build a 30 x 30 grid of points covering the current axis limits; the
# classifier is evaluated on it to draw the nonlinear decision boundary.
x_lo, x_hi = ax.get_xlim()
y_lo, y_hi = ax.get_ylim()
grid_x = np.linspace(x_lo, x_hi, 30)
grid_y = np.linspace(y_lo, y_hi, 30)
mesh_y, mesh_x = np.meshgrid(grid_y, grid_x)
grid_points = np.column_stack((mesh_x.ravel(), mesh_y.ravel()))

# Evaluate .decision_function() on the grid and draw its contours:
# level 0 is the decision boundary (solid black line),
# levels -1 and +1 are the margins (dashed blue lines).
decision_values = svm.decision_function(grid_points).reshape(mesh_x.shape)
ax.contour(
    mesh_x, mesh_y, decision_values,
    levels=[-1, 0, 1],
    colors=['b', 'k', 'b'],
    linestyles=['--', '-', '--'],
    alpha=1,
)

# Mark the support vectors with black crosses.
sv = svm.support_vectors_
ax.scatter(sv[:, 0], sv[:, 1], s=50, linewidth=1, color='k', marker='x')

plt.show()

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Wed Oct  5 12:06:04 2022

@author: Kuba
"""

# Task: modify the example below
# to predict sepal width (1) based on two other features –
# sepal length (0) and petal length (2). Fill in the program in the places marked
# with !!!. In your report, note the values of the learned model parameters
# (coefficients and intercept) and show the plot.

# THE CODE LACKS:
    # - an import of the proper sklearn submodule that contains linear models
    # - a definition of the X array containing only the examples of class 0 (setosa)
    #   and only the first three features
    # - a definition of the linear regression object, to be named multiregr
    # - a line of code to train the model

# importing necessary modules
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
# !!!

# load the iris dataset through sklearn.datasets
iris = datasets.load_iris()

# feature array and class labels of the whole dataset
# (X is meant to be narrowed down in the exercise step directly below)
X = iris.data
labels = iris.target

# take only examples from class 0 (iris setosa) and first 3 features
# NOTE: the indices drawn below come from the range 0..49 and are used to index
# X directly, so after this step row i of X must be the i-th selected example.
# !!!

# random ordering of samples
# (all 50 indices 0..49 drawn without replacement, i.e. a random permutation;
# no seed is set, so the ordering differs from run to run)
random0 = np.random.choice(np.arange(0,50),50,replace=False)

# test and training set example indices
# (the first 40 shuffled indices form the training set, the remaining 10 the test set)
train_inds = random0[:40]
test_inds = random0[40:]

# create the regression model and train it
# NOTE: the code further down reads multiregr.coef_ and multiregr.intercept_,
# calls multiregr.predict() on a two-column grid built from the ranges of
# columns 0 and 2 of X, and plots column 1 of X on the z axis. The model
# therefore has to be fitted with columns 0 and 2 as inputs and column 1 as
# the target, using the training rows (train_inds).
# !!!
# !!!

# report the fitted coefficients and the intercept
print(multiregr.coef_, multiregr.intercept_)

# training-set values of the two independent variables
sepal_len_train = X[train_inds, 0]
petal_len_train = X[train_inds, 2]

# axis samples for the regression surface: the training-data range widened by
# 0.5 on both sides, sampled every 0.5
sepal_axis = np.arange(sepal_len_train.min() - 0.5, sepal_len_train.max() + 0.5, 0.5)
petal_axis = np.arange(petal_len_train.min() - 0.5, petal_len_train.max() + 0.5, 0.5)

# regular grid of (sepal length, petal length) points, flattened to one
# two-column row per grid point so it can be passed to predict()
grid_sepal, grid_petal = np.meshgrid(sepal_axis, petal_axis)
grid_points = np.column_stack((grid_sepal.ravel(), grid_petal.ravel()))

# predicted value of the dependent variable at every grid point,
# reshaped back to the grid layout for plotting
grid_pred = multiregr.predict(grid_points).reshape(grid_sepal.shape)

# 3D graph: samples as points, the fitted model as a translucent red surface
fig = plt.figure(figsize=plt.figaspect(1)*2)
ax = fig.add_subplot(111, projection='3d')
for inds, legend_label in ((train_inds, "Training examples"),
                           (test_inds, "Test examples")):
    ax.scatter(X[inds, 0], X[inds, 2], X[inds, 1], label=legend_label)
ax.plot_surface(grid_sepal, grid_petal, grid_pred, color='r', alpha=0.4)
ax.legend()
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal length")
ax.set_zlabel("Sepal width (dependent variable)")
plt.show()

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Wed Oct  5 13:07:53 2022

@author: Kuba
"""

from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression

# load the iris dataset: feature array and class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# the two features shown in the overview plot (columns 0 and 1)
sepal_length, sepal_width = X[:, 0], X[:, 1]

# axis limits: data range widened by 0.5 on each side
x_min, x_max = sepal_length.min() - 0.5, sepal_length.max() + 0.5
y_min, y_max = sepal_width.min() - 0.5, sepal_width.max() + 0.5

# scatter plot of all samples, coloured by class
fig, ax = plt.subplots()
scatter = ax.scatter(sepal_length, sepal_width, c=y, cmap=plt.cm.Set1, edgecolor="k")
ax.set_xlabel("sepal length")
ax.set_ylabel("sepal width")

# legend with one entry per class, built from the scatter's colour mapping
class_handles, class_names = scatter.legend_elements()
legend1 = ax.legend(class_handles, class_names, loc="lower right", title="Class")
ax.add_artist(legend1)

ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)


# Reproducible train/test split.
# The split is drawn from a seeded generator so that, together with the
# random_state passed to the classifier, this cell gives the same result on
# every run. Change SEED to obtain a different split.
SEED = 0
N_TRAIN = 40  # training samples per class; the remaining ones form the test set
rng = np.random.default_rng(SEED)

# indices 0-49, 50-99, 100-149 in random order
random0 = rng.permutation(np.arange(0, 50))
random1 = rng.permutation(np.arange(50, 100))
random2 = rng.permutation(np.arange(100, 150))

# take 80% (40 samples) of each class to the training set
X0 = X[random0[:N_TRAIN], :]
X1 = X[random1[:N_TRAIN], :]
X2 = X[random2[:N_TRAIN], :]

# take the corresponding labels
y0 = y[random0[:N_TRAIN]]
y1 = y[random1[:N_TRAIN]]
y2 = y[random2[:N_TRAIN]]

# take 20% (10 samples) of each class to the test set
X0_test = X[random0[N_TRAIN:], :]
X1_test = X[random1[N_TRAIN:], :]
X2_test = X[random2[N_TRAIN:], :]

# take the corresponding labels
y0_test = y[random0[N_TRAIN:]]
y1_test = y[random1[N_TRAIN:]]
y2_test = y[random2[N_TRAIN:]]

# sanity check: each index range above is expected to select exactly one class
assert (y0 == 0).all() and (y1 == 1).all() and (y2 == 2).all()
assert (y0_test == 0).all() and (y1_test == 1).all() and (y2_test == 2).all()

# compose the training set and the test set - just features 0 and 1 and classes 0 and 1
X01_train = np.concatenate([X0[:, 0:2], X1[:, 0:2]])
y01_train = np.concatenate([y0, y1])
X01_test = np.concatenate([X0_test[:, 0:2], X1_test[:, 0:2]])
y01_test = np.concatenate([y0_test, y1_test])

# linear binary classification with a logistic regression model
clf = LogisticRegression(random_state=0).fit(X01_train, y01_train)

# Read the logistic regression model parameters as plain scalars.
# For this binary, two-feature problem coef_ has a single row holding the two
# weights, so that row is unpacked directly (unpacking coef_.T would yield
# one-element arrays instead of numbers).
b = clf.intercept_[0]
w1, w2 = clf.coef_[0]

# The decision boundary is the set of points where w1*x + w2*y + b = 0,
# i.e. the straight line y = m*x + c with the intercept and gradient below.
c = -b/w2
m = -w1/w2

# plot ranges: training-data extent widened by 1 on each side
xmin, xmax = X01_train[:, 0].min() - 1, X01_train[:, 0].max() + 1
ymin, ymax = X01_train[:, 1].min() - 1, X01_train[:, 1].max() + 1

# the boundary is a straight line, so its two end points are sufficient
xd = np.array([xmin, xmax])
yd = m*xd + c

# plot the data and the classification with the decision boundary
plt.figure()
plt.plot(xd, yd, 'k', lw=1, ls='--')
# shade the regions below and above the boundary
plt.fill_between(xd, yd, ymin, color='tab:blue', alpha=0.2)
plt.fill_between(xd, yd, ymax, color='tab:orange', alpha=0.2)

# training samples are drawn with black marker edges, test samples with blue ones
plt.scatter(*X01_train.T, c=y01_train, cmap=plt.cm.Set1, edgecolor="k")
plt.scatter(*X01_test.T, c=y01_test, cmap=plt.cm.Set1, edgecolor="b")

plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
plt.ylabel(r'sepal width')
plt.xlabel(r'sepal length')

# display the figures explicitly, as the other cells do; without this nothing
# is shown when the code is run as a plain script
plt.show()