<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/milmor/NLP/blob/main/Notebooks/02_Cvxopt.ipynb">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" />
    Run in Google Colab</a>
  </td>
</table>

In [1]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from cvxopt import matrix, solvers

# Toy corpus: one short document (a string of space-separated words) per entry
data = [
    "Lobos Gatos Elefantes",
    "Lobos Lobos Caballos Teatro",
    "Lobos Elefantes Lobos",
    "Teatro Pintura Elefantes",
    "Teatro Música Caballos Teatro Teatro",
]

# Class label of each document, in the same order as `data` (+1 or -1)
y = np.array([1, 1, 1, -1, -1])

# Bag-of-words: fit the vocabulary on the corpus and count term occurrences
vectorizer = CountVectorizer()
X_vect = vectorizer.fit_transform(data)

# Dense document-term count matrix: one row per document, one column per term
X = X_vect.toarray()
X

array([[0, 1, 1, 1, 0, 0, 0],
       [1, 0, 0, 2, 0, 0, 1],
       [0, 1, 0, 2, 0, 0, 0],
       [0, 1, 0, 0, 0, 1, 1],
       [1, 0, 0, 0, 1, 0, 3]])

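- Passing `tc='d'` explicitly tells cvxopt to build the matrix with double-precision floating-point entries (typecode `'d'`). This matters here because `solvers.qp` expects its arguments to be `'d'` matrices, while NumPy arrays of integers (such as `y`) would otherwise be converted to integer matrices.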

In [2]:
# Number of samples (m) and number of features (n)
m, n = X.shape

# Upper bound applied to every Lagrange multiplier (box constraint 0 <= λ_i <= C).
# NOTE(review): a value this large presumably aims to approximate a hard-margin
# SVM; lower it to allow margin violations.
C = 1e5

# Gram matrix of the samples (linear kernel): S[i, j] = x_i · x_j
S = np.dot(X, X.T)

# Dual SVM in cvxopt's QP form:  minimize (1/2) λᵀ P λ + eᵀ λ
#   P[i, j] = y_i y_j (x_i · x_j),  e = -1  (i.e. maximize Σ λ_i - (1/2) λᵀ P λ)
P = matrix(np.outer(y, y) * S, tc='d')
e = matrix(-np.ones((m, 1)), tc='d')

# Inequality constraints G λ <= h, stacking two blocks:
#   -I λ <= 0   (λ_i >= 0)
#    I λ <= C   (λ_i <= C)
G = matrix(np.vstack((-np.eye(m), np.eye(m))), tc='d')  # G = [-I; I]
h = matrix(np.hstack((np.zeros(m), np.full(m, C))), tc='d')  # h = [0; C]

# Equality constraint A λ = b  (Σ λ_i y_i = 0)
A = matrix(y, (1, m), tc='d')  # A = y^T
b = matrix(0.0)                # b = 0 (rebound to the SVM bias in a later cell)

# Solve the quadratic programming problem; sol['x'] holds the optimal λ
sol = solvers.qp(P, e, G, h, A, b)

     pcost       dcost       gap    pres   dres
 0:  2.0340e+09 -7.2026e+09  9e+09  2e-12  2e-11
 1:  3.9861e+08 -5.0332e+08  9e+08  1e-12  1e-11
 2:  5.7836e+07 -6.4865e+07  1e+08  2e-13  7e-12
 3:  8.3131e+06 -9.2455e+06  2e+07  7e-13  4e-12
 4:  1.1921e+06 -1.3165e+06  3e+06  1e-13  6e-13
 5:  1.7071e+05 -1.8848e+05  4e+05  7e-14  5e-13
 6:  2.4399e+04 -2.7056e+04  5e+04  1e-16  2e-13
 7:  3.4709e+03 -3.9016e+03  7e+03  2e-14  5e-14
 8:  4.8756e+02 -5.6880e+02  1e+03  4e-15  1e-14
 9:  6.5993e+01 -8.5305e+01  2e+02  4e-16  2e-14
10:  7.8297e+00 -1.3772e+01  2e+01  9e-16  3e-15
11:  3.3182e-01 -2.6841e+00  3e+00  2e-16  1e-15
12: -4.5758e-01 -8.1650e-01  4e-01  2e-16  6e-16
13: -5.1491e-01 -5.4429e-01  3e-02  9e-17  3e-16
14: -5.2490e-01 -5.2727e-01  2e-03  2e-16  1e-16
15: -5.2624e-01 -5.2633e-01  9e-05  2e-16  1e-16
16: -5.2631e-01 -5.2632e-01  1e-06  2e-16  7e-17
17: -5.2632e-01 -5.2632e-01  1e-08  2e-16  1e-16
Optimal solution found.


In [3]:
# Lagrange multipliers found by the solver, converted from the cvxopt result
# to a NumPy array (used below as a column that broadcasts against X's rows)
alphas = np.array(sol['x'])

# Weight vector of the separating hyperplane: w = Σ_i α_i y_i x_i
w = (alphas * y[:, None] * X).sum(axis=0)

# Support vectors are the samples whose multiplier is numerically non-zero
sv = (alphas > 1e-5).ravel()

# Bias: average of y_i - w · x_i over the support vectors
b = (y[sv] - X[sv] @ w).mean()

w, b

(array([ 1.05263142e-01, -1.05263142e-01,  4.21052564e-01,  6.31579008e-01,
        -7.70945732e-08, -5.26315708e-01, -4.21052721e-01]),
 0.05263156852062737)