In [2]:
from pathlib import Path
import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

In [3]:
# Read the whole SMS corpus and split it into one record per line.
# The encoding is pinned because Path.read_text() otherwise falls back to the
# platform's locale encoding, which would make the decoded text (and the digit
# counts derived from it later) depend on the machine running the notebook.
# NOTE(review): assumes the file on disk is UTF-8 encoded - confirm.
# strip() removes leading/trailing whitespace so a final newline does not
# produce an empty trailing record.
data = Path("SMSSpamCollection").read_text(encoding="utf-8").strip().split("\n")

In [4]:
# Pre-allocate one integer row per message with two columns; the loop in the
# next cell fills them in as (label, number of digits). np.empty leaves the
# contents uninitialised until then.
digit_counts = np.empty(shape=(len(data), 2), dtype=int)

In [5]:
# Fill digit_counts row by row: column 0 is the label (0 = ham, 1 = anything
# else), column 1 is the number of digit characters in the message text.
for i, line in enumerate(data):
    # Each record is "<label>\t<message>". Split on the first tab only, so a
    # tab character inside the message body cannot break the two-way unpack.
    case, message = line.split("\t", 1)
    num_digits = sum(c.isdigit() for c in message)
    digit_counts[i, 0] = 0 if case == "ham" else 1
    digit_counts[i, 1] = num_digits

In [8]:
# Tally how many messages share each distinct digit count. With
# return_counts=True the result is a (values, counts) pair of 1-D arrays.
unique_counts = np.unique(
    digit_counts[:, 1],
    return_counts=True,
)

In [12]:
# Two-column table: column 0 is a distinct digit count, column 1 is the number
# of messages having that many digits.
# It is rebuilt directly from digit_counts rather than from the previous value
# of unique_counts, so re-running this cell always yields the same (N, 2)
# array instead of re-stacking an already stacked one.
unique_counts = np.column_stack(
    np.unique(digit_counts[:, 1], return_counts=True)
)

In [14]:
# Rescale each column to unit variance so that neither the digit count nor the
# message count dominates the distance computation.
whitened_counts = whiten(unique_counts)

# kmeans picks its starting centroids at random. Seeding NumPy's global
# generator first makes the resulting codebook - and every cluster boundary
# read off it further down - reproducible under Restart & Run All.
# The seed value itself is arbitrary. After introducing or changing it,
# re-check the cluster boundaries printed below against the hard-coded cut-offs.
np.random.seed(0)
codebook, _ = kmeans(whitened_counts, 3)

In [17]:
# Assign every whitened observation to a centroid in the codebook. `codes`
# holds one codebook index per row of whitened_counts; the second value
# returned by vq is discarded.
codes, _ = vq(whitened_counts, codebook)


In [19]:
# The first row of unique_counts is the smallest digit count and the last row
# is the largest, so their cluster ids are taken as the "ham" and "spam"
# clusters respectively.
ham_code, spam_code = codes[0], codes[-1]

# Whichever of the three cluster ids is neither of those is the "unknown" one.
unknown_code = list(set(range(3)).symmetric_difference({ham_code, spam_code}))[0]

In [20]:
# For each cluster, show the last (digit count, message count) row that was
# assigned to it.
for label, code in (
    ("definitely ham:", ham_code),
    ("definitely spam:", spam_code),
    ("unknown:", unknown_code),
):
    print(label, unique_counts[codes == code][-1])

definitely ham: [   0 4110]
definitely spam: [47  1]
unknown: [20 18]


In [21]:
# Boolean masks over the messages, based on how many digits each contains.
# The cut-offs 0 and 20 match the digit counts printed for the
# "definitely ham" and "unknown" clusters in the recorded output above.
digits = digit_counts[:, 1]
predicted_hams = digits == 0
predicted_spams = digits > 20
predicted_unknowns = (digits > 0) & (digits <= 20)

In [22]:
# Select the (label, digit count) rows falling under each predicted class.
ham_cluster, spam_cluster, unk_cluster = (
    digit_counts[mask]
    for mask in (predicted_hams, predicted_spams, predicted_unknowns)
)

In [23]:
# For each predicted class, count how many of its messages carry each true
# label (0 = ham, 1 = spam).
for label, cluster in (
    ("hams:", ham_cluster),
    ("spams:", spam_cluster),
    ("unknowns:", unk_cluster),
):
    print(label, np.unique(cluster[:, 0], return_counts=True))

hams: (array([0, 1]), array([4071,   39]))
spams: (array([0, 1]), array([  1, 232]))
unknowns: (array([0, 1]), array([755, 476]))


In [24]:
from scipy.optimize import minimize_scalar

def objective_function(x):
    """Evaluate the quartic ``3*x**4 - 2*x + 1`` at ``x``."""
    quartic_term = 3 * x ** 4
    linear_term = 2 * x
    return quartic_term - linear_term + 1

In [25]:
# Minimise the single-variable objective with minimize_scalar's default settings.
res = minimize_scalar(fun=objective_function)

In [26]:
# Display the result object produced by the minimize_scalar call above.
res

     fun: 0.17451818777634331
 message: '\nOptimization terminated successfully;\nThe returned value satisfies the termination criteria\n(using xtol = 1.48e-08 )'
    nfev: 16
     nit: 12
 success: True
       x: 0.5503212087491959

In [27]:
def objective_function(x):
    """Evaluate ``x**4 - x**2`` at ``x``.

    This redefinition replaces the earlier ``objective_function`` in the
    notebook namespace.
    """
    squared = x ** 2
    return x ** 4 - squared

In [29]:
# Minimise the redefined objective with default settings and show the result.
res = minimize_scalar(fun=objective_function)
res

     fun: -0.24999999999999994
 message: '\nOptimization terminated successfully;\nThe returned value satisfies the termination criteria\n(using xtol = 1.48e-08 )'
    nfev: 15
     nit: 11
 success: True
       x: 0.7071067853059209

In [31]:
# Same minimisation, but starting the search from the bracket (-1, 0).
# In the recorded output the solver still reports the positive minimiser
# (x ~ 0.707), so the bracket evidently does not confine the solution.
res = minimize_scalar(
    fun=objective_function,
    bracket=(-1, 0),
)
res

     fun: -0.24999999999999997
 message: '\nOptimization terminated successfully;\nThe returned value satisfies the termination criteria\n(using xtol = 1.48e-08 )'
    nfev: 17
     nit: 13
 success: True
       x: 0.7071067809244586

In [32]:
import numpy as np
from scipy.optimize import minimize, LinearConstraint

# Problem size: number of prospective buyers and total number of shares.
n_buyers, n_shares = 10, 15

In [33]:
# Fix NumPy's global seed so the randomly drawn inputs are the same every run.
np.random.seed(10)

# One price per buyer, drawn from [0, 1).
prices = np.random.random(size=n_buyers)

# One integer budget per buyer, drawn from {1, 2, 3} (the upper bound 4 is exclusive).
money_available = np.random.randint(low=1, high=4, size=n_buyers)

In [34]:
# Budget divided by price, element-wise: the (fractional) number of shares
# each buyer's money covers.
n_shares_per_buyer = money_available / prices

# Print the three arrays, one after another.
for values in (prices, money_available, n_shares_per_buyer):
    print(values)

[0.77132064 0.02075195 0.63364823 0.74880388 0.49850701 0.22479665
 0.19806286 0.76053071 0.16911084 0.08833981]
[1 1 1 3 1 3 3 2 1 1]
[ 1.29647768 48.18824404  1.57816269  4.00638948  2.00598984 13.34539487
 15.14670609  2.62974258  5.91328161 11.3199242 ]


In [35]:
# Equality constraint: with an all-ones coefficient vector and lb == ub, the
# sum of the decision variables is pinned to exactly n_shares.
constraint = LinearConstraint(
    np.ones(n_buyers),
    lb=n_shares,
    ub=n_shares,
)


In [36]:
# One (lower, upper) pair per buyer: from zero up to the number of shares that
# buyer's money covers.
bounds = [(0, max_shares) for max_shares in n_shares_per_buyer]


In [37]:
def objective_function(x, prices):
    """Return the negated dot product of ``x`` and ``prices``.

    ``x`` must provide a ``.dot`` method (e.g. a NumPy array).
    """
    total = x.dot(prices)
    return -total
