In [1]:
import numpy as np
np.set_printoptions(suppress=True)
import warnings
warnings.filterwarnings("ignore")

In [9]:
# 1.4 
# Periodic energy usage.
# The 168-vector w gives the hourly electricity consumption of a manufacturing plant, 
# starting on Sunday midnight to 1AM, over one week, in MWh (megawatt-hours). 
# The consumption pattern is the same each day, i.e., it is 24-periodic, 
# which means that wt+24 = wt for t = 1,...,144. 
# Let d be the 24-vector that gives the energy consumption over one day, 
# starting at midnight.
# 
# (a) Use vector notation to express w in terms of d.
d = np.array(range(24))
w = np.stack(d for _ in range(7)).flatten()

# (b) Use vector notation to express d in terms of w.
w = np.array(range(168))
d = w[:24]

In [17]:
#1.6
# Vector of differences. 
# Suppose x is an n-vector. 
# The associated vector of differences is the (n−1)-vector d 
# given by d=(x2 −x1,x3 −x2,...,xn −xn−1). 
# Express d in terms of x using vector operations 

x = np.array(range(10))
d = x[1:] - x[:-1]

In [18]:
#1.7 
# Suppose that x is a Boolean vector with entries that are 0 or 1, 
# and y is a vector encoding the same information using the values −1 and +1. 
# Express y in terms of x using vector notation. 
# Also, express x in terms of y using vector notation.

x = np.array([0,1,0,1,0,1,0,1,0,1])
y = 2*x - 1

y = np.array([-1,1,-1,1,-1,1,-1,1,-1,1])
x = (y + 1) / 2

In [46]:
# 1.8
# Profit and sales vectors. 
# A company sells n different products or items. 
# The n-vector p gives the profit, in dollars per unit, for each of the n items. 
# The n-vector s gives the total sales of each of the items, over some period
# (such as a month), i.e., si is the total number of units of item i sold.
# Express the total profit in terms of p and s using vector notation.

n_item = 10
p = np.random.randn(n_item).reshape(-1, 1)
periods = 12
s = np.random.randint(low=-1, high=100, size=(n_item, periods))
total_profit = (p.T@s).sum()

In [64]:
# 1.9
# Symptoms vector. 
# A 20-vector s records whether each of 20 different symptoms is present 
# in a medical patient, with si = 1 meaning the patient has the symptom 
# and si = 0 meaning she does not. Express the following using vector notation.

# (a) The total number of symptoms the patient has.
n = 20
s = np.random.randint(low=0, high=2, size=(n, 1))
total_num_symptoms = s.sum()
total_num_symptoms = np.ones((n,1)).T@s

# (b) The patient exhibits five out of the first ten symptoms. 


In [87]:
# 1.10
# Total score from course record.
# The record for each student in a class is given as a 10- vector r,
# where r1, . . . , r8 are the grades for the 8 homework assignments, each on a 0–10 scale,
# r9 is the midterm exam grade on a 0–120 scale, 
# and r10 is final exam score on a 0–160 scale. 
# The student’s total course score s, on a 0–100 scale, is based 25% on the homework, 
# 35% on the midterm exam, and 40% on the final exam. 
# Express s in the form s = wT r. (That is, determine the 10-vector w.) 
# You can give the coefficients of w to 4 digits after the decimal point.

r_homework_max = 10
r_midterm_max = 120
r_final_max = 160
r_homework = np.random.randint(low=0, high=r_homework_max, size=(8, 1))
r_midterm = np.random.randint(low=0, high=r_midterm_max, size=(1, 1))
r_final = np.random.randint(low=0, high=r_final_max, size=(1, 1))
r = np.concatenate((r_homework, r_midterm, r_final), axis=0)

p_homework = 0.25
p_midterm = 0.35
p_final = 0.40
w_homework = p_homework * 100 / (r_homework.shape[0] * r_homework_max)
w_midterm = p_midterm * 100 / r_midterm_max
w_final = p_final * 100 / r_final_max
w = np.array([w_homework] * r_homework.shape[0] + [w_midterm] + [w_final])

s = w.T@r

In [91]:
# 1.11
# Word count and word count histogram vectors. 
# Suppose the n-vector w is the word count vector 
# associated with a document and a dictionary of n words. 
# For simplicity we will assume that all words in the document appear in the dictionary.

# (a) What is 1Tw?
n = 300
w = np.random.randint(low=0, high=10, size=(n, 1))
v1 = np.ones((n,1))
total_num_words = v1.T@w

# (b) What does w282 = 0 mean?
# word 282 does not appear in the document
w[282] = 0

# (c) Let h be the n-vector that gives the histogram of the word counts, 
# i.e., hi is the fraction of the words in the document that are word i. 
# Use vector notation to express h in terms of w. 
# (You can assume that the document contains at least one word.)
h = w / w.sum()

In [93]:
# 1.12
# An international company holds cash in five currencies: 
# USD (US dollar), RMB (Chinese yuan), EUR (euro), GBP (British pound), and JPY (Japanese yen), 
# in amounts given by the 5-vector c. 
# For example, c2 gives the number of RMB held. 
# Negative entries in c represent liabilities or amounts owed. 
# Express the total (net) value of the cash in USD, using vector notation. 
# Be sure to give the size and define the entries of any vectors 
# that you introduce in your solution. Your solution can refer to currency exchange rates.

n_currencies = 5
c = np.random.randint(low=-100, high=100, size=(n_currencies, 1))
v_rates_in_usd = np.array([1, 0.8, 1.2, 1.5, 1.7])
net_usd = v_rates_in_usd.T@c

In [126]:
# 1.13
# Average age in a population. 
# Suppose the 100-vector x represents the distribution of ages
#  in some population of people, with xi being the number of i−1 year olds, 
# for i = 1, . . . , 100. (You can assume that x ̸= 0, 
# and that there is no one in the population over age 99.) 
# Find expressions, using vector notation, for the following quantities.

# (a) The total number of people in the population.
x = np.random.randint(low=0, high=100, size=(100, 1))
total_num_people = np.ones((100,1)).T@x

# (b) The total number of people in the population age 65 and over.
embed_v = np.concatenate([np.zeros((64, 1)), np.ones((100-64, 1))])
total_num_people_ge_65 = embed_v.T@x

# (c) The average age of the population.
weighted_average = np.array(range(0,100)).reshape(-1, 1).T@x / total_num_people

In [192]:
# 1.15
# Cheapest supplier
# You must buy n raw materials in quantities given by the n-vector q, 
# where qi is the amount of raw material i that you must buy. 
# A set of K potential suppliers offer the raw materials at prices 
# given by the n-vectors p1 , . . . , pK . 
# We will assume that all quantities and prices are positive.
# If you must choose just one supplier, how would you do it?

n_item = 100
q = np.random.randint(low=0, high=100, size=(n_item, 1))
p = lambda: np.random.randint(low=1, high=100, size=(n_item, 1))
n_supplier = 10
K = np.stack([p() for _ in range(10)], axis=1).reshape(-1, n_supplier)
v_cost = q.T@K
v_cost.argmin()

# A (highly paid) consultant tells you that you might do better 
# (i.e., get a better total cost) by splitting your order into two, 
# by choosing two suppliers and ordering (1/2)q (i.e., half the quantities) 
# from each of the two. He argues that having a diversity of suppliers is better. 
# Is he right? If so, explain how to find the two suppliers you would use 
# to fill half the order.
s1 = K[:, list(v_cost.argsort()[0]).index(0)].reshape(-1, 1)
s2 = K[:, list(v_cost.argsort()[0]).index(1)].reshape(-1, 1)

print(v_cost.min())
print((q.T@s1 + q.T@s2)/ 2)
print((q.T@s1 + q.T@s2) / 2 < v_cost.min())

print([(q[:i].T@s1[:i] + q[i:].T@s2[i:])[0][0] for i in range(10, 100, 10)])
print([(q[:i].T@s1[:i] + q[i:].T@s2[i:])[0][0] < v_cost.min() for i in range(10, 100, 10)])

209369
[[245503.5]]
[[False]]
[256894, 258977, 256230, 269804, 271454, 272794, 270797, 254377, 244582]
[False, False, False, False, False, False, False, False, False]


In [23]:
# 1.17
# Linear combinations of cash flows.
# We consider cash flow vectors over T time periods,
# with a positive entry meaning a payment received, and negative meaning a payment made. 
# A (unit) single period loan, at time period t, is the T-vector lt that corresponds
# to a payment received of $1 in period t and a payment made of $(1 + r) in period t + 1, 
# with all other payments zero. 
# Here r > 0 is the interest rate (over one period).
# Let c be a $1 T − 1 period loan, starting at period 1. 
# This means that $1 is received in period 1, $(1+r)T−1 is paid in period T, 
# and all other payments (i.e., c2,...,cT−1) are zero. 
# Express c as a linear combination of single period loans.

T = 5
r = 0.1
l = lambda t: np.array([[0]*(t) + [1] + [-(1+r)] + [0]*(T-t-1)]).reshape(-1, 1)
c = np.stack([l(t) * (1.1)**i for i, t in enumerate(range(0, T), start=0)], axis=0)
c.sum(axis=0)

array([[ 1.     ],
       [ 0.     ],
       [ 0.     ],
       [ 0.     ],
       [-0.     ],
       [-1.61051]])

In [372]:
# 1.18
# Linear combinations of linear combinations. 
# Suppose that each of the vectors b1, . . . , bk is a linear combination 
# of the vectors a1, . . . , am,  and c is a linear combination of b1, . . . , bk. 
# Then c is a linear combination of a1,...,am. 
# Show this for the case with m = k = 2. 
# (Showing it in general is not much more difficult, 
# but the notation gets more complicated.)

a = np.random.randn(2)
b = 2 * a + 3 * a
c = 2 * b + 3 * b
np.allclose(c, (2 * (2 * a + 3 * a)) + 3 * (2 * a + 3 * a))

True

In [None]:
# 1.19
# Auto-regressive model. 
# An auto-regressive (AR) model is used to predict zt+1 
# from the previous M values, zt,zt−1,...,zt−M+1
# M-vector β is the AR model coefficient vector

t = 30
M = 10

# (a) β := e1
# yesterday

# (b) β := 2e1 - e2
# 2 * yesterday - two days ago

# (c) β := e6
# 6 days ago

# (d) β := 0.5e1 + 0.5e2
# 0.5 * yesterday + 0.5 * two days ago

In [25]:
# 1.20
# How many bytes does it take to store 100 vectors of length 10^5?
# How many flops it take to form a linear combination of them?
# About how long would this take on a computer capable of carrying out 1 Gflop/s?

a = 8 * 10**2 * 10**5
flops = 100 * 10**5 + 99 * 10**5 
Gflops = 10 ** 6
how_long_would_this_take = a / Gflops