Update latex-math
ludwigbothmann authored and github-actions[bot] committed Jul 24, 2024
1 parent 1a7b49f commit 5552cbd
Showing 8 changed files with 63 additions and 53 deletions.
7 changes: 4 additions & 3 deletions latex-math/basic-math.tex
@@ -1,3 +1,4 @@
+ % dependencies: amsmath, amssymb, dsfont
% math spaces
\ifdefined\N
\renewcommand{\N}{\mathds{N}} % N, naturals
@@ -6,7 +7,7 @@
\newcommand{\Q}{\mathds{Q}} % Q, rationals
\newcommand{\R}{\mathds{R}} % R, reals
\ifdefined\C
- \renewcommand{\C}{\mathds{C}} % C, complex
+ \renewcommand{\C}{\mathds{C}} % C, complex
\else \newcommand{\C}{\mathds{C}} \fi
\newcommand{\continuous}{\mathcal{C}} % C, space of continuous functions
\newcommand{\M}{\mathcal{M}} % machine numbers
@@ -49,9 +50,9 @@
\newcommand{\prodjp}{\prod\limits_{j=1}^p} % product from j=1 to p

% linear algebra
- \newcommand{\one}{\boldsymbol{1}} % 1, unitvector
+ \newcommand{\one}{\bm{1}} % 1, unitvector
\newcommand{\zero}{\mathbf{0}} % 0-vector
- \newcommand{\id}{\boldsymbol{I}} % I, identity
+ \newcommand{\id}{\bm{I}} % I, identity
\newcommand{\diag}{\operatorname{diag}} % diag, diagonal
\newcommand{\trace}{\operatorname{tr}} % tr, trace
\newcommand{\spn}{\operatorname{span}} % span
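Since \bm replaces \boldsymbol throughout this commit but the dependency comment above only lists amsmath, amssymb, and dsfont, the bm package is presumably needed as well. A minimal usage sketch of the renamed macros under that assumption (not part of the commit):

% sketch only — assumes \usepackage{amsmath, amssymb, dsfont, bm} in the preamble
$\id \, \one = \one, \qquad \id \, \zero = \zero, \qquad \diag(\one) = \id, \qquad \trace(\id) = n \ \text{for} \ \id \in \R^{n \times n}$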
67 changes: 35 additions & 32 deletions latex-math/basic-ml.tex
@@ -1,7 +1,7 @@
% machine learning
\newcommand{\Xspace}{\mathcal{X}} % X, input space
\newcommand{\Yspace}{\mathcal{Y}} % Y, output space
- \newcommand{\Zspace}{\mathcal{Z}} % Space of sampled datapoints ! Also defined identically in ml-online.tex !
+ \newcommand{\Zspace}{\mathcal{Z}} % Z, space of sampled datapoints
\newcommand{\nset}{\{1, \ldots, n\}} % set from 1 to n
\newcommand{\pset}{\{1, \ldots, p\}} % set from 1 to p
\newcommand{\gset}{\{1, \ldots, g\}} % set from 1 to g
@@ -26,6 +26,7 @@
\newcommand{\xdat}{\left\{ \xv^{(1)}, \ldots, \xv^{(n)}\right\}} % {x1, ..., xn}, input data
\newcommand{\ydat}{\left\{ \yv^{(1)}, \ldots, \yv^{(n)}\right\}} % {y1, ..., yn}, input data
\newcommand{\yvec}{\left(y^{(1)}, \hdots, y^{(n)}\right)^\top} % (y1, ..., yn), vector of outcomes
+ \newcommand{\greekxi}{\xi} % Greek letter xi
\renewcommand{\xi}[1][i]{\xv^{(#1)}} % x^i, i-th observed value of x
\newcommand{\yi}[1][i]{y^{(#1)}} % y^i, i-th observed value of y
\newcommand{\xivec}{\left(x^{(i)}_1, \ldots, x^{(i)}_p\right)^\top} % (x1^i, ..., xp^i), i-th observation vector
@@ -54,10 +55,10 @@
\newcommand{\fkx}[1][k]{f_{#1}(\xv)} % f_j(x), discriminant component function
\newcommand{\fh}{\hat{f}} % f hat, estimated prediction function
\newcommand{\fxh}{\fh(\xv)} % fhat(x)
- \newcommand{\fxt}{f(\xv ~|~ \thetab)} % f(x | theta)
+ \newcommand{\fxt}{f(\xv ~|~ \thetav)} % f(x | theta)
\newcommand{\fxi}{f\left(\xv^{(i)}\right)} % f(x^(i))
\newcommand{\fxih}{\hat{f}\left(\xv^{(i)}\right)} % f(x^(i))
- \newcommand{\fxit}{f\left(\xv^{(i)} ~|~ \thetab\right)} % f(x^(i) | theta)
+ \newcommand{\fxit}{f\left(\xv^{(i)} ~|~ \thetav\right)} % f(x^(i) | theta)
\newcommand{\fhD}{\fh_{\D}} % fhat_D, estimate of f based on D
\newcommand{\fhDtrain}{\fh_{\Dtrain}} % fhat_Dtrain, estimate of f based on D
\newcommand{\fhDnlam}{\fh_{\Dn, \lamv}} %model learned on Dn with hp lambda
@@ -69,9 +70,9 @@
\newcommand{\hx}{h(\xv)} % h(x), discrete prediction function
\newcommand{\hh}{\hat{h}} % h hat
\newcommand{\hxh}{\hat{h}(\xv)} % hhat(x)
- \newcommand{\hxt}{h(\xv | \thetab)} % h(x | theta)
+ \newcommand{\hxt}{h(\xv | \thetav)} % h(x | theta)
\newcommand{\hxi}{h\left(\xi\right)} % h(x^(i))
- \newcommand{\hxit}{h\left(\xi ~|~ \thetab\right)} % h(x^(i) | theta)
+ \newcommand{\hxit}{h\left(\xi ~|~ \thetav\right)} % h(x^(i) | theta)
\newcommand{\hbayes}{h^{\ast}} % Bayes-optimal classification model
\newcommand{\hxbayes}{h^{\ast}(\xv)} % Bayes-optimal classification model

@@ -82,27 +83,27 @@

% theta
\newcommand{\thetah}{\hat{\theta}} % theta hat
- \newcommand{\thetab}{\bm{\theta}} % theta vector
- \newcommand{\thetabh}{\bm{\hat\theta}} % theta vector hat
- \newcommand{\thetat}[1][t]{\thetab^{[#1]}} % theta^[t] in optimization
- \newcommand{\thetatn}[1][t]{\thetab^{[#1 +1]}} % theta^[t+1] in optimization
- \newcommand{\thetahDnlam}{\thetabh_{\Dn, \lamv}} %theta learned on Dn with hp lambda
- \newcommand{\thetahDlam}{\thetabh_{\D, \lamv}} %theta learned on D with hp lambda
- \newcommand{\mint}{\min_{\thetab \in \Theta}} % min problem theta
- \newcommand{\argmint}{\argmin_{\thetab \in \Theta}} % argmin theta
+ \newcommand{\thetav}{\bm{\theta}} % theta vector
+ \newcommand{\thetavh}{\bm{\hat\theta}} % theta vector hat
+ \newcommand{\thetat}[1][t]{\thetav^{[#1]}} % theta^[t] in optimization
+ \newcommand{\thetatn}[1][t]{\thetav^{[#1 +1]}} % theta^[t+1] in optimization
+ \newcommand{\thetahDnlam}{\thetavh_{\Dn, \lamv}} %theta learned on Dn with hp lambda
+ \newcommand{\thetahDlam}{\thetavh_{\D, \lamv}} %theta learned on D with hp lambda
+ \newcommand{\mint}{\min_{\thetav \in \Theta}} % min problem theta
+ \newcommand{\argmint}{\argmin_{\thetav \in \Theta}} % argmin theta

% densities + probabilities
% pdf of x
\newcommand{\pdf}{p} % p
\newcommand{\pdfx}{p(\xv)} % p(x)
- \newcommand{\pixt}{\pi(\xv~|~ \thetab)} % pi(x|theta), pdf of x given theta
- \newcommand{\pixit}[1][i]{\pi\left(\xi[#1] ~|~ \thetab\right)} % pi(x^i|theta), pdf of x given theta
+ \newcommand{\pixt}{\pi(\xv~|~ \thetav)} % pi(x|theta), pdf of x given theta
+ \newcommand{\pixit}[1][i]{\pi\left(\xi[#1] ~|~ \thetav\right)} % pi(x^i|theta), pdf of x given theta
\newcommand{\pixii}[1][i]{\pi\left(\xi[#1]\right)} % pi(x^i), pdf of i-th x

% pdf of (x, y)
\newcommand{\pdfxy}{p(\xv,y)} % p(x, y)
- \newcommand{\pdfxyt}{p(\xv, y ~|~ \thetab)} % p(x, y | theta)
- \newcommand{\pdfxyit}{p\left(\xi, \yi ~|~ \thetab\right)} % p(x^(i), y^(i) | theta)
+ \newcommand{\pdfxyt}{p(\xv, y ~|~ \thetav)} % p(x, y | theta)
+ \newcommand{\pdfxyit}{p\left(\xi, \yi ~|~ \thetav\right)} % p(x^(i), y^(i) | theta)

% pdf of x given y
\newcommand{\pdfxyk}[1][k]{p(\xv | y= #1)} % p(x | y = k)
@@ -112,7 +113,7 @@
% prior probabilities
\newcommand{\pik}[1][k]{\pi_{#1}} % pi_k, prior
\newcommand{\lpik}[1][k]{\log \pi_{#1}} % log pi_k, log of the prior
- \newcommand{\pit}{\pi(\thetab)} % Prior probability of parameter theta
+ \newcommand{\pit}{\pi(\thetav)} % Prior probability of parameter theta

% posterior probabilities
\newcommand{\post}{\P(y = 1 ~|~ \xv)} % P(y = 1 | x), post. prob for y=1
@@ -123,13 +124,13 @@
\newcommand{\pix}{\pi(\xv)} % pi(x), P(y = 1 | x)
\newcommand{\piv}{\bm{\pi}} % pi, bold, as vector
\newcommand{\pikx}[1][k]{\pi_{#1}(\xv)} % pi_k(x), P(y = k | x)
- \newcommand{\pikxt}[1][k]{\pi_{#1}(\xv ~|~ \thetab)} % pi_k(x | theta), P(y = k | x, theta)
+ \newcommand{\pikxt}[1][k]{\pi_{#1}(\xv ~|~ \thetav)} % pi_k(x | theta), P(y = k | x, theta)
\newcommand{\pixh}{\hat \pi(\xv)} % pi(x) hat, P(y = 1 | x) hat
\newcommand{\pikxh}[1][k]{\hat \pi_{#1}(\xv)} % pi_k(x) hat, P(y = k | x) hat
\newcommand{\pixih}{\hat \pi(\xi)} % pi(x^(i)) with hat
\newcommand{\pikxih}[1][k]{\hat \pi_{#1}(\xi)} % pi_k(x^(i)) with hat
- \newcommand{\pdfygxt}{p(y ~|~\xv, \thetab)} % p(y | x, theta)
- \newcommand{\pdfyigxit}{p\left(\yi ~|~\xi, \thetab\right)} % p(y^i |x^i, theta)
+ \newcommand{\pdfygxt}{p(y ~|~\xv, \thetav)} % p(y | x, theta)
+ \newcommand{\pdfyigxit}{p\left(\yi ~|~\xi, \thetav\right)} % p(y^i |x^i, theta)
\newcommand{\lpdfygxt}{\log \pdfygxt } % log p(y | x, theta)
\newcommand{\lpdfyigxit}{\log \pdfyigxit} % log p(y^i |x^i, theta)

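As a quick sanity check that the renamed parameter macros compose as before, a short sketch using only macros defined above (the MAP subscript is ad-hoc notation, not a macro from this file):

% sketch only — Bayes' rule up to normalization, and MAP estimation as a minimization problem
$\pi(\thetav ~|~ \xv) \propto \pixt \, \pit$
$\thetavh_{\mathrm{MAP}} = \argmint \left[ -\log \pixt - \log \pit \right]$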
@@ -139,8 +140,10 @@

% residual and margin
\newcommand{\eps}{\epsilon} % residual, stochastic
+ \newcommand{\epsv}{\bm{\epsilon}} % residual, stochastic, as vector
\newcommand{\epsi}{\epsilon^{(i)}} % epsilon^i, residual, stochastic
\newcommand{\epsh}{\hat{\epsilon}} % residual, estimated
+ \newcommand{\epsvh}{\hat{\epsv}} % residual, estimated, vector
\newcommand{\yf}{y \fx} % y f(x), margin
\newcommand{\yfi}{\yi \fxi} % y^i f(x^i), margin
\newcommand{\Sigmah}{\hat \Sigma} % estimated covariance matrix
@@ -153,7 +156,7 @@
\newcommand{\Lxyi}{L\left(\yi, \fxi\right)} % loss of observation
\newcommand{\Lxyt}{L\left(y, \fxt\right)} % loss with f parameterized
\newcommand{\Lxyit}{L\left(\yi, \fxit\right)} % loss of observation with f parameterized
- \newcommand{\Lxym}{L\left(\yi, f\left(\bm{\tilde{x}}^{(i)} ~|~ \thetab\right)\right)} % loss of observation with f parameterized
+ \newcommand{\Lxym}{L\left(\yi, f\left(\bm{\tilde{x}}^{(i)} ~|~ \thetav\right)\right)} % loss of observation with f parameterized
\newcommand{\Lpixy}{L\left(y, \pix\right)} % loss in classification
\newcommand{\Lpiv}{L\left(y, \piv\right)} % loss in classification
\newcommand{\Lpixyi}{L\left(\yi, \pixii\right)} % loss of observation in classification
@@ -171,26 +174,26 @@
\newcommand{\riskbayes}{\mathcal{R}^\ast}
\newcommand{\riskf}{\risk(f)} % R(f), risk
\newcommand{\riskdef}{\E_{y|\xv}\left(\Lxy \right)} % risk def (expected loss)
- \newcommand{\riskt}{\mathcal{R}(\thetab)} % R(theta), risk
+ \newcommand{\riskt}{\mathcal{R}(\thetav)} % R(theta), risk
\newcommand{\riske}{\mathcal{R}_{\text{emp}}} % R_emp, empirical risk w/o factor 1 / n
\newcommand{\riskeb}{\bar{\mathcal{R}}_{\text{emp}}} % R_emp, empirical risk w/ factor 1 / n
\newcommand{\riskef}{\riske(f)} % R_emp(f)
- \newcommand{\risket}{\mathcal{R}_{\text{emp}}(\thetab)} % R_emp(theta)
+ \newcommand{\risket}{\mathcal{R}_{\text{emp}}(\thetav)} % R_emp(theta)
\newcommand{\riskr}{\mathcal{R}_{\text{reg}}} % R_reg, regularized risk
- \newcommand{\riskrt}{\mathcal{R}_{\text{reg}}(\thetab)} % R_reg(theta)
+ \newcommand{\riskrt}{\mathcal{R}_{\text{reg}}(\thetav)} % R_reg(theta)
\newcommand{\riskrf}{\riskr(f)} % R_reg(f)
- \newcommand{\riskrth}{\hat{\mathcal{R}}_{\text{reg}}(\thetab)} % hat R_reg(theta)
- \newcommand{\risketh}{\hat{\mathcal{R}}_{\text{emp}}(\thetab)} % hat R_emp(theta)
+ \newcommand{\riskrth}{\hat{\mathcal{R}}_{\text{reg}}(\thetav)} % hat R_reg(theta)
+ \newcommand{\risketh}{\hat{\mathcal{R}}_{\text{emp}}(\thetav)} % hat R_emp(theta)
\newcommand{\LL}{\mathcal{L}} % L, likelihood
- \newcommand{\LLt}{\mathcal{L}(\thetab)} % L(theta), likelihood
- \newcommand{\LLtx}{\mathcal{L}(\thetab | \xv)} % L(theta|x), likelihood
+ \newcommand{\LLt}{\mathcal{L}(\thetav)} % L(theta), likelihood
+ \newcommand{\LLtx}{\mathcal{L}(\thetav | \xv)} % L(theta|x), likelihood
\newcommand{\logl}{\ell} % l, log-likelihood
- \newcommand{\loglt}{\logl(\thetab)} % l(theta), log-likelihood
- \newcommand{\logltx}{\logl(\thetab | \xv)} % l(theta|x), log-likelihood
+ \newcommand{\loglt}{\logl(\thetav)} % l(theta), log-likelihood
+ \newcommand{\logltx}{\logl(\thetav | \xv)} % l(theta|x), log-likelihood
\newcommand{\errtrain}{\text{err}_{\text{train}}} % training error
\newcommand{\errtest}{\text{err}_{\text{test}}} % test error
\newcommand{\errexp}{\overline{\text{err}_{\text{test}}}} % avg test error

% lm
- \newcommand{\thx}{\thetab^\top \xv} % linear model
+ \newcommand{\thx}{\thetav^\top \xv} % linear model
\newcommand{\olsest}{(\Xmat^\top \Xmat)^{-1} \Xmat^\top \yv} % OLS estimator in LM
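To see the renamed risk and linear-model macros in context, a hedged sketch (\Xmat and \yv are assumed to be defined in basic-math.tex; nothing below is part of the commit):

% sketch only — empirical risk minimization, and OLS as the special case of a linear model with squared loss
$\risket = \sum_{i=1}^n \Lxyit, \qquad \thetavh = \argmint \risket$
$\text{for } \fxt = \thx \text{ and } L(y, f) = (y - f)^2: \quad \thetavh = \olsest$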
10 changes: 5 additions & 5 deletions latex-math/ml-ensembles.tex
@@ -17,9 +17,9 @@
\newcommand{\errm}[1][m]{\text{err}^{[#1]}} % weighted in-sample misclassification rate
\newcommand{\wm}[1][m]{w^{[#1]}} % weight vector of basemodel m
\newcommand{\wmi}[1][m]{w^{[#1](i)}} % weight of obs i of basemodel m
- \newcommand{\thetam}[1][m]{\thetab^{[#1]}} % parameters of basemodel m
- \newcommand{\thetamh}[1][m]{\hat{\thetab}^{[#1]}} % parameters of basemodel m with hat
- \newcommand{\blxt}[1][m]{b(\xv, \thetab^{[#1]})} % baselearner, default m
+ \newcommand{\thetam}[1][m]{\thetav^{[#1]}} % parameters of basemodel m
+ \newcommand{\thetamh}[1][m]{\hat{\thetav}^{[#1]}} % parameters of basemodel m with hat
+ \newcommand{\blxt}[1][m]{b(\xv, \thetav^{[#1]})} % baselearner, default m
\newcommand{\ens}{\sum_{m=1}^M \betam \blxt} % ensemble
\newcommand{\rmm}[1][m]{\tilde{r}^{[#1]}} % pseudo residuals
\newcommand{\rmi}[1][m]{\tilde{r}^{[#1](i)}} % pseudo residuals
@@ -33,6 +33,6 @@
\newcommand{\Lpleft}{\Lp_{\text{left}}}

% ml - boosting iml lecture
- \newcommand{\ts}{\thetab^{\star}} % theta*
- \newcommand{\bljt}{\bl[j](\xv, \thetab)} % BL j with theta
+ \newcommand{\ts}{\thetav^{\star}} % theta*
+ \newcommand{\bljt}{\bl[j](\xv, \thetav)} % BL j with theta
\newcommand{\bljts}{\bl[j](\xv, \ts)} % BL j with theta*
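A brief sketch of how the renamed base-learner macros enter the additive ensemble (\fx comes from basic-ml.tex, and \betam is assumed to be defined elsewhere in this file):

% sketch only — \ens expands to \sum_{m=1}^M \betam \blxt, an additive ensemble of M base learners b(x, theta^[m])
$\fx = \ens$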
2 changes: 1 addition & 1 deletion latex-math/ml-eval.tex
@@ -41,7 +41,7 @@

% performance measure
\newcommand{\rhoL}{\rho_L} % perf. measure derived from pointwise loss
- \newcommand{\F}{\boldsymbol{F}} % matrix of prediction scores
+ \newcommand{\F}{\bm{F}} % matrix of prediction scores
\newcommand{\Fi}[1][i]{\F^{(#1)}} % i-th row vector of the predscore mat
\newcommand{\FJ}[1][J]{\F_{#1}} % predscore mat idxvec J
\newcommand{\FJf}{\FJ[J,f]} % predscore mat idxvec J and model f
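For orientation, a hedged sketch of the prediction-score macros (the n x g shape is an assumption, with g the number of classes as in basic-ml.tex):

% sketch only — \F holds one score vector per observation, \Fi its i-th row
$\F \in \R^{n \times g}, \qquad \Fi \in \R^{g}$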
2 changes: 1 addition & 1 deletion latex-math/ml-infotheory.tex
@@ -7,6 +7,6 @@
\newcommand{\cdentyx}{- \int_{\Xspace, \Yspace} f(x, y) \cdot \log f(y | x) dx dy} % cond diff entropy y|x
\newcommand{\xentpq}{- \sum_{x \in \Xspace} p(x) \cdot \log q(x)} % cross-entropy of p, q
\newcommand{\kldpq}{D_{KL}(p \| q)} % KLD between p and q
- \newcommand{\kldpqt}{D_{KL}(p \| q_{\thetab})} % KLD divergence between p and parameterized q
+ \newcommand{\kldpqt}{D_{KL}(p \| q_{\thetav})} % KLD divergence between p and parameterized q
\newcommand{\explogpq}{\E_p \left[\log \frac{p(X)}{q(X)} \right]} % expected LLR of p, q (def KLD)
\newcommand{\sumlogpq}{\sum_{x \in \Xspace} p(x) \cdot \log \frac{p(x)}{q(x)}} % expected LLR of p, q (def KLD)
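The three macros above are definitionally related; written out for the discrete case:

% sketch only — KL divergence in three equivalent forms
$\kldpq = \explogpq = \sumlogpq$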
16 changes: 8 additions & 8 deletions latex-math/ml-nn.tex
@@ -19,18 +19,18 @@
\newcommand{\Odropout}{\mathnormal{J}(\theta, \mu|X,y)} % dropout objective function

% deeplearning - optimization
- \newcommand{\Loss}{L(y, f(\xv, \thetab))}
- \newcommand{\Lmomentumnest}{L(\yi, f(x^{(i)}, \thetab + \varphi \nub))} % momentum risk
- \newcommand{\Lmomentumtilde}{L(\yi, f(x^{(i)}, \tilde{\thetab}))} % Nesterov momentum risk
- \newcommand{\Lmomentum}{L(\yi, f(x^{(i)}, \thetab))}
+ \newcommand{\Loss}{L(y, f(\xv, \thetav))}
+ \newcommand{\Lmomentumnest}{L(\yi, f(x^{(i)}, \thetav + \varphi \nub))} % momentum risk
+ \newcommand{\Lmomentumtilde}{L(\yi, f(x^{(i)}, \tilde{\thetav}))} % Nesterov momentum risk
+ \newcommand{\Lmomentum}{L(\yi, f(x^{(i)}, \thetav))}
\newcommand{\Hess}{\mathbf{H}}
- \newcommand{\nub}{\boldsymbol{\nu}}
+ \newcommand{\nub}{\bm{\nu}}

% deeplearning - autoencoders
\newcommand{\uauto}{L(x,g(f(x)))} % undercomplete autoencoder objective function
\newcommand{\dauto}{L(x,g(f(\tilde{x})))} % denoising autoencoder objective function

% deeplearning - adversarials
- \newcommand{\deltab}{\boldsymbol{\delta}}
- \newcommand{\Lossdeltai}{L(\yi, f(\xi + \deltab|\thetab))}
- \newcommand{\Lossdelta}{L(y, f(\xv + \deltab| \thetab))}
+ \newcommand{\deltab}{\bm{\delta}}
+ \newcommand{\Lossdeltai}{L(\yi, f(\xi + \deltab|\thetav))}
+ \newcommand{\Lossdelta}{L(y, f(\xv + \deltab| \thetav))}
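A hedged sketch of how the adversarial macros are typically combined (the epsilon-ball constraint is an assumption, not something defined in this file):

% sketch only — worst-case (adversarial) loss within an epsilon-ball around x
$\max_{\|\deltab\|_\infty \leq \varepsilon} \; \Lossdelta$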
6 changes: 6 additions & 0 deletions latex-math/ml-regu.tex
@@ -0,0 +1,6 @@
+ % \thetah is \hat{\theta} (theta hat)
+ % \thetav is \bm{\theta} (theta vector)
+ \newcommand{\thetas}{\thetav^*} % theta star
+ \newcommand{\thetaridge}{\thetav_{\mathrm{ridge}}} % theta (RIDGE)
+ \newcommand{\thetalasso}{\thetav_{\mathrm{LASSO}}} % theta (LASSO)
+ \newcommand{\thetaols}{\thetav_{\mathrm{OLS}}} % theta (OLS)
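A brief sketch of how these new macros would appear in the usual penalized objectives (\argmint, \risket, and \thetav come from basic-ml.tex; the penalty terms are not defined here):

% sketch only — ridge and LASSO as L2- and L1-penalized empirical risk minimization
$\thetaridge = \argmint \; \risket + \lambda \|\thetav\|_2^2, \qquad \thetalasso = \argmint \; \risket + \lambda \|\thetav\|_1$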
6 changes: 3 additions & 3 deletions latex-math/ml-svm.tex
@@ -3,8 +3,8 @@
\renewcommand{\sl}{\zeta} % slack variable
\newcommand{\slvec}{\left(\zeta^{(1)}, \zeta^{(n)}\right)} % slack variable vector
\newcommand{\sli}[1][i]{\zeta^{(#1)}} % i-th slack variable
- \newcommand{\scptxi}{\scp{\thetab}{\xi}} % scalar product of theta and xi
- \newcommand{\svmhplane}{\yi \left( \scp{\thetab}{\xi} + \theta_0 \right)} % SVM hyperplane (normalized)
+ \newcommand{\scptxi}{\scp{\thetav}{\xi}} % scalar product of theta and xi
+ \newcommand{\svmhplane}{\yi \left( \scp{\thetav}{\xi} + \theta_0 \right)} % SVM hyperplane (normalized)
\newcommand{\alphah}{\hat{\alpha}} % alpha-hat (basis fun coefficients)
\newcommand{\alphav}{\bm{\alpha}} % vector alpha (bold) (basis fun coefficients)
\newcommand{\alphavh}{\hat{\bm{\alpha}}} % vector alpha-hat (basis fun coefficients)
@@ -15,4 +15,4 @@
\newcommand{\phix}{\phi(\xv)} % feature map x
\newcommand{\phixt}{\phi(\tilde \xv)} % feature map x tilde
\newcommand{\kxxt}{k(\xv, \tilde \xv)} % kernel fun x, x tilde
- \newcommand{\scptxifm}{\scp{\thetab}{\phi(\xi)}} % scalar product of theta and phi(xi)
+ \newcommand{\scptxifm}{\scp{\thetav}{\phi(\xi)}} % scalar product of theta and phi(xi)
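As a usage check, the soft-margin SVM constraints written with the renamed macros (sketch only):

% sketch only — slack-relaxed margin constraints for observation i
$\svmhplane \;\geq\; 1 - \sli, \qquad \sli \geq 0$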
