Lecture L7

todo-group · Jun 29, 2016 · 378fa69 · 378fa69
1 parent bc867db
commit 378fa69
Show file tree

Hide file tree

Showing 30 changed files with 52,910 additions and 91 deletions.
diff --git a/lecture/70_optimization.tex b/lecture/70_optimization.tex
@@ -11,7 +11,7 @@ \section{最適化問題}
   \item 真の(大局的な)最小値(最大値)を求めるのは難しい
   \item 一般的には極値を求めることしかできない
   \item 多次元では極小を囲い込むことができない
-  \item 導関数を使う方法: ニュートン法、最急降下法、共役勾配法、準ニュートン法
+  \item 導関数を使う方法: ニュートン法、最急降下法、勾配降下法、共役勾配法、準ニュートン法
   \item 使わない方法: 囲い込み法、Nelder-Meadの滑降シンプレックス法、シミュレーテッド・アニーリング
   \end{itemize}
 \end{frame}
diff --git a/lecture/72_descent.tex b/lecture/72_descent.tex
@@ -1,8 +1,8 @@
-\section{最急降下法と共役勾配法}
+\section{最急降下法と勾配降下法}
 
-\begin{frame}[t,fragile]{最急降下法(steepest-descent)}
+\begin{frame}[t,fragile]{最急降下法(steepest descent)}
   \begin{itemize}
-    %\setlength{\itemsep}{1em}
+    \setlength{\itemsep}{1em}
   \item 関数の微分の情報を使う
   \item 現在の点$\bf x$における勾配を計算
     \[
@@ -14,93 +14,42 @@ \section{最急降下法と共役勾配法}
   \end{itemize}
 \end{frame}
 
-\begin{frame}[t,fragile]{細長い谷の場合}
-  \vspace*{1em}
-  \hspace*{1em}\resizebox{1\textwidth}{!}{\includegraphics{image/steepest-descent.pdf}}
-
-  \vspace*{-2em}
-  \hspace*{20em}{\footnotesize(Press et al 1988)}
-\end{frame}
-
-\begin{frame}[t,fragile]{共役勾配法(conjugate-gradient)}
+\begin{frame}[t,fragile]{勾配降下法(gradient descent)}
   \begin{itemize}
     \setlength{\itemsep}{1em}
-  \item 関数がある点のまわりで
+  \item 勾配方向に一次元最適化を行うかわりに、あらかじめ決めた一定量($\epsilon$)だけ坂を下る
     \[
-    f({\bf x}) \approx c - {\bf b}^T {\bf x} + \frac{1}{2} {\bf x}^T A {\bf x}
+    x_{n+1} = x_n - \epsilon \, \nabla f(x_n)
     \]
-    と近似できるとする
-  \item この時、${\bf x}$における勾配は、連立方程式$A{\bf x}={\bf b}$の「残差」の形で書ける
-    \[
-    -\nabla f = {\bf b} - A {\bf x}
-    \]
-  \item 新しい勾配方向ではなく、それまでとは「共役な方向」に進みたい
+  \item あらかじめ最適な$\epsilon$を知るのは困難
+  \item 機械学習の分野では、(なぜか) $\epsilon=0.1$が良いとされている
+  \item この方法を「最急降下法」、一次元最適化を行う勾配法を「最適降下法(optimum descent)」と呼ぶ場合も
+  \item c.f.) 確率的勾配降下法(stochastic gradient descent)
   \end{itemize}
 \end{frame}
 
-\begin{frame}[t,fragile]{「共役な方向」とは}
+\begin{frame}[t,fragile]{制約条件付きの場合}
   \begin{itemize}
-    \setlength{\itemsep}{1em}
-  \item あるベクトル${\bf p}$にそった一次元の最適化が完了したとする
+    % \setlength{\itemsep}{1em}
+  \item 目的関数: $f(x)$
+  \item 制約条件:
     \begin{itemize}
-    \item その点における${\bf p}$方向の勾配は零。すなわち${\bf p}^T (\nabla f)=0$
-    \item ${\bf p}$方向の勾配の値を変化させないようにしたい
-  \end{itemize}
-  \item 次に、${\bf q}$にそって、${\bf x}+\epsilon {\bf q}$と移動すると
-    \[
-      \delta(\nabla f) = A \times (\epsilon {\bf q}) \sim A {\bf q}
-      \]
-      これが${\bf p}$に垂直であり続けるためには
-    \[
-      {\bf p}^T A {\bf q} = 0
-      \]
-    \item この関係が成り立つ時、${\bf p}$と${\bf q}$は「互いに共役」という
-  \end{itemize}
-\end{frame}
-
-\begin{frame}[t,fragile]{共役勾配法(Conjugate-gradient)}
-  \begin{itemize}
-    \setlength{\itemsep}{1em}
-  \item 初期条件と漸化式
-    \begin{align*}
-      {\bf p}_0 &= {\bf r}_0 = {\bf b} - A {\bf x}_0 \\
-      {\bf x}_{n+1} &= {\bf x}_n + \alpha_n {\bf p}_n \\
-      {\bf r}_{n+1} &= {\bf r}_n - \alpha_n A {\bf p}_n = {\bf b} - A {\bf x}_{n+1} \\
-      {\bf p}_{n+1} &= {\bf r}_{n+1} + \beta_n {\bf p}_n \\
-      \alpha_n &= \frac{{\bf r}_n^T {\bf p}_n}{{\bf p}_n^T A {\bf p}_n} \ \ \
-      \beta_n = \frac{{\bf r}_{n+1}^T {\bf r}_{n+1}}{{\bf r}_n^T {\bf r}_n}
-    \end{align*}
-  \item このように作ると、全ての$i>j \ge 1$について、自動的に
+    \item $g_i(x) = 0$ \ ($i=1,\cdots,m$) \ (等式制約条件)
+    \item $h_j(x) \ge 0$ \ ($j=1,\cdots,n$) \ (不等式制約条件)
+    \end{itemize}
+  \item 等式制約条件の付いている場合: Lagrangeの未定乗数法
     \[
-      {\bf p}_i^T A {\bf p}_j = 0 \ \ \ {\bf r}_i^T {\bf r}_j = 0
-      \]
+    L(x,\lambda_1,\cdots,\lambda_m)=f(x)+\sum_i \lambda_i g_i(x)
+    \]
+    を考え、$x,\lambda_1,\cdots,\lambda_m$の微分が零となる点を探す
+  \item 不等式制約条件の付いている場合: c.f. 線形計画法、ペナルティ関数法
   \end{itemize}
 \end{frame}
 
-\begin{frame}[t,fragile]{共役勾配法(Conjugate-gradient)}
-  \begin{itemize}
-    \setlength{\itemsep}{1em}
-  \item 残差は互いに直交 $\Rightarrow$ $N$回反復すると残差は零 (完全な二次形式の場合)
-  \item 残差は負の勾配で表される $\Rightarrow$ $A$を知らなくても${\bf r}_i$は計算可
-  \item 実際には、数値誤差により、共役性・直交性がくずれる
-  \item また、完全な二次形式ではない
-  \item しかし、最急降下法と比較すると圧倒的に速く収束
-  \end{itemize}
-\end{frame}
+\begin{frame}[t,fragile]{細長い谷の場合}
+  \vspace*{1em}
+  \hspace*{1em}\resizebox{1\textwidth}{!}{\includegraphics{image/steepest-descent.pdf}}
 
-\begin{frame}[t,fragile]{逆反復法による固有ベクトルの計算}
-  \begin{itemize}
-    \setlength{\itemsep}{1em}
-  \item $f(x)$の極小解は、連立一次方程式$A{\bf x} = {\bf b}$の解
-    \begin{itemize}
-    \item 連立方程式を解くのに共役勾配法を利用可
-    \item 行列ベクトル積だけで計算できるので、$A$が疎行列の時、特に有効
-    \end{itemize}
-  \item 逆反復法
-    \begin{itemize}
-    \item 近似固有値を$\mu$とするとき、行列$(A - \mu I)^{-1}$を考えると、固有ベクトルは$A$と同じ、固有値は$(\lambda-\mu)^{-1}$。
-    \item $\mu$が十分に正確であれば、$(\lambda-\mu)^{-1}$は絶対値最大の固有値。行列$(A - \mu I)^{-1}$を適当な初期ベクトルにかけ続けると$\lambda$に対応する固有ベクトルに収束(c.f. べき乗法)
-    \item 実際には$(A-\mu I) {\bf x}' = {\bf x}$という連立方程式を繰り返し解く
-    \end{itemize}
-  \end{itemize}
+  \vspace*{-2em}
+  \hspace*{20em}{\footnotesize(Press et al 1988)}
 \end{frame}
diff --git a/lecture/73_cg.tex b/lecture/73_cg.tex
@@ -0,0 +1,84 @@
+\section{共役勾配法}
+
+\begin{frame}[t,fragile]{共役勾配法(conjugate-gradient)}
+  \begin{itemize}
+    \setlength{\itemsep}{1em}
+  \item 関数がある点のまわりで
+    \[
+    f({\bf x}) \approx c - {\bf b}^T {\bf x} + \frac{1}{2} {\bf x}^T A {\bf x}
+    \]
+    と近似できるとする
+  \item この時、${\bf x}$における勾配は、連立方程式$A{\bf x}={\bf b}$の「残差」の形で書ける
+    \[
+    -\nabla f = {\bf b} - A {\bf x}
+    \]
+  \item 新しい勾配方向ではなく、それまでとは「共役な方向」に進みたい
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[t,fragile]{「共役な方向」とは}
+  \begin{itemize}
+    \setlength{\itemsep}{1em}
+  \item あるベクトル${\bf p}$にそった一次元の最適化が完了したとする
+    \begin{itemize}
+    \item その点における${\bf p}$方向の勾配は零。すなわち${\bf p}^T (\nabla f)=0$
+    \item ${\bf p}$方向の勾配の値を変化させないようにしたい
+  \end{itemize}
+  \item 次に、${\bf q}$にそって、${\bf x}+\epsilon {\bf q}$と移動すると
+    \[
+      \delta(\nabla f) = A \times (\epsilon {\bf q}) \sim A {\bf q}
+      \]
+      これが${\bf p}$に垂直であり続けるためには
+    \[
+      {\bf p}^T A {\bf q} = 0
+      \]
+    \item この関係が成り立つ時、${\bf p}$と${\bf q}$は「互いに共役」という
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[t,fragile]{共役勾配法(Conjugate-gradient)}
+  \begin{itemize}
+    \setlength{\itemsep}{1em}
+  \item 初期条件と漸化式
+    \begin{align*}
+      {\bf p}_0 &= {\bf r}_0 = {\bf b} - A {\bf x}_0 \\
+      {\bf x}_{n+1} &= {\bf x}_n + \alpha_n {\bf p}_n \\
+      {\bf r}_{n+1} &= {\bf r}_n - \alpha_n A {\bf p}_n = {\bf b} - A {\bf x}_{n+1} \\
+      {\bf p}_{n+1} &= {\bf r}_{n+1} + \beta_n {\bf p}_n \\
+      \alpha_n &= \frac{{\bf r}_n^T {\bf p}_n}{{\bf p}_n^T A {\bf p}_n} \ \ \
+      \beta_n = \frac{{\bf r}_{n+1}^T {\bf r}_{n+1}}{{\bf r}_n^T {\bf r}_n}
+    \end{align*}
+  \item このように作ると、全ての$i>j \ge 0$について、自動的に
+    \[
+      {\bf p}_i^T A {\bf p}_j = 0 \ \ \ {\bf r}_i^T {\bf r}_j = 0
+      \]
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[t,fragile]{共役勾配法(Conjugate-gradient)}
+  \begin{itemize}
+    \setlength{\itemsep}{1em}
+  \item 残差は互いに直交 $\Rightarrow$ $N$回反復すると残差は零 (完全な二次形式の場合)
+  \item 残差は負の勾配で表される $\Rightarrow$ $A$を知らなくても${\bf r}_i$は計算可
+  \item 実際には、数値誤差により、共役性・直交性がくずれる
+  \item また、完全な二次形式ではない
+  \item しかし、最急降下法と比較すると圧倒的に速く収束
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[t,fragile]{逆反復法による固有ベクトルの計算}
+  \begin{itemize}
+    \setlength{\itemsep}{1em}
+  \item $f(x)$の極小解は、連立一次方程式$A{\bf x} = {\bf b}$の解
+    \begin{itemize}
+    \item 連立方程式を解くのに共役勾配法を利用可
+    \item 行列ベクトル積だけで計算できるので、$A$が疎行列の時、特に有効
+    \end{itemize}
+  \item 逆反復法
+    \begin{itemize}
+    \item 近似固有値を$\mu$とするとき、行列$(A - \mu I)^{-1}$を考えると、固有ベクトルは$A$と同じ、固有値は$(\lambda-\mu)^{-1}$。
+    \item $\mu$が十分に正確であれば、$(\lambda-\mu)^{-1}$は絶対値最大の固有値。行列$(A - \mu I)^{-1}$を適当な初期ベクトルにかけ続けると$\lambda$に対応する固有ベクトルに収束(c.f. べき乗法)
+    \item 実際には$(A-\mu I) {\bf x}' = {\bf x}$という連立方程式を繰り返し解く
+    \end{itemize}
+  \end{itemize}
+\end{frame}
diff --git a/lecture/76_comparison.tex b/lecture/76_comparison.tex
@@ -0,0 +1,31 @@
+\section{最適化手法の比較}
+
+\begin{frame}[t,fragile]{例題 (二次元の最適化)}
+  \begin{center}
+    \resizebox{.9\textwidth}{!}{\includegraphics{image/func_2d.pdf}}
+  \end{center}
+\end{frame}
+
+\begin{frame}[t,fragile]{様々な最適化手法の比較 (1/4)}
+  \begin{center}
+    \resizebox{.9\textwidth}{!}{\includegraphics{image/optimization.pdf}}
+  \end{center}
+\end{frame}
+
+\begin{frame}[t,fragile]{様々な最適化手法の比較 (2/4)}
+  \begin{center}
+    \resizebox{.9\textwidth}{!}{\includegraphics{image/optimization2.pdf}}
+  \end{center}
+\end{frame}
+
+\begin{frame}[t,fragile]{様々な最適化手法の比較 (3/4)}
+  \begin{center}
+    \resizebox{.9\textwidth}{!}{\includegraphics{image/optimization3.pdf}}
+  \end{center}
+\end{frame}
+
+\begin{frame}[t,fragile]{様々な最適化手法の比較 (4/4)}
+  \begin{center}
+    \resizebox{.9\textwidth}{!}{\includegraphics{image/optimization4.pdf}}
+  \end{center}
+\end{frame}
diff --git a/lecture/7_optimization.tex b/lecture/7_optimization.tex
@@ -15,8 +15,10 @@
 \input{11_newton.tex}
 \input{71_enclosure.tex}
 \input{72_descent.tex}
-\input{73_simplex.tex}
-\input{74_annealing.tex}
+\input{73_cg.tex}
+\input{74_simplex.tex}
+\input{75_annealing.tex}
+\input{76_comparison.tex}
 \input{79_exercise.tex}
 
 \end{document}
diff --git a/lecture/82_batch.tex b/lecture/82_batch.tex
@@ -0,0 +1,20 @@
+\section{バッチキューシステム}
+
+\begin{frame}[t,fragile]{バッチキューシステム}
+  \begin{itemize}
+    \setlength{\itemsep}{1em}
+  \item 実習用計算機 photon
+    \begin{itemize}
+    \item ログインノード(2CPU, 12コア)+計算ノード(64CPU, 256コア)からなる「クラスタワークステーション」(並列計算機の一種)
+    \item 普段{\tt ssh}して作業しているのはログインノード
+    \end{itemize}
+  \item バッチキューシステム
+    \begin{itemize}
+    \item 長い(大きな)計算は計算ノードを使う
+    \item 多数の計算ノードの割り振りを手でやるのは非効率的
+    \item バッチキューシステムを使って、「ジョブ」を投入する
+    \end{itemize}
+  \item 詳しい説明は「システム利用マニュアル」({\tt ssh}ログイン時に表示されるメッセージ参照)を見ること
+  \item photon は卒業まで継続して利用可 (希望すれば大学院でも)
+  \end{itemize}
+\end{frame}
diff --git a/lecture/Makefile b/lecture/Makefile
@@ -15,8 +15,8 @@ lecture-2.pdf: 20_ode.tex 21_symplectic.tex 90_vcs.tex 91_subversion.tex
 lecture-3.pdf: 30_intro.tex 31_direct.tex 32_iterative.tex 13_lapack.tex
 lecture-4.pdf: 40_intro.tex 41_dense.tex 42_sparse.tex 43_svd.tex
 lecture-5.pdf: 50_least-square.tex 52_bayes.tex 60_randomized.tex 61_montecarlo_integration.tex 62_random_number.tex 63_mcmc.tex
-lecture-6.pdf: 70_optimization.tex 11_newton.tex 71_enclosure.tex 72_descent.tex 73_simplex.tex 74_annealing.tex
-lecture-7.pdf: 70_optimization.tex 72_descent.tex 80_supercomputer.tex	81_parallel.tex
+lecture-6.pdf: 70_optimization.tex 11_newton.tex 71_enclosure.tex 72_descent.tex 73_cg.tex 74_simplex.tex 75_annealing.tex 76_comparison.tex
+lecture-7.pdf: 70_optimization.tex 72_descent.tex 76_comparison.tex 80_supercomputer.tex	81_parallel.tex 82_batch.tex
 appendix.pdf: 12_algebraic.tex 22_numerov.tex 09_exercise.tex 19_exercise.tex 29_exercise.tex 39_exercise.tex 49_exercise.tex 51_kernel.tex 59_exercise.tex 69_exercise.tex 79_exercise.tex 89_exercise.tex
 
 0_about.pdf: 00_about.tex 01_ssh.tex 09_exercise.tex
@@ -26,8 +26,8 @@ appendix.pdf: 12_algebraic.tex 22_numerov.tex 09_exercise.tex 19_exercise.tex 29
 4_eigenvalue_problem.pdf: 40_intro.tex 41_dense.tex 42_sparse.tex 43_svd.tex 49_exercise.tex
 5_linear_regression.pdf: 50_least-square.tex 51_kernel.tex 52_bayes.tex 59_exercise.tex
 6_monte_carlo.pdf: 60_randomized.tex 61_montecarlo_integration.tex 62_random_number.tex 63_mcmc.tex 69_exercise.tex
-7_optimization.pdf: 70_optimization.tex 11_newton.tex 71_enclosure.tex 72_descent.tex 73_simplex.tex 74_annealing.tex 79_exercise.tex
-8_hpc.pdf: 80_supercomputer.tex	81_parallel.tex 89_exercise.tex
+7_optimization.pdf: 70_optimization.tex 11_newton.tex 71_enclosure.tex 72_descent.tex 73_cg.tex 74_simplex.tex 75_annealing.tex 76_comparison.tex 79_exercise.tex
+8_hpc.pdf: 80_supercomputer.tex	81_parallel.tex 82_batch.tex 89_exercise.tex
 
 .tex.dvi:
 	TEXINPUTS=.:./style//: platex -shell-escape $<

diff --git a/lecture/image/cg_2d.dat b/lecture/image/cg_2d.dat
@@ -0,0 +1,51 @@
+0 -15.000000 3.000000 52440.000000
+1 -2.152678 0.432779 -21.528276
+2 -1.974868 1.321834 -23.274875
+3 -1.746029 1.729715 -23.884669
+4 -1.490615 2.021497 -24.349902
+5 -1.226392 2.237385 -24.742100
+6 -0.959875 2.397397 -25.089907
+7 -0.694363 2.512953 -25.407710
+8 -0.431886 2.591287 -25.703882
+9 -0.173875 2.637326 -25.983806
+10 0.078552 2.654600 -26.251207
+11 0.324446 2.645724 -26.508814
+12 0.562944 2.612683 -26.758726
+13 0.793219 2.556999 -27.002629
+14 1.014434 2.479838 -27.241929
+15 1.225704 2.382074 -27.477848
+16 1.426058 2.264335 -27.711487
+17 1.614386 2.127026 -27.943872
+18 1.789371 1.970341 -28.175982
+19 1.949386 1.794280 -28.408769
+20 2.092328 1.598682 -28.643124
+21 2.215351 1.383353 -28.879751
+22 2.314380 1.148505 -29.118745
+23 2.383299 0.896388 -29.358231
+24 2.413075 0.637474 -29.589906
+25 2.395892 0.410471 -29.786836
+26 2.349640 0.282758 -29.903883
+27 2.309475 0.242443 -29.949816
+28 2.279391 0.237363 -29.970022
+29 2.247352 0.253801 -29.989079
+30 2.185601 0.338607 -30.036643
+31 1.707741 1.557538 -31.413386
+32 1.693859 1.557559 -31.415723
+33 1.681671 1.583226 -31.417822
+34 1.637255 1.616643 -31.421279
+35 1.636666 1.621730 -31.421506
+36 1.635521 1.621865 -31.421518
+37 1.631731 1.627030 -31.421563
+38 1.629719 1.628036 -31.421577
+39 1.629736 1.628195 -31.421577
+40 1.629450 1.628347 -31.421577
+41 1.629080 1.628856 -31.421578
+42 1.629031 1.628861 -31.421578
+43 1.629029 1.628874 -31.421578
+44 1.628968 1.628919 -31.421578
+45 1.628959 1.628940 -31.421578
+46 1.628957 1.628939 -31.421578
+47 1.628955 1.628943 -31.421578
+48 1.628950 1.628947 -31.421578
+49 1.628950 1.628947 -31.421578
+50 1.628949 1.628948 -31.421578