/
lec_2.tex
205 lines (173 loc) Β· 9.5 KB
/
lec_2.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
\lecture{2}{31 Aug.\ 10:30}{Linear Programming with Set Covers}
\section{Linear Programming Rounding}
To get a \(d\)-approximation algorithm, instead of seeing the greedy algorithm, we first see the LP\footnote{See \href{https://www.pbb.wtf/posts/Notes\#linear-programming-math561ioe510to518-umich}{MATH561} for a complete reference.} dual method, which turns out to be exactly the same as the greedy algorithm.
\begin{prev}
Both linear programming and convex programming can be solved in polynomial time.
\end{prev}
Notice that it's more natural to define \hyperref[prb:set-cover]{set cover} in terms of ILP (integer LP). Define our integer variables \(\left\{ x_i \right\} _{i\in [m]}\) such that
\[
x_i = \begin{dcases}
1, & \text{ if } S_i\in \mathcal{S} ^\prime ; \\
0, & \text{ otherwise}.
\end{dcases}
\]
In this way, we have the following ILP formulation for \hyperref[prb:set-cover]{set cover} as
\[
\begin{aligned}
\min~ & \sum_{i} w_i \cdot x_i \\
& \sum_{S_i\ni e}x_{i} \geq 1 & \forall e\in U \\
(\text{IP})\quad & x_{i} \in \left\{ 0, 1 \right\} & \forall i.
\end{aligned}
\]
But we know that this is an \(\NP\)-hard problem, so we relax it to be
\[
\begin{aligned}
\min~ & \sum_{i} w_i \cdot x_i \\
& \sum_{S_i\ni e}x_{i} \geq 1 & \forall e\in U \\
(\text{LP})\quad & x_{i} \geq 0 & \forall i.
\end{aligned}
\]
Writing it in a more compact form, we have
\[
\begin{aligned}
\min~ & \left\langle w, x \right\rangle \\
& Ax \geq \mathbbm{1} \\
& x\geq 0
\end{aligned}
\]
where \(A\in \mathbb{R} ^{n\times m}\) such that
\[
A_{ij} = \begin{dcases}
1, & \text{ if } e_i\in S_j ; \\
0, & \text{ otherwise}.
\end{dcases}
\]
\begin{note}
Note that when we do relaxation, we want \(x\in \fes(\text{IP}) \implies x\in \fes(\text{LP})\), i.e., \(\fes(\text{LP})\supseteq \fes(\text{IP})\). Note that in this case, for a minimization problem, we have
\[
f(x) = \OPT_\text{LP} \leq \OPT_\text{IP}.
\]
\end{note}
In this case, we see that the most natural way to get an integer solution from the fractional solution obtained from the relaxed LP is to \textbf{round} \(x\) to integral solution. This leads to the following \hyperref[algo:set-cover-LP-rounding]{algorithm}.
\par
\begin{algorithm}[H]
\DontPrintSemicolon
\caption{\hyperref[prb:set-cover]{Set cover} -- LP Rounding}\label{algo:set-cover-LP-rounding}
\KwData{A \hyperref[def:set-system]{set system} \((U, \mathcal{S})\)}
\KwResult{A \hyperref[def:covering]{covering} \(\mathcal{S} ^\prime \)}
\BlankLine
\(x\gets\solve(\text{LP})\)\Comment*[r]{Solve the relaxed (LP)}
\(\mathcal{S} ^\prime \gets \left\{ S_i \colon x_{i} \geq 1 / d \right\}\)\;
\Return{\(\mathcal{S} ^\prime \)}\;
\end{algorithm}
We now prove the correctness and \autoref{algo:set-cover-LP-rounding}'s approximation ratio.
\begin{lemma}
\(\mathcal{S} ^\prime \) is a covering.
\end{lemma}
\begin{proof}
Fix \(e\in U\), let \(S_1, \dots , S_d\) be the sets containing \(e\). We see that
\[
\sum_{i=1}^{d} x_{i} \geq 1\implies \exists j\in[d] \text{ s.t. } x_j \geq \frac{1}{d} \implies S_j\in \mathcal{S} ^\prime.
\]
\end{proof}
\begin{theorem}\label{thm:lec2-1}
\autoref{algo:set-cover-LP-rounding} is a \(d\)-approximation algorithm.
\end{theorem}
\begin{proof}
By comparing \(w(\mathcal{S} ^\prime )\) and \(\OPT_\text{LP} = \sum_{i=1}^{m} x_{i} w_{i}\), we see that
\[
\OPT \leq \sum_{S_i\in \mathcal{S} ^\prime }w_i \leq d \sum_{S_i\in \mathcal{S} ^\prime }w_{i} x_{i} \leq d\cdot \OPT_\text{LP} \leq d\cdot \OPT,
\]
which implies \(\OPT / d\leq \OPT_\text{LP} \leq \OPT\).
\begin{note}
Note that \(\OPT\) is assumed to be \(\OPT_{\text{IP}}\), i.e., the optimum of the original IP formulation of \autoref{prb:set-cover}.
\end{note}
\end{proof}
\begin{definition}[Integrality gap]\label{def:integrality-gap}
Given an integer program, the \emph{integrality gap} between \(\OPT\) and \(\OPT_{\LP} \) of its LP relaxation is defined as
\[
\sup _{\text{input } I}\frac{\OPT(I)}{\OPT_{\LP }(I)}.
\]
\end{definition}
\begin{remark}
We see that the \hyperref[def:integrality-gap]{integrality gap} of the LP relaxation used in \autoref{algo:set-cover-LP-rounding} is at most \(d\) from \autoref{thm:lec2-1}.
\end{remark}
\subsection{Randomized Linear Programming Rounding}
And indeed, we can use a more natural way to do the rounding, i.e., rounding randomly with respect to the value of \(x_i\).
\begin{intuition}
If \(x_i\) is close to \(1\), it's reasonable to include \(S_i\), and vice versa.
\end{intuition}
We see the algorithm first.
\par
\begin{algorithm}[H]
\DontPrintSemicolon
\caption{\hyperref[prb:set-cover]{Set cover} -- Randomized LP Rounding}\label{algo:set-cover-randomized-LP-rounding}
\KwData{A \hyperref[def:set-system]{set system} \((U, \mathcal{S})\)}
\KwResult{A (possible) \hyperref[def:covering]{covering} \(\mathcal{S} ^\prime \)}
\BlankLine
\(x\gets\solve(\text{LP})\)\Comment*[r]{Solve the relaxed (LP)}
\(\mathcal{S} ^\prime \gets \varnothing \)\;
\For(){\(i= 1, \dots , m\)}{
add \(S_i\) to \(\mathcal{S} ^\prime \) w.p.\ \(x_i\)\Comment*[r]{independently}
}
\Return{\(\mathcal{S} ^\prime \)}\;
\end{algorithm}
Now, the question is, how is this \(\mathcal{S} ^\prime \)'s quality? In other words, fix \(e\in U\), what's \(\Pr(\text{\(e\) is covered})\)?
\begin{lemma}
\(\Pr(\text{\(e\) is covered})\geq 1 - 1/e\approx 0.63\).
\end{lemma}
\begin{proof}
We bound \(\Pr(\overline{\text{\(e\) is covered}})\) instead. Say \(S_1, \dots , S_d\) are the sets containing \(e\), then we see that
\[
\Pr(\overline{\text{\(e\) is covered}}) = \prod\limits_{i=1}^{d} (1 - x_i)\leq \prod\limits_{i=1}^{d} e^{- x_i} = e^{- (\overbrace{x_1 + \dots + x_d}^{\geq 1})} \leq e^{-1}.
\]
\begin{note}
For every \(x\), we have \(1 + x \leq e^x\), and this approximation is close when \(\left\vert x \right\vert \) is small.
\end{note}
\end{proof}
A standard way to boost the correctness of a randomized algorithm is to run it multiple times, which leads to the following.
\par
\begin{algorithm}[H]
\DontPrintSemicolon
\caption{\hyperref[prb:set-cover]{Set cover} -- Multi-time Randomized LP Rounding}\label{algo:set-cover-randomized-LP-rounding-multi-time}
\KwData{A \hyperref[def:set-system]{set system} \((U, \mathcal{S})\), \(\alpha \)}
\KwResult{A (possible) \hyperref[def:covering]{covering} \(\mathcal{S} ^\prime \)}
\BlankLine
\(x\gets\solve(\text{LP})\)\Comment*[r]{Solve the relaxed (LP)}
\(\mathcal{S} ^\prime \gets \varnothing \)\;
\For(\Comment*[f]{independently}){\(t = 1, \dots , \alpha \)}{
\For(){\(i= 1, \dots , m\)}{
add \(S_i\) to \(\mathcal{S} ^\prime \) w.p.\ \(x_i\)\Comment*[r]{independently}
}
}
\Return{\(\mathcal{S} ^\prime \)}\;
\end{algorithm}
\begin{lemma}\label{lma:lec2-2}
With \(\alpha = 2\ln n\), \(\mathcal{S} ^\prime\) returned from \autoref{algo:set-cover-randomized-LP-rounding-multi-time} is a \hyperref[def:covering]{covering} w.p.\ at least \(1 - \frac{1}{n}\).
\end{lemma}
\begin{proof}
We have \(\Pr(\text{\(e\) is not covered}) \leq e ^{- \alpha }\) from independence of each run. Let \(\alpha = 2\ln n\), then \(\Pr(\text{\(e\) is not covered}) \leq e ^{- \alpha } = 1 / n^{2}\). By union bound,
\[
\Pr(\text{some element is not covered} ) \leq \sum_{e\in U}\Pr(\text{\(e\) is not covered} )\leq n\cdot \frac{1}{n^{2} } = \frac{1}{n}.
\]
This implies w.p.\ at least \(1 - 1/n\), \(\mathcal{S} ^\prime \) is a \hyperref[def:covering]{covering}.
\end{proof}
In other words, with \(\alpha = 2 \ln n\), \autoref{algo:set-cover-randomized-LP-rounding-multi-time} is correct with probability at least \(1 - 1 / n\).
\begin{lemma}\label{lma:lec2-3}
With \(\alpha = 2 \ln n\), \(\mathcal{S} ^\prime \) returned from \autoref{algo:set-cover-randomized-LP-rounding-multi-time} has an approximation ratio \(4 \ln n\) w.p.\ at least \(\frac{1}{2}\).\footnote{Note that \(\mathcal{S} ^\prime \) is not necessarily a \hyperref[def:covering]{covering}.}
\end{lemma}
\begin{proof}
Since \(\mathbb{E} [w(\mathcal{S} ^\prime)] \leq \alpha \sum_{i}w_{i} x_{i} = \alpha \OPT_{\text{LP} }\), we have \(\Pr(w(\mathcal{S} ^\prime )\geq 2\cdot \alpha \OPT_{\text{LP} })\leq 1 / 2\) from Markov's inequality. We see that w.p.\ \(\geq 1 / 2\), \(w(\mathcal{S} ^\prime )\leq 2\cdot 2\ln n\cdot \OPT_{\text{LP} } \leq 4 \ln n\cdot \OPT\).
\end{proof}
\begin{theorem}\label{thm:lec2-2}
By running \autoref{algo:set-cover-randomized-LP-rounding-multi-time} many times, we get a \((4\ln n)\)-approximation algorithm with high probability.\footnote{Note that we still need to choose \(\mathcal{S}^\prime \).}
\end{theorem}
\begin{proof}
Together with \autoref{lma:lec2-2} and \autoref{lma:lec2-3} and using the union bound, the probability of \(\mathcal{S} ^\prime \) not being a \hyperref[def:covering]{covering} or with weight higher than \(4\ln n\cdot \OPT\) is at most \(\frac{1}{n} + \frac{1}{2}\), which is less than \(1\) for \(n > 2\). Hence, by running \autoref{algo:set-cover-randomized-LP-rounding-multi-time} many times (independently), the failure probability is exponentially small.
\end{proof}
\begin{note}
With \autoref{thm:lec2-2}, we still need to find a valid \hyperref[def:covering]{covering} with the lowest cost, where a valid \hyperref[def:covering]{covering} with low enough weight is guaranteed to exist with high probability. Note that this is still a polynomial time algorithm since we know that checking whether \(\mathcal{S} ^\prime \) is a \hyperref[def:covering]{covering} takes just linear time.
\end{note}
\begin{remark}
Indeed, with some smarter algorithm modified from \autoref{algo:set-cover-randomized-LP-rounding-multi-time}, we can get an \(H_k\) approximation ratio.
\end{remark}