-
Notifications
You must be signed in to change notification settings - Fork 0
/
celery_rabbitmq.tex
234 lines (201 loc) · 6.41 KB
/
celery_rabbitmq.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
\documentclass{beamer}
\mode<presentation> {
% The Beamer class comes with a number of default slide themes
% which change the colors and layouts of slides. Below this is a list
% of all the themes, uncomment each in turn to see what they look like.
%\usetheme{default}
%\usetheme{AnnArbor}
%\usetheme{Antibes}
%\usetheme{Bergen}
%\usetheme{Berkeley}
%\usetheme{Berlin}
%\usetheme{Boadilla}
%\usetheme{CambridgeUS}
%\usetheme{Copenhagen}
%\usetheme{Darmstadt}
%\usetheme{Dresden}
%\usetheme{Frankfurt}
%\usetheme{Goettingen}
%\usetheme{Hannover}
%\usetheme{Ilmenau}
%\usetheme{JuanLesPins}
%\usetheme{Luebeck}
\usetheme{Madrid}
%\usetheme{Malmoe}
%\usetheme{Marburg}
%\usetheme{Montpellier}
%\usetheme{PaloAlto}
%\usetheme{Pittsburgh}
%\usetheme{Rochester}
%\usetheme{Singapore}
%\usetheme{Szeged}
%\usetheme{Warsaw}
% As well as themes, the Beamer class has a number of color themes
% for any slide theme. Uncomment each of these in turn to see how it
% changes the colors of your current slide theme.
%\usecolortheme{albatross}
%\usecolortheme{beaver}
%\usecolortheme{beetle}
%\usecolortheme{crane}
%\usecolortheme{dolphin}
%\usecolortheme{dove}
%\usecolortheme{fly}
%\usecolortheme{lily}
%\usecolortheme{orchid}
%\usecolortheme{rose}
%\usecolortheme{seagull}
%\usecolortheme{seahorse}
%\usecolortheme{whale}
%\usecolortheme{wolverine}
}
\usepackage{graphicx} % Allows including images
\usepackage{booktabs} % Allows the use of \toprule, \midrule and \bottomrule in tables
\usepackage{listings}
\usepackage{hyperref}
%----------------------------------------------------------------------------------------
% TITLE PAGE
%----------------------------------------------------------------------------------------
\title[DDB with Celery and RabbitMQ]{Performance Tuning and Analysis of Celery and RabbitMQ}
\author{Robin}
\date{\today}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{Agenda}
\tableofcontents
\end{frame}
\section{Test Environment}
\begin{frame}
\frametitle{DDB Environment}
\begin{itemize}
\item EC2
\begin{itemize}
\item 4 machines
\item Each instance has 6TB EBS disk
\item c3.4xlarge, 16 vCPU, 30GB Memory
\item we test the DDB with 4 Nodes
\end{itemize}
\item Production
\begin{itemize}
\item 4 machines
\item Each instance has 4TB SSD disk
\item 32 CPU and 64GB memory
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Celery and RabbitMQ}
\begin{itemize}
\item Three Liquid Web machines intnodeb, intnodec, and intmaster are used to support Celery and RabbitMQ
\begin{itemize}
\item the three machines have the same hardware
\item 1.60TB disk, 30GB memory, 8 cores
\end{itemize}
\item intmaster hosts RabbitMQ and the test client
\item 100 celery workers are launched in both intnodeb and intnodec
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Test Cases}
\begin{itemize}
\item We construct 669 identical test cases for AH, PH, ACB, PCB, ATC, PTC and UATC. We triple the test cases (2007)
\begin{itemize}
\item Randomize: shuffle all the test cases and divide them equally among n processes. Each process then shuffles its assigned test cases and runs them
\item n is in (1, 2, 4, 8, 16, 32, 48)
\item we run the test on intmaster with \texttt{for n in 8 1 2 4 8 16 32 48; do time python rabbitmq\_mess.py \$n; done |\& tee log}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{How are features parallelized?}
\textbf{Divide and Conquer}: each request is partitioned into small computation units by \emph{one} input parameter; a Celery task is then created for each unit, and finally the results of the units are combined and returned to the client.
\begin{itemize}
\item AH/PH: the date range is partitioned into three parts if it spans more than 30 days or does not fall within a single month.
\item ACB/PCB: 155 countries are partitioned into 10 chunks.
\item ATC/PTC/UATC: partitioned by feed id, one feed one task.
\end{itemize}
\textbf{Pivot and Granularity of Parallelism}
\begin{itemize}
\item Pivot: the parallelism pivot is determined by the natural computation structure of each feature.
\item Granularity: some granularities follow from the problem structure; most are based on experiments.
\begin{itemize}
\item the best granularity of the \{app, pub\} country breakdown is 8 or 16.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\begin{figure}
\includegraphics[width=\linewidth]{images/union_granularity.png}
\end{figure}
\end{frame}
\section{Performance Tuning}
\begin{frame}
\frametitle{Change configuration of Celery}
\begin{itemize}
\item librabbitmq
\item each individual task type has its own RabbitMQ queue
\item all queues are transient
\end{itemize}
\end{frame}
\section{Performance Analysis}
\begin{frame}
\begin{figure}
\includegraphics[width=\linewidth]{images/ACB.png}
\end{figure}
\end{frame}
\begin{frame}
\begin{figure}
\includegraphics[width=\linewidth]{images/PCB.png}
\end{figure}
\end{frame}
\begin{frame}
\begin{figure}
\includegraphics[width=\linewidth]{images/AH.png}
\end{figure}
\end{frame}
\begin{frame}
\begin{figure}
\includegraphics[width=\linewidth]{images/PH.png}
\end{figure}
\end{frame}
\begin{frame}
\begin{figure}
\includegraphics[width=\linewidth]{images/ATC.png}
\end{figure}
\end{frame}
\begin{frame}
\begin{figure}
\includegraphics[width=\linewidth]{images/PTC.png}
\end{figure}
\end{frame}
\begin{frame}
\begin{figure}
\includegraphics[width=\linewidth]{images/UATC.png}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Summary}
\begin{itemize}
\item ACB/PCB: compared with production and the serial run on EC2, Celery speeds up performance considerably, and Celery/RabbitMQ is also more stable.
\item AH/PH: Celery/RabbitMQ is more stable, especially for large date ranges; the performance hierarchies disappear in Celery's graphs.
\item ATC/PTC/UATC: Celery has better performance and is also more stable than the production and serial ones.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Is it necessary to parallelize AH/PH?}
\begin{itemize}
\item although serial AH/PH has good performance for small date ranges, parallelized AH/PH is more stable than the serial one.
\item it is more manageable if we parallelize AH/PH, so that all features follow the same architecture and computing flow.
\end{itemize}
\begin{figure}
\includegraphics[width=10cm,height=6cm,keepaspectratio]{images/ah_ph_parallel_vs_serial.png}
\end{figure}
\end{frame}
%------------------------------------------------
\begin{frame}
{\centering\Huge The End\par}
\end{frame}
%----------------------------------------------------------------------------------------
\end{document}