Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debug cache write issues #1572

Draft
wants to merge 5 commits into
base: 7.1.x-netlify
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 90 additions & 14 deletions iocore/cache/CacheRead.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@
#include "HttpCacheSM.h" //Added to get the scope of HttpCacheSM object.
#endif

#define DebugCache(fmt, ...) DebugSpecific(cont->cont_debug_on, "cache_read", "[%" PRId64 "] " fmt, cont->cont_sm_id, __VA_ARGS__)
#define DebugCacheVC(fmt, ...) DebugSpecific(_action.continuation->cont_debug_on, "cache_read", "[%" PRId64 "] " fmt, _action.continuation->cont_sm_id, __VA_ARGS__)

extern int cache_config_compatibility_4_2_0_fixup;

Action *
Cache::open_read(Continuation *cont, const CacheKey *key, CacheFragType type, const char *hostname, int host_len)
{
DebugCache("%s", "Cache::open_read");
if (!CacheProcessor::IsCacheReady(type)) {
cont->handleEvent(CACHE_EVENT_OPEN_READ_FAILED, (void *)-ECACHE_NOT_READY);
return ACTION_RESULT_DONE;
Expand Down Expand Up @@ -95,6 +99,7 @@ Action *
Cache::open_read(Continuation *cont, const CacheKey *key, CacheHTTPHdr *request, CacheLookupHttpConfig *params, CacheFragType type,
const char *hostname, int host_len)
{
DebugCache("%s", "Cache::open_read (with params)");
if (!CacheProcessor::IsCacheReady(type)) {
cont->handleEvent(CACHE_EVENT_OPEN_READ_FAILED, (void *)-ECACHE_NOT_READY);
return ACTION_RESULT_DONE;
Expand Down Expand Up @@ -164,6 +169,8 @@ Cache::open_read(Continuation *cont, const CacheKey *key, CacheHTTPHdr *request,
uint32_t
CacheVC::load_http_info(CacheHTTPInfoVector *info, Doc *doc, RefCountObj *block_ptr)
{
DebugCacheVC("%s", "Cache::load_http_info");

uint32_t zret = info->get_handles(doc->hdr(), doc->hlen, block_ptr);
if (cache_config_compatibility_4_2_0_fixup && // manual override not engaged
!this->f.doc_from_ram_cache && // it's already been done for ram cache fragments
Expand All @@ -179,6 +186,7 @@ CacheVC::load_http_info(CacheHTTPInfoVector *info, Doc *doc, RefCountObj *block_
int
CacheVC::openReadFromWriterFailure(int event, Event *e)
{
DebugCacheVC("%s", "CacheVC::openReadFromWriterFailure");
od = nullptr;
vector.clear(false);
CACHE_INCREMENT_DYN_STAT(cache_read_failure_stat);
Expand All @@ -191,15 +199,19 @@ CacheVC::openReadFromWriterFailure(int event, Event *e)
int
CacheVC::openReadChooseWriter(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
{
DebugCacheVC("%s", "CacheVC::openReadChooseWriter");
intptr_t err = ECACHE_DOC_BUSY;
CacheVC *w = nullptr;

ink_assert(vol->mutex->thread_holding == mutex->thread_holding && write_vc == nullptr);

if (!od)
if (!od) {
DebugCacheVC("%s", "CacheVC::openReadChooseWriter, no OpenDirEntry");
return EVENT_RETURN;
}

if (frag_type != CACHE_FRAG_TYPE_HTTP) {
DebugCacheVC("%s", "CacheVC::openReadChooseWriter, not an HTTP fragment");
ink_assert(od->num_writers == 1);
w = od->writers.head;
if (w->start_time > start_time || w->closed < 0) {
Expand All @@ -212,22 +224,35 @@ CacheVC::openReadChooseWriter(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSE
}
#ifdef HTTP_CACHE
else {
DebugCacheVC("%s", "CacheVC::openReadChooseWriter, dealing with an HTTP fragment");
write_vector = &od->vector;
int write_vec_cnt = write_vector->count();
for (int c = 0; c < write_vec_cnt; c++)
DebugCacheVC("CacheVC::openReadChooseWriter, write_vec_cnt = %d", write_vec_cnt);
for (int c = 0; c < write_vec_cnt; c++) {
vector.insert(write_vector->get(c));
}
// check if all the writers who came before this reader have
// set the http_info.
for (w = (CacheVC *)od->writers.head; w; w = (CacheVC *)w->opendir_link.next) {
if (w->start_time > start_time || w->closed < 0)
DebugCacheVC("%s checking writer.., w->start_time = %d, start_time = %d, w->closed = %d", "CacheVC::openReadChooseWriter",
w->start_time, start_time, w->closed);
if (w->start_time > start_time || w->closed < 0) {
continue;
}

DebugCacheVC("%s checking writer.., cache_config_read_while_writer = %d", "CacheVC::openReadChooseWriter",
cache_config_read_while_writer);
if (!w->closed && !cache_config_read_while_writer) {
return -err;
}
if (w->alternate_index != CACHE_ALT_INDEX_DEFAULT)

if (w->alternate_index != CACHE_ALT_INDEX_DEFAULT) {
DebugCacheVC("%s alternate index isn't default", "CacheVC::openReadChooseWriter");
continue;
}

if (!w->closed && !w->alternate.valid()) {
DebugCacheVC("%s w isn't closed and w->alternate is not valid??", "CacheVC::openReadChooseWriter");
od = nullptr;
ink_assert(!write_vc);
vector.clear(false);
Expand All @@ -236,6 +261,8 @@ CacheVC::openReadChooseWriter(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSE
// construct the vector from the writers.
int alt_ndx = CACHE_ALT_INDEX_DEFAULT;
if (w->f.update) {
DebugCacheVC("%s constructing vector from the writers", "CacheVC::openReadChooseWriter");

// all Update cases. Need to get the alternate index.
alt_ndx = get_alternate_index(&vector, w->update_key);
// if its an alternate delete
Expand All @@ -246,29 +273,43 @@ CacheVC::openReadChooseWriter(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSE
}
}

if (w->alternate.valid())
if (w->alternate.valid()) {
DebugCacheVC("%s constructing vector from the writers", "CacheVC::openReadChooseWriter");

vector.insert(&w->alternate, alt_ndx);
}
}

if (!vector.count()) {
if (od->reading_vec) {
// the writer(s) are reading the vector, so there is probably
// an old vector. Since this reader came before any of the
// current writers, we should return the old data

DebugCacheVC("%s writers are reading the vector, return old data", "CacheVC::openReadChooseWriter");
od = nullptr;
return EVENT_RETURN;
}

DebugCacheVC("%s no doc! (vector count is 0)", "CacheVC::openReadChooseWriter");
return -ECACHE_NO_DOC;
}
if (cache_config_select_alternate) {
DebugCacheVC("%s selecting from alternates..", "CacheVC::openReadChooseWriter");
alternate_index = HttpTransactCache::SelectFromAlternates(&vector, &request, params);
if (alternate_index < 0)
if (alternate_index < 0) {
DebugCacheVC("%s cache miss!", "CacheVC::openReadChooseWriter");
return -ECACHE_ALT_MISS;
} else
}
} else {
alternate_index = 0;
}
CacheHTTPInfo *obj = vector.get(alternate_index);
DebugCacheVC("%s got HTTP cache info for alternate index %d", "CacheVC::openReadChooseWriter", alternate_index);

for (w = (CacheVC *)od->writers.head; w; w = (CacheVC *)w->opendir_link.next) {
if (obj->m_alt == w->alternate.m_alt) {
DebugCacheVC("%s found write_vc", "CacheVC::openReadChooseWriter");
write_vc = w;
break;
}
Expand All @@ -290,6 +331,7 @@ CacheVC::openReadChooseWriter(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSE
int
CacheVC::openReadFromWriter(int event, Event *e)
{
DebugCacheVC("%s", "CacheVC::openReadFromWriter");
if (!f.read_from_writer_called) {
// The assignment to last_collision as nullptr was
// made conditional after INKqa08411
Expand Down Expand Up @@ -323,18 +365,27 @@ CacheVC::openReadFromWriter(int event, Event *e)
if (!write_vc) {
int ret = openReadChooseWriter(event, e);
if (ret < 0) {
DebugCacheVC("%s got error code, releasing lock", "openReadFromWriter");
MUTEX_RELEASE(lock);
DebugCacheVC("%s setting handler to openReadFromWriterFailure", "openReadFromWriter");
SET_HANDLER(&CacheVC::openReadFromWriterFailure);
DebugCacheVC("%s calling openReadFromWriterFailure with CACHE_EVENT_OPEN_READ_FAILED", "openReadFromWriter");
return openReadFromWriterFailure(CACHE_EVENT_OPEN_READ_FAILED, reinterpret_cast<Event *>(ret));
} else if (ret == EVENT_RETURN) {
DebugCacheVC("%s got EVENT_RETURN, releasing lock", "openReadFromWriter");
MUTEX_RELEASE(lock);
DebugCacheVC("%s setting handler to openReadStartHead", "openReadFromWriter");
SET_HANDLER(&CacheVC::openReadStartHead);
return openReadStartHead(event, e);
} else if (ret == EVENT_CONT) {
DebugCacheVC("%s got EVENT_CONT. writer_lock_retry = %d, max_retries = %d", "openReadFromWriter",
writer_lock_retry, cache_config_read_while_writer_max_retries);
ink_assert(!write_vc);
if (writer_lock_retry < cache_config_read_while_writer_max_retries) {
DebugCacheVC("%s scheduling retry, current try is %d / %d", "openReadFromWriter", writer_lock_retry, cache_config_read_while_writer_max_retries);
VC_SCHED_WRITER_RETRY();
} else {
DebugCacheVC("%s returning failure", "openReadFromWriter");
return openReadFromWriterFailure(CACHE_EVENT_OPEN_READ_FAILED, (Event *)-err);
}
} else
Expand Down Expand Up @@ -469,6 +520,7 @@ CacheVC::openReadFromWriter(int event, Event *e)
int
CacheVC::openReadFromWriterMain(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
{
DebugCacheVC("%s", "CacheVC::openReadFromWriterMain");
cancel_trigger();
if (seek_to) {
vio.ndone = seek_to;
Expand Down Expand Up @@ -513,6 +565,7 @@ CacheVC::openReadFromWriterMain(int /* event ATS_UNUSED */, Event * /* e ATS_UNU
int
CacheVC::openReadClose(int event, Event * /* e ATS_UNUSED */)
{
DebugCacheVC("%s", "CacheVC::openReadClose");
cancel_trigger();
if (is_io_in_progress()) {
if (event != AIO_EVENT_DONE)
Expand All @@ -537,6 +590,7 @@ CacheVC::openReadClose(int event, Event * /* e ATS_UNUSED */)
int
CacheVC::openReadReadDone(int event, Event *e)
{
DebugCacheVC("%s", "CacheVC::openReadReadDone");
Doc *doc = nullptr;

cancel_trigger();
Expand Down Expand Up @@ -616,6 +670,7 @@ CacheVC::openReadReadDone(int event, Event *e)
int
CacheVC::openReadMain(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
{
DebugCacheVC("%s", "CacheVC::openReadMain");
cancel_trigger();
Doc *doc = (Doc *)buf->data();
int64_t ntodo = vio.ntodo();
Expand Down Expand Up @@ -780,6 +835,7 @@ Lread : {
int
CacheVC::openReadStartEarliest(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
{
DebugCacheVC("%s", "CacheVC::openReadStartEarliest");
int ret = 0;
Doc *doc = nullptr;
cancel_trigger();
Expand Down Expand Up @@ -921,6 +977,7 @@ CacheVC::openReadStartEarliest(int /* event ATS_UNUSED */, Event * /* e ATS_UNUS
int
CacheVC::openReadVecWrite(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
{
DebugCacheVC("%s", "CacheVC::openReadVecWrite");
cancel_trigger();
set_io_not_in_progress();
ink_assert(od);
Expand Down Expand Up @@ -974,26 +1031,34 @@ CacheVC::openReadVecWrite(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */
int
CacheVC::openReadStartHead(int event, Event *e)
{
DebugCacheVC("%s", "CacheVC::openReadStartHead starting");
intptr_t err = ECACHE_NO_DOC;
Doc *doc = nullptr;
cancel_trigger();
set_io_not_in_progress();
if (_action.cancelled)
return free_CacheVC(this);
{
DebugCacheVC("%s", "openReadStartHead try lock...");
CACHE_TRY_LOCK(lock, vol->mutex, mutex->thread_holding);
if (!lock.is_locked())
if (!lock.is_locked()) {
DebugCacheVC("%s", "openReadStartHead try lock... failed! scheduling retry...");
VC_SCHED_LOCK_RETRY();
if (!buf)
DebugCacheVC("%s", "openReadStartHead try lock... failed! scheduling retry... done!");
}
if (!buf) {
goto Lread;
if (!io.ok())
}
if (!io.ok()) {
goto Ldone;
}
// an object needs to be outside the aggregation window in order to be
// be evacuated as it is read
if (!dir_agg_valid(vol, &dir)) {
// a directory entry which is nolonger valid may have been overwritten
if (!dir_valid(vol, &dir))
if (!dir_valid(vol, &dir)) {
last_collision = nullptr;
}
goto Lread;
}
doc = (Doc *)buf->data();
Expand All @@ -1015,18 +1080,23 @@ CacheVC::openReadStartHead(int event, Event *e)
last_collision = nullptr;
goto Lread;
}
if (!(doc->first_key == key))
if (!(doc->first_key == key)) {
goto Lread;
if (f.lookup)
}
if (f.lookup) {
goto Lookup;
}
earliest_dir = dir;
#ifdef HTTP_CACHE
CacheHTTPInfo *alternate_tmp;
if (frag_type == CACHE_FRAG_TYPE_HTTP) {
DebugCacheVC("%s", "openReadStartHead dealing with an HTTP fragment");

uint32_t uml;
ink_assert(doc->hlen);
if (!doc->hlen)
if (!doc->hlen) {
goto Ldone;
}
if ((uml = this->load_http_info(&vector, doc)) != doc->hlen) {
if (buf) {
HTTPCacheAlt *alt = reinterpret_cast<HTTPCacheAlt *>(doc->hdr());
Expand Down Expand Up @@ -1119,6 +1189,7 @@ CacheVC::openReadStartHead(int event, Event *e)
goto Lsuccess;

Lread:
DebugCacheVC("%s", "openReadStartHead: in Lread");
// check for collision
// INKqa07684 - Cache::lookup returns CACHE_EVENT_OPEN_READ_FAILED.
// don't want to go through this BS of reading from a writer if
Expand All @@ -1144,6 +1215,7 @@ CacheVC::openReadStartHead(int event, Event *e)
}
}
Ldone:
DebugCacheVC("openReadStartHead: in Ldone, f.lookup is %d", f.lookup);
if (!f.lookup) {
CACHE_INCREMENT_DYN_STAT(cache_read_failure_stat);
_action.continuation->handleEvent(CACHE_EVENT_OPEN_READ_FAILED, (void *)-err);
Expand All @@ -1153,15 +1225,19 @@ CacheVC::openReadStartHead(int event, Event *e)
}
return free_CacheVC(this);
Lcallreturn:
DebugCacheVC("%s", "openReadStartHead: in Lcallreturn");
return handleEvent(AIO_EVENT_DONE, nullptr); // hopefully a tail call
Lsuccess:
DebugCacheVC("%s", "openReadStartHead: in Lsuccess");
SET_HANDLER(&CacheVC::openReadMain);
return callcont(CACHE_EVENT_OPEN_READ);
Lookup:
DebugCacheVC("%s", "openReadStartHead: in Lookup");
CACHE_INCREMENT_DYN_STAT(cache_lookup_success_stat);
_action.continuation->handleEvent(CACHE_EVENT_LOOKUP, nullptr);
return free_CacheVC(this);
Learliest:
DebugCacheVC("%s", "openReadStartHead: in Learliest");
first_buf = buf;
buf = nullptr;
earliest_key = key;
Expand Down
12 changes: 9 additions & 3 deletions iocore/cache/P_CacheInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,15 @@ struct EvacuationBlock;
do { \
ink_assert(!trigger); \
writer_lock_retry++; \
ink_hrtime _t = HRTIME_MSECONDS(cache_read_while_writer_retry_delay); \
if (writer_lock_retry > 2) \
_t = HRTIME_MSECONDS(cache_read_while_writer_retry_delay) * 2; \
int delay; \
ink_hrtime _t; \
if (writer_lock_retry > 2) { \
delay = cache_read_while_writer_retry_delay * 2; \
} else { \
delay = cache_read_while_writer_retry_delay; \
} \
DebugCacheVC("scheduling retry in %d ms", delay); \
_t = HRTIME_MSECONDS(delay); \
trigger = mutex->thread_holding->schedule_in_local(this, _t); \
return EVENT_CONT; \
} while (0)
Expand Down
3 changes: 3 additions & 0 deletions iocore/eventsystem/I_Continuation.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ class Continuation : private force_VFPT_to_top
*/
ContFlags control_flags;

int64_t cont_sm_id;
bool cont_debug_on;

/**
Receives the event code and data for an Event.

Expand Down
Loading