88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.178 2010/03/28 09:27:01 sriggs Exp $
11+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.179 2010/08/29 19:33:14 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -74,9 +74,8 @@ static OffsetNumber _bt_findsplitloc(Relation rel, Page page,
7474static void _bt_checksplitloc (FindSplitData * state ,
7575 OffsetNumber firstoldonright , bool newitemonleft ,
7676 int dataitemstoleft , Size firstoldonrightsz );
77- static void _bt_pgaddtup (Relation rel , Page page ,
78- Size itemsize , IndexTuple itup ,
79- OffsetNumber itup_off , const char * where );
77+ static bool _bt_pgaddtup (Page page , Size itemsize , IndexTuple itup ,
78+ OffsetNumber itup_off );
8079static bool _bt_isequal (TupleDesc itupdesc , Page page , OffsetNumber offnum ,
8180 int keysz , ScanKey scankey );
8281static void _bt_vacuum_one_page (Relation rel , Buffer buffer , Relation heapRel );
@@ -753,7 +752,9 @@ _bt_insertonpg(Relation rel,
753752 /* Do the update. No ereport(ERROR) until changes are logged */
754753 START_CRIT_SECTION ();
755754
756- _bt_pgaddtup (rel , page , itemsz , itup , newitemoff , "page" );
755+ if (!_bt_pgaddtup (page , itemsz , itup , newitemoff ))
756+ elog (PANIC , "failed to add new item to block %u in index \"%s\"" ,
757+ itup_blkno , RelationGetRelationName (rel ));
757758
758759 MarkBufferDirty (buf );
759760
@@ -879,6 +880,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
879880 Page origpage ;
880881 Page leftpage ,
881882 rightpage ;
883+ BlockNumber origpagenumber ,
884+ rightpagenumber ;
882885 BTPageOpaque ropaque ,
883886 lopaque ,
884887 oopaque ;
@@ -894,11 +897,27 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
894897 OffsetNumber i ;
895898 bool isroot ;
896899
900+ /* Acquire a new page to split into */
897901 rbuf = _bt_getbuf (rel , P_NEW , BT_WRITE );
902+
903+ /*
904+ * origpage is the original page to be split. leftpage is a temporary
905+ * buffer that receives the left-sibling data, which will be copied back
906+ * into origpage on success. rightpage is the new page that receives
907+ * the right-sibling data. If we fail before reaching the critical
908+ * section, origpage hasn't been modified and leftpage is only workspace.
909+ * In principle we shouldn't need to worry about rightpage either,
910+ * because it hasn't been linked into the btree page structure; but to
911+ * avoid leaving possibly-confusing junk behind, we are careful to rewrite
912+ * rightpage as zeroes before throwing any error.
913+ */
898914 origpage = BufferGetPage (buf );
899915 leftpage = PageGetTempPage (origpage );
900916 rightpage = BufferGetPage (rbuf );
901917
918+ origpagenumber = BufferGetBlockNumber (buf );
919+ rightpagenumber = BufferGetBlockNumber (rbuf );
920+
902921 _bt_pageinit (leftpage , BufferGetPageSize (buf ));
903922 /* rightpage was already initialized by _bt_getbuf */
904923
@@ -923,8 +942,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
923942 lopaque -> btpo_flags &= ~(BTP_ROOT | BTP_SPLIT_END | BTP_HAS_GARBAGE );
924943 ropaque -> btpo_flags = lopaque -> btpo_flags ;
925944 lopaque -> btpo_prev = oopaque -> btpo_prev ;
926- lopaque -> btpo_next = BufferGetBlockNumber ( rbuf ) ;
927- ropaque -> btpo_prev = BufferGetBlockNumber ( buf ) ;
945+ lopaque -> btpo_next = rightpagenumber ;
946+ ropaque -> btpo_prev = origpagenumber ;
928947 ropaque -> btpo_next = oopaque -> btpo_next ;
929948 lopaque -> btpo .level = ropaque -> btpo .level = oopaque -> btpo .level ;
930949 /* Since we already have write-lock on both pages, ok to read cycleid */
@@ -947,9 +966,12 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
947966 item = (IndexTuple ) PageGetItem (origpage , itemid );
948967 if (PageAddItem (rightpage , (Item ) item , itemsz , rightoff ,
949968 false, false) == InvalidOffsetNumber )
950- elog (PANIC , "failed to add hikey to the right sibling"
969+ {
970+ memset (rightpage , 0 , BufferGetPageSize (rbuf ));
971+ elog (ERROR , "failed to add hikey to the right sibling"
951972 " while splitting block %u of index \"%s\"" ,
952- BufferGetBlockNumber (buf ), RelationGetRelationName (rel ));
973+ origpagenumber , RelationGetRelationName (rel ));
974+ }
953975 rightoff = OffsetNumberNext (rightoff );
954976 }
955977
@@ -974,9 +996,12 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
974996 }
975997 if (PageAddItem (leftpage , (Item ) item , itemsz , leftoff ,
976998 false, false) == InvalidOffsetNumber )
977- elog (PANIC , "failed to add hikey to the left sibling"
999+ {
1000+ memset (rightpage , 0 , BufferGetPageSize (rbuf ));
1001+ elog (ERROR , "failed to add hikey to the left sibling"
9781002 " while splitting block %u of index \"%s\"" ,
979- BufferGetBlockNumber (buf ), RelationGetRelationName (rel ));
1003+ origpagenumber , RelationGetRelationName (rel ));
1004+ }
9801005 leftoff = OffsetNumberNext (leftoff );
9811006
9821007 /*
@@ -998,29 +1023,49 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
9981023 {
9991024 if (newitemonleft )
10001025 {
1001- _bt_pgaddtup (rel , leftpage , newitemsz , newitem , leftoff ,
1002- "left sibling" );
1026+ if (!_bt_pgaddtup (leftpage , newitemsz , newitem , leftoff ))
1027+ {
1028+ memset (rightpage , 0 , BufferGetPageSize (rbuf ));
1029+ elog (ERROR , "failed to add new item to the left sibling"
1030+ " while splitting block %u of index \"%s\"" ,
1031+ origpagenumber , RelationGetRelationName (rel ));
1032+ }
10031033 leftoff = OffsetNumberNext (leftoff );
10041034 }
10051035 else
10061036 {
1007- _bt_pgaddtup (rel , rightpage , newitemsz , newitem , rightoff ,
1008- "right sibling" );
1037+ if (!_bt_pgaddtup (rightpage , newitemsz , newitem , rightoff ))
1038+ {
1039+ memset (rightpage , 0 , BufferGetPageSize (rbuf ));
1040+ elog (ERROR , "failed to add new item to the right sibling"
1041+ " while splitting block %u of index \"%s\"" ,
1042+ origpagenumber , RelationGetRelationName (rel ));
1043+ }
10091044 rightoff = OffsetNumberNext (rightoff );
10101045 }
10111046 }
10121047
10131048 /* decide which page to put it on */
10141049 if (i < firstright )
10151050 {
1016- _bt_pgaddtup (rel , leftpage , itemsz , item , leftoff ,
1017- "left sibling" );
1051+ if (!_bt_pgaddtup (leftpage , itemsz , item , leftoff ))
1052+ {
1053+ memset (rightpage , 0 , BufferGetPageSize (rbuf ));
1054+ elog (ERROR , "failed to add old item to the left sibling"
1055+ " while splitting block %u of index \"%s\"" ,
1056+ origpagenumber , RelationGetRelationName (rel ));
1057+ }
10181058 leftoff = OffsetNumberNext (leftoff );
10191059 }
10201060 else
10211061 {
1022- _bt_pgaddtup (rel , rightpage , itemsz , item , rightoff ,
1023- "right sibling" );
1062+ if (!_bt_pgaddtup (rightpage , itemsz , item , rightoff ))
1063+ {
1064+ memset (rightpage , 0 , BufferGetPageSize (rbuf ));
1065+ elog (ERROR , "failed to add old item to the right sibling"
1066+ " while splitting block %u of index \"%s\"" ,
1067+ origpagenumber , RelationGetRelationName (rel ));
1068+ }
10241069 rightoff = OffsetNumberNext (rightoff );
10251070 }
10261071 }
@@ -1034,8 +1079,13 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
10341079 * not be splitting the page).
10351080 */
10361081 Assert (!newitemonleft );
1037- _bt_pgaddtup (rel , rightpage , newitemsz , newitem , rightoff ,
1038- "right sibling" );
1082+ if (!_bt_pgaddtup (rightpage , newitemsz , newitem , rightoff ))
1083+ {
1084+ memset (rightpage , 0 , BufferGetPageSize (rbuf ));
1085+ elog (ERROR , "failed to add new item to the right sibling"
1086+ " while splitting block %u of index \"%s\"" ,
1087+ origpagenumber , RelationGetRelationName (rel ));
1088+ }
10391089 rightoff = OffsetNumberNext (rightoff );
10401090 }
10411091
@@ -1047,16 +1097,19 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
10471097 * neighbors.
10481098 */
10491099
1050- if (!P_RIGHTMOST (ropaque ))
1100+ if (!P_RIGHTMOST (oopaque ))
10511101 {
1052- sbuf = _bt_getbuf (rel , ropaque -> btpo_next , BT_WRITE );
1102+ sbuf = _bt_getbuf (rel , oopaque -> btpo_next , BT_WRITE );
10531103 spage = BufferGetPage (sbuf );
10541104 sopaque = (BTPageOpaque ) PageGetSpecialPointer (spage );
1055- if (sopaque -> btpo_prev != ropaque -> btpo_prev )
1056- elog (PANIC , "right sibling's left-link doesn't match: "
1057- "block %u links to %u instead of expected %u in index \"%s\"" ,
1058- ropaque -> btpo_next , sopaque -> btpo_prev , ropaque -> btpo_prev ,
1105+ if (sopaque -> btpo_prev != origpagenumber )
1106+ {
1107+ memset (rightpage , 0 , BufferGetPageSize (rbuf ));
1108+ elog (ERROR , "right sibling's left-link doesn't match: "
1109+ "block %u links to %u instead of expected %u in index \"%s\"" ,
1110+ oopaque -> btpo_next , sopaque -> btpo_prev , origpagenumber ,
10591111 RelationGetRelationName (rel ));
1112+ }
10601113
10611114 /*
10621115 * Check to see if we can set the SPLIT_END flag in the right-hand
@@ -1081,8 +1134,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
10811134 *
10821135 * NO EREPORT(ERROR) till right sibling is updated. We can get away with
10831136 * not starting the critical section till here because we haven't been
1084- * scribbling on the original page yet, and we don't care about the new
1085- * sibling until it's linked into the btree.
1137+ * scribbling on the original page yet; see comments above.
10861138 */
10871139 START_CRIT_SECTION ();
10881140
@@ -1094,19 +1146,21 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
10941146 * (in the page management code) that the center of a page always be
10951147 * clean, and the most efficient way to guarantee this is just to compact
10961148 * the data by reinserting it into a new left page. (XXX the latter
1097- * comment is probably obsolete.)
1149+ * comment is probably obsolete; but in any case it's good to not scribble
1150+ * on the original page until we enter the critical section.)
10981151 *
10991152 * We need to do this before writing the WAL record, so that XLogInsert
11001153 * can WAL log an image of the page if necessary.
11011154 */
11021155 PageRestoreTempPage (leftpage , origpage );
1156+ /* leftpage, lopaque must not be used below here */
11031157
11041158 MarkBufferDirty (buf );
11051159 MarkBufferDirty (rbuf );
11061160
11071161 if (!P_RIGHTMOST (ropaque ))
11081162 {
1109- sopaque -> btpo_prev = BufferGetBlockNumber ( rbuf ) ;
1163+ sopaque -> btpo_prev = rightpagenumber ;
11101164 MarkBufferDirty (sbuf );
11111165 }
11121166
@@ -1120,8 +1174,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
11201174 XLogRecData * lastrdata ;
11211175
11221176 xlrec .node = rel -> rd_node ;
1123- xlrec .leftsib = BufferGetBlockNumber ( buf ) ;
1124- xlrec .rightsib = BufferGetBlockNumber ( rbuf ) ;
1177+ xlrec .leftsib = origpagenumber ;
1178+ xlrec .rightsib = rightpagenumber ;
11251179 xlrec .rnext = ropaque -> btpo_next ;
11261180 xlrec .level = ropaque -> btpo .level ;
11271181 xlrec .firstright = firstright ;
@@ -1920,13 +1974,11 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
19201974 * we insert the tuples in order, so that the given itup_off does
19211975 * represent the final position of the tuple!
19221976 */
1923- static void
1924- _bt_pgaddtup (Relation rel ,
1925- Page page ,
1977+ static bool
1978+ _bt_pgaddtup (Page page ,
19261979 Size itemsize ,
19271980 IndexTuple itup ,
1928- OffsetNumber itup_off ,
1929- const char * where )
1981+ OffsetNumber itup_off )
19301982{
19311983 BTPageOpaque opaque = (BTPageOpaque ) PageGetSpecialPointer (page );
19321984 IndexTupleData trunctuple ;
@@ -1941,8 +1993,9 @@ _bt_pgaddtup(Relation rel,
19411993
19421994 if (PageAddItem (page , (Item ) itup , itemsize , itup_off ,
19431995 false, false) == InvalidOffsetNumber )
1944- elog (PANIC , "failed to add item to the %s in index \"%s\"" ,
1945- where , RelationGetRelationName (rel ));
1996+ return false;
1997+
1998+ return true;
19461999}
19472000
19482001/*
0 commit comments