1515#include <unistd.h>
1616
1717static void delete_walfiles (XLogRecPtr oldest_lsn , TimeLineID oldest_tli ,
18- uint32 xlog_seg_size );
18+ uint32 xlog_seg_size );
19+ static void delete_walfiles_internal (XLogRecPtr keep_lsn , timelineInfo * tli ,
20+ uint32 xlog_seg_size , bool dry_run );
1921static void do_retention_internal (parray * backup_list , parray * to_keep_list ,
2022 parray * to_purge_list );
2123static void do_retention_merge (parray * backup_list , parray * to_keep_list ,
2224 parray * to_purge_list );
2325static void do_retention_purge (parray * to_keep_list , parray * to_purge_list );
24- static void do_retention_wal (void );
26+ static void do_retention_wal (bool dry_run );
2527
28+ // TODO: more useful messages for dry run.
2629static bool backup_deleted = false; /* At least one backup was deleted */
2730static bool backup_merged = false; /* At least one merge was enacted */
31+ static bool wal_deleted = false; /* At least one WAL segment was deleted */
2832
2933void
3034do_delete (time_t backup_id )
@@ -33,8 +37,8 @@ do_delete(time_t backup_id)
3337 parray * backup_list ,
3438 * delete_list ;
3539 pgBackup * target_backup = NULL ;
36- XLogRecPtr oldest_lsn = InvalidXLogRecPtr ;
37- TimeLineID oldest_tli = 0 ;
40+ // XLogRecPtr oldest_lsn = InvalidXLogRecPtr;
41+ // TimeLineID oldest_tli = 0;
3842
3943 /* Get complete list of backups */
4044 backup_list = catalog_get_backup_list (instance_name , INVALID_BACKUP_ID );
@@ -86,24 +90,7 @@ do_delete(time_t backup_id)
8690
8791 /* Clean WAL segments */
8892 if (delete_wal )
89- {
90- Assert (target_backup );
91-
92- /* Find oldest LSN, used by backups */
93- for (i = (int ) parray_num (backup_list ) - 1 ; i >= 0 ; i -- )
94- {
95- pgBackup * backup = (pgBackup * ) parray_get (backup_list , (size_t ) i );
96-
97- if (backup -> status == BACKUP_STATUS_OK || backup -> status == BACKUP_STATUS_DONE )
98- {
99- oldest_lsn = backup -> start_lsn ;
100- oldest_tli = backup -> tli ;
101- break ;
102- }
103- }
104-
105- delete_walfiles (oldest_lsn , oldest_tli , instance_config .xlog_seg_size );
106- }
93+ do_retention_wal (false);
10794
10895 /* cleanup */
10996 parray_walk (backup_list , pgBackupFree );
@@ -172,8 +159,8 @@ int do_retention(void)
172159 do_retention_purge (to_keep_list , to_purge_list );
173160
174161 /* TODO: some sort of dry run for delete_wal */
175- if (delete_wal && ! dry_run )
176- do_retention_wal ();
162+ if (delete_wal )
163+ do_retention_wal (dry_run );
177164
178165 /* TODO: consider dry-run flag */
179166
@@ -622,47 +609,44 @@ do_retention_purge(parray *to_keep_list, parray *to_purge_list)
622609 }
623610}
624611
625- /* Purge WAL */
612+ /* Purge WAL
613+ * Iterate over timelines
614+ * Look for closest_backup, if exists, goto next timelime
615+ * if not exists, look for oldest backup on timeline
616+ */
626617static void
627- do_retention_wal (void )
618+ do_retention_wal (bool dry_run )
628619{
629- parray * backup_list = NULL ;
630-
631- XLogRecPtr oldest_lsn = InvalidXLogRecPtr ;
632- TimeLineID oldest_tli = 0 ;
633- bool backup_list_is_empty = false;
620+ parray * tli_list ;
634621 int i ;
635622
636- /* Get list of backups. */
637- backup_list = catalog_get_backup_list (instance_name , INVALID_BACKUP_ID );
623+ tli_list = catalog_get_timelines (& instance_config );
638624
639- if (parray_num (backup_list ) == 0 )
640- backup_list_is_empty = true;
641-
642- /* Save LSN and Timeline to remove unnecessary WAL segments */
643- for (i = (int ) parray_num (backup_list ) - 1 ; i >= 0 ; i -- )
625+ for (i = 0 ; i < parray_num (tli_list ); i ++ )
644626 {
645- pgBackup * backup = (pgBackup * ) parray_get (backup_list , i );
646-
647- /* Get LSN and TLI of the oldest backup with valid start_lsn and tli */
648- if (backup -> tli > 0 && !XLogRecPtrIsInvalid (backup -> start_lsn ))
649- {
650- oldest_tli = backup -> tli ;
651- oldest_lsn = backup -> start_lsn ;
652- break ;
653- }
654- }
627+ timelineInfo * tlinfo = (timelineInfo * ) parray_get (tli_list , i );
655628
656- /* Be paranoid */
657- if (!backup_list_is_empty && XLogRecPtrIsInvalid (oldest_lsn ))
658- elog (ERROR , "Not going to purge WAL because LSN is invalid" );
629+ /* Empty timeline can be safely skipped */
630+ if (tlinfo -> n_xlog_files == 0 &&
631+ parray_num (tlinfo -> xlog_filelist ) == 0 )
632+ continue ;
659633
660- /* Purge WAL files */
661- delete_walfiles (oldest_lsn , oldest_tli , instance_config .xlog_seg_size );
634+ /* If closest backup is exists, then timeline can be safely skipped */
635+ if (tlinfo -> closest_backup )
636+ continue ;
662637
663- /* Cleanup */
664- parray_walk (backup_list , pgBackupFree );
665- parray_free (backup_list );
638+ /*
639+ * Purge all WAL segments before START LSN of oldest backup.
640+ * If there is no backups on timeline, then whole timeline
641+ * can be safely purged.
642+ */
643+ if (tlinfo -> oldest_backup )
644+ delete_walfiles_internal (tlinfo -> oldest_backup -> start_lsn ,
645+ tlinfo , instance_config .xlog_seg_size , dry_run );
646+ else
647+ delete_walfiles_internal (InvalidXLogRecPtr ,
648+ tlinfo , instance_config .xlog_seg_size , dry_run );
649+ }
666650}
667651
668652/*
@@ -728,6 +712,158 @@ delete_backup_files(pgBackup *backup)
728712 return ;
729713}
730714
715+ /* Purge WAL archive.
716+ * If 'keep_lsn' is InvalidXLogRecPtr, then whole timeline can be purged
717+ * If 'keep_lsn' is valid LSN, then every lesser segment can be purged.
718+ * If 'dry_run' is set, then don`t actually delete anything.
719+ *
720+ * Case 1:
721+ * archive is not empty, 'keep_lsn' is valid and we can delete something.
722+ * Case 2:
723+ * archive is not empty, 'keep_lsn' is valid and prevening us from deleting anything.
724+ * Case 3:
725+ * archive is not empty, 'keep_lsn' is invalid, drop everyhing in archive.
726+ * Case 4:
727+ * archive is empty, 'keep_lsn' is valid, assume corruption of WAL archive.
728+ * Case 5:
729+ * archive is empty, 'keep_lsn' is invalid, drop backup history files
730+ * and partial WAL segments in archive.
731+ *
732+ * Q: Maybe we should stop treating partial WAL segments as second-class citizens?
733+ */
734+ static void
735+ delete_walfiles_internal (XLogRecPtr keep_lsn , timelineInfo * tlinfo ,
736+ uint32 xlog_seg_size , bool dry_run )
737+ {
738+ XLogSegNo StartSegNo ; /* First segment to delete */
739+ XLogSegNo EndSegNo = 0 ; /* Oldest segment to keep */
740+ int rc ;
741+ int i ;
742+ int wal_size_logical = 0 ;
743+ int wal_size_actual = 0 ;
744+ char wal_pretty_size [20 ];
745+ bool purge_all = false;
746+
747+ /* Timeline is completely empty */
748+ if (parray_num (tlinfo -> xlog_filelist ) == 0 )
749+ {
750+ elog (INFO , "Timeline %i is empty, nothing to remove" , tlinfo -> tli );
751+ return ;
752+ }
753+
754+ if (XLogRecPtrIsInvalid (keep_lsn ))
755+ {
756+ /* Drop all segments in timeline */
757+ elog (INFO , "All files on timeline %i will be removed" , tlinfo -> tli );
758+ StartSegNo = tlinfo -> begin_segno ;
759+ EndSegNo = tlinfo -> end_segno ;
760+ purge_all = true;
761+ }
762+ else
763+ {
764+ /* Drop all segments between begin_segno and segment with keep_lsn (excluding) */
765+ StartSegNo = tlinfo -> begin_segno ;
766+ GetXLogSegNo (keep_lsn , EndSegNo , xlog_seg_size );
767+ }
768+
769+ if (EndSegNo > 0 && EndSegNo > StartSegNo )
770+ elog (INFO , "WAL segments between %08X%08X and %08X%08X on timeline %i will be removed" ,
771+ (uint32 ) StartSegNo / xlog_seg_size , (uint32 ) StartSegNo % xlog_seg_size ,
772+ (uint32 ) EndSegNo / xlog_seg_size , (uint32 ) EndSegNo % xlog_seg_size ,
773+ tlinfo -> tli );
774+
775+ if (EndSegNo > StartSegNo )
776+ /* typical scenario */
777+ wal_size_logical = (EndSegNo - StartSegNo ) * xlog_seg_size ;
778+ else if (EndSegNo < StartSegNo )
779+ {
780+ /* It is actually possible for EndSegNo to be less than StartSegNo
781+ * in case of :
782+ * 1. WAL archive corruption.
783+ * 2. There is no actual WAL archive to speak of and
784+ * 'keep_lsn' is coming from STREAM backup.
785+ *
786+ * Assume the worst.
787+ */
788+ if (StartSegNo > 0 && EndSegNo > 0 )
789+ elog (WARNING , "On timeline %i first segment %08X%08X is greater than "
790+ "oldest segment to keep %08X%08X. Possible WAL archive corruption." ,
791+ tlinfo -> tli ,
792+ (uint32 ) StartSegNo / xlog_seg_size , (uint32 ) StartSegNo % xlog_seg_size ,
793+ (uint32 ) EndSegNo / xlog_seg_size , (uint32 ) EndSegNo % xlog_seg_size );
794+ }
795+ else if (EndSegNo == StartSegNo && !purge_all )
796+ {
797+ /* 'Nothing to delete' scenario because of 'keep_lsn'
798+ * with possible exception of partial and backup history files.
799+ */
800+ elog (INFO , "Nothing to remove on timeline %i" , tlinfo -> tli );
801+ }
802+
803+ /* Report the logical size to delete */
804+ if (wal_size_logical > 0 )
805+ {
806+ pretty_size (wal_size_logical , wal_pretty_size , lengthof (wal_pretty_size ));
807+ elog (INFO , "WAL size to remove on timeline %i: %s" ,
808+ tlinfo -> tli , wal_pretty_size );
809+ }
810+
811+ /* Calculate the actual size to delete */
812+ for (i = 0 ; i < parray_num (tlinfo -> xlog_filelist ); i ++ )
813+ {
814+ xlogFile * wal_file = (xlogFile * ) parray_get (tlinfo -> xlog_filelist , i );
815+
816+ if (purge_all || wal_file -> segno < EndSegNo )
817+ wal_size_actual += wal_file -> file .size ;
818+ }
819+
820+ /* Report the actual size to delete */
821+ if (wal_size_actual > 0 )
822+ {
823+ pretty_size (wal_size_actual , wal_pretty_size , lengthof (wal_pretty_size ));
824+ elog (INFO , "Resident data size to free on timeline %i: %s" ,
825+ tlinfo -> tli , wal_pretty_size );
826+ }
827+
828+ if (dry_run )
829+ return ;
830+
831+ for (i = 0 ; i < parray_num (tlinfo -> xlog_filelist ); i ++ )
832+ {
833+ xlogFile * wal_file = (xlogFile * ) parray_get (tlinfo -> xlog_filelist , i );
834+
835+ if (interrupted )
836+ elog (ERROR , "interrupted during WAL archive purge" );
837+
838+ /* Any segment equal or greater than EndSegNo must be kept
839+ * unless it`s a 'purge all' scenario.
840+ */
841+ if (purge_all || wal_file -> segno < EndSegNo )
842+ {
843+ /* unlink segment */
844+ rc = unlink (wal_file -> file .path );
845+ if (rc < 0 )
846+ {
847+ /* Missing file is not considered as error condition */
848+ if (errno != ENOENT )
849+ elog (ERROR , "Could not remove file \"%s\": %s" ,
850+ wal_file -> file .path , strerror (errno ));
851+ }
852+ else
853+ {
854+ if (wal_file -> type == SEGMENT )
855+ elog (VERBOSE , "Removed WAL segment \"%s\"" , wal_file -> file .path );
856+ else if (wal_file -> type == PARTIAL_SEGMENT )
857+ elog (VERBOSE , "Removed partial WAL segment \"%s\"" , wal_file -> file .path );
858+ else if (wal_file -> type == BACKUP_HISTORY_FILE )
859+ elog (VERBOSE , "Removed backup history file \"%s\"" , wal_file -> file .path );
860+ }
861+
862+ wal_deleted = true;
863+ }
864+ }
865+ }
866+
731867/*
732868 * Deletes WAL segments up to oldest_lsn or all WAL segments (if all backups
733869 * was deleted and so oldest_lsn is invalid).
@@ -739,7 +875,7 @@ delete_backup_files(pgBackup *backup)
739875 */
740876static void
741877delete_walfiles (XLogRecPtr oldest_lsn , TimeLineID oldest_tli ,
742- uint32 xlog_seg_size )
878+ uint32 xlog_seg_size )
743879{
744880 XLogSegNo targetSegNo ;
745881 char oldestSegmentNeeded [MAXFNAMELEN ];
0 commit comments