## Imports

In [1]:
import pandas as pd

# Let a Series/DataFrame repr show up to 100 rows before pandas truncates it.
pd.options.display.max_rows = 100

## Load Sample Data

In [2]:
# NOTE(review): both paths are absolute and machine-specific; point them at a
# configurable data directory before sharing this notebook.
raw_dates = pd.read_csv(r"C:\Users\jusla\Downloads\Date_Test_Data - Sheet1 (2).txt")

# Keep a .csv copy of the raw file next to the original download.
raw_dates.to_csv(r"C:\Users\jusla\Downloads\Date_Test_Data - Sheet1 (2).csv")

# Parse first, then sort. `DataFrame.sort_values` returns a new object, so the
# previous bare call had no effect, and sorting the raw 'm/d/Y' strings would
# have been lexicographic rather than chronological anyway.
dates = (
    pd.to_datetime(raw_dates['Date_Col'], format='%m/%d/%Y')
    .sort_values()
    .reset_index(drop=True)
)
dates

0    2021-01-06
1    2021-01-08
2    2021-01-09
3    2021-01-10
4    2021-01-13
5    2021-01-14
6    2021-01-15
7    2021-01-16
8    2021-01-17
9    2021-01-18
10   2021-01-19
11   2021-01-20
12   2021-01-21
13   2021-01-22
14   2021-01-23
15   2021-01-24
16   2021-01-25
17   2021-01-26
18   2021-01-27
19   2021-01-28
20   2021-01-29
21   2021-01-30
22   2021-01-31
23   2021-02-01
24   2021-02-02
25   2021-02-03
26   2021-02-04
27   2021-02-05
28   2021-02-06
29   2021-02-07
30   2021-02-08
31   2021-02-09
32   2021-02-10
33   2021-02-11
34   2021-02-12
35   2021-02-13
36   2021-02-14
37   2021-02-15
38   2021-02-18
39   2021-02-19
40   2021-02-20
41   2021-02-21
42   2021-02-22
43   2021-02-23
44   2021-02-24
45   2021-02-25
46   2021-02-26
47   2021-02-27
48   2021-02-28
49   2021-03-01
50   2021-03-02
51   2021-03-03
52   2021-03-04
53   2021-03-05
54   2021-03-06
55   2021-03-07
56   2021-03-08
57   2021-03-09
58   2021-03-10
59   2021-03-13
60   2021-03-14
61   2021-03-15
62   202

## Define Function to Calculate Continuous Ranges
The goal of this function is to take in a list of dates and return the date ranges that represent every run of consecutive dates found in the data.
For example: The dates (1/1/2000, 1/2/2000, 1/3/2000) would form the continuous date range of 1/1/2000 - 1/3/2000.

Single dates can be included or excluded from the final list of ranges by changing the `single_dates` parameter. A value of `True` will return single dates as ranges. Default value is `False`.

In [3]:
def find_cont_date_ranges(dates, single_dates=False):
    """Collapse a collection of dates into ranges of consecutive days.

    Two dates fall in the same range when they are exactly one day apart.
    The input does not have to be sorted or unique: missing values
    (``None`` / ``NaT``) are ignored, duplicates are removed and the
    remaining values are sorted before the ranges are built.

    Parameters
    ----------
    dates : iterable
        Date-like scalars such as ``pd.Timestamp`` (for example a datetime
        ``pd.Series``). Any iterable works, including generators.
    single_dates : bool, default False
        If True, a date with no neighbour on either side is returned as a
        one-day range ``[date, date]``. If False, such dates are left out.

    Returns
    -------
    list
        ``[start, end]`` pairs (both ends inclusive) in chronological order.
        Empty when there is nothing to report.
    """
    one_day = pd.Timedelta(days=1)

    # Normalise the input once so the scan below only ever has to compare a
    # date with its immediate predecessor.
    ordered = sorted({d for d in dates if not pd.isna(d)})

    ranges = []
    if not ordered:
        return ranges

    def close_range(start, end):
        # A range whose start equals its end is an isolated date; keep it
        # only when the caller asked for single dates.
        if single_dates or start != end:
            ranges.append([start, end])

    range_start = previous = ordered[0]
    for current in ordered[1:]:
        if current - previous != one_day:
            # Gap found: the run that ended at `previous` is complete.
            close_range(range_start, previous)
            range_start = current
        previous = current

    # The final run is never followed by a gap, so close it explicitly.
    close_range(range_start, previous)

    return ranges

#### Test with `single_dates = True`

In [4]:
# Build the ranges, keeping isolated dates as one-day ranges.
date_ranges = find_cont_date_ranges(dates=dates, single_dates=True)

In [5]:
# List each detected range as "start - end", one per line.
print("Continuous Date Ranges from Data (Including Single Dates):\n")
for range_start, range_end in date_ranges:
    print("{} - {}".format(range_start, range_end))

Continuous Date Ranges from Data (Including Single Dates):

2021-01-06 00:00:00 - 2021-01-06 00:00:00
2021-01-08 00:00:00 - 2021-01-10 00:00:00
2021-01-13 00:00:00 - 2021-02-15 00:00:00
2021-02-18 00:00:00 - 2021-03-10 00:00:00
2021-03-13 00:00:00 - 2021-04-12 00:00:00
2021-04-15 00:00:00 - 2021-04-15 00:00:00


#### Test with `single_dates = False`

In [6]:
# Build the ranges again, this time leaving isolated dates out.
date_ranges = find_cont_date_ranges(dates=dates, single_dates=False)

In [7]:
# List each detected range as "start - end", one per line.
print("Continuous Date Ranges from Data (Not Including Single Dates):\n")
for range_start, range_end in date_ranges:
    print("{} - {}".format(range_start, range_end))

Continuous Date Ranges from Data (Not Including Single Dates):

2021-01-08 00:00:00 - 2021-01-10 00:00:00
2021-01-13 00:00:00 - 2021-02-15 00:00:00
2021-02-18 00:00:00 - 2021-03-10 00:00:00
2021-03-13 00:00:00 - 2021-04-12 00:00:00


### Same function with added print statements for in-situ explanation

In [8]:
def find_cont_date_ranges(dates, single_dates=False):
    """Find runs of consecutive dates, printing a trace of every decision.

    The dates are walked in the order given, so they are expected to be in
    ascending order without duplicates. A date continues the current run
    when it lies exactly ``count`` days after the run's first date.

    Parameters
    ----------
    dates : iterable
        Date-like scalars such as ``pd.Timestamp`` (for example a datetime
        ``pd.Series``).
    single_dates : bool, default False
        If True, a date with no neighbour on either side is returned as a
        one-day range ``[date, date]``. If False, such dates are left out.

    Returns
    -------
    list
        ``[start, end]`` pairs (both ends inclusive) in the order found.
    """
    dates = list(dates)        # materialise so len() works for any iterable
    last_idx = len(dates) - 1  # position of the final row

    # Create local variables
    init_date = '' # will be any date that will mark the beginning of a possible date range
    temp_date = '' # will be any date that is within the current continuous range, kept in case this ends up being an end_date
    end_date = ''  # will be any date that marks the end of a date range
    count = 1      # used to count the days that have passed since the init_date
    ranges = []    # empty list that will eventually contain all continuous date ranges

    for idx, date in enumerate(dates): # begin looping through each date, given an index and a date value for each row

        print("\nDate: {}\nCount: {}".format(date, count)) # Print the date and count

        if not init_date: # if there is no init_date yet, assign the current date to the init_date and temp_date
            init_date = date
            temp_date = date
            print("Assigned as init_date and temp_date")

            # A lone input date is both the first and the last row; without
            # this check it would never be reported, even when single dates
            # were requested.
            if single_dates and idx == last_idx:
                ranges.append([init_date, init_date])
                print("Because this is the only date in the data, appending {} and {} as a date range"
                      .format(init_date, init_date))

        else: # if there is already an init_date

            if idx == last_idx: # if this is the last row in the data

                if not (date - init_date == pd.Timedelta(days=count)): # and the date is not 'n' days from the init_date, where n = count
                    end_date = temp_date # then this is an end date

                    if not single_dates: # if not allowing single dates as ranges

                        if count > 1: # the pending range spans more than one day, so it is a real range
                            ranges.append([init_date, end_date]) # then we will append only the last date range

                            # print explanation of action taken
                            print("Because count was {} and {} - {} = {}, and this is the last date in the range, {} is an end_date\nAppending {} and {}, as a date range"
                                  .format(count, date, init_date, date - init_date, end_date, init_date, end_date))

                        else: # count == 1 means the pending "range" is itself a single date, so it must be skipped
                            print("Single date not appended")

                    else: # if allowing single dates as ranges, then append both the last range as well as the single date
                        ranges.append([init_date, end_date])
                        ranges.append([date, date])

                        # print explanation of action taken
                        print("Because count was {} and {} - {} = {}, and this is the last date in the range, {} is an end_date\nAppending {} and {}, and {} and {} as date ranges"
                              .format(count, date, init_date, date - init_date, end_date, init_date, end_date, date, date))

                    print("\nLast Date Reached")

                else: # if the last date in the data is continuous with date before it
                    end_date = date # assign current date to end_date
                    ranges.append([init_date, end_date]) # append date range

                    # print explanation of action taken
                    print("Because count was {} and {} - {} = {}, and this is the last date in the range, {} is an end_date\nAppending {} and {} as a date range"
                          .format(count, date, init_date, date - init_date, end_date, init_date, end_date))

            else: # if this is not the last row of data

                if not (date - init_date == pd.Timedelta(days=count)): # and the date is not 'n' days from the init_date, where n = count

                    if (count == 1) and (not single_dates): # and we detect a single date but aren't allowing single date ranges
                        end_date = '' # clear the end date
                        init_date = date # assign current date to init_date
                        temp_date = date # assign temp_date to init_date

                        # print explanation of action taken
                        print("Single date not appended")
                        print("\nAssigning {} as init_date and temp_date".format(init_date))

                    else: # single date not detected
                        end_date = temp_date # so assign the previous date as the end_date
                        ranges.append([init_date, end_date]) # and append the date range

                        # print explanation of action taken
                        print("Because count was {} and {} - {} = {}, {} is an end_date\nAppending {} and {} as a date range"
                              .format(count, date, init_date, date - init_date, end_date, init_date, end_date))

                        # assign current date to init_date and temp_date, and reset end_date and count
                        init_date = date
                        temp_date = date
                        end_date = ''
                        count = 1

                        # print explanation of action taken
                        print("\nAssigning {} as init_date and temp date".format(init_date))

                else: # current date is determined at this point to be the date following the previous date
                    temp_date = date # assign the current date to the temp_date
                    count += 1 # increment the count by 1
                    print("This is continuous from previous date")
    return ranges

#### Test with `single_dates = True`

In [9]:
# Trace the run with isolated dates kept, then display the resulting ranges.
date_ranges = find_cont_date_ranges(dates=dates, single_dates=True)
date_ranges


Date: 2021-01-06 00:00:00
Count: 1
Assigned as init_date and temp_date

Date: 2021-01-08 00:00:00
Count: 1
Because count was 1 and 2021-01-08 00:00:00 - 2021-01-06 00:00:00 = 2 days 00:00:00, 2021-01-06 00:00:00 is an end_date
Appending 2021-01-06 00:00:00 and 2021-01-06 00:00:00 as a date range

Assigning 2021-01-08 00:00:00 as init_date and temp date

Date: 2021-01-09 00:00:00
Count: 1
This is continuous from previous date

Date: 2021-01-10 00:00:00
Count: 2
This is continuous from previous date

Date: 2021-01-13 00:00:00
Count: 3
Because count was 3 and 2021-01-13 00:00:00 - 2021-01-08 00:00:00 = 5 days 00:00:00, 2021-01-10 00:00:00 is an end_date
Appending 2021-01-08 00:00:00 and 2021-01-10 00:00:00 as a date range

Assigning 2021-01-13 00:00:00 as init_date and temp date

Date: 2021-01-14 00:00:00
Count: 1
This is continuous from previous date

Date: 2021-01-15 00:00:00
Count: 2
This is continuous from previous date

Date: 2021-01-16 00:00:00
Count: 3
This is continuous from prev

[[Timestamp('2021-01-06 00:00:00'), Timestamp('2021-01-06 00:00:00')],
 [Timestamp('2021-01-08 00:00:00'), Timestamp('2021-01-10 00:00:00')],
 [Timestamp('2021-01-13 00:00:00'), Timestamp('2021-02-15 00:00:00')],
 [Timestamp('2021-02-18 00:00:00'), Timestamp('2021-03-10 00:00:00')],
 [Timestamp('2021-03-13 00:00:00'), Timestamp('2021-04-12 00:00:00')],
 [Timestamp('2021-04-15 00:00:00'), Timestamp('2021-04-15 00:00:00')]]

#### Test with `single_dates = False`

In [10]:
# Trace the run with isolated dates dropped, then display the resulting ranges.
date_ranges = find_cont_date_ranges(dates=dates, single_dates=False)
date_ranges


Date: 2021-01-06 00:00:00
Count: 1
Assigned as init_date and temp_date

Date: 2021-01-08 00:00:00
Count: 1
Single date not appended

Assigning 2021-01-08 00:00:00 as init_date and temp_date

Date: 2021-01-09 00:00:00
Count: 1
This is continuous from previous date

Date: 2021-01-10 00:00:00
Count: 2
This is continuous from previous date

Date: 2021-01-13 00:00:00
Count: 3
Because count was 3 and 2021-01-13 00:00:00 - 2021-01-08 00:00:00 = 5 days 00:00:00, 2021-01-10 00:00:00 is an end_date
Appending 2021-01-08 00:00:00 and 2021-01-10 00:00:00 as a date range

Assigning 2021-01-13 00:00:00 as init_date and temp date

Date: 2021-01-14 00:00:00
Count: 1
This is continuous from previous date

Date: 2021-01-15 00:00:00
Count: 2
This is continuous from previous date

Date: 2021-01-16 00:00:00
Count: 3
This is continuous from previous date

Date: 2021-01-17 00:00:00
Count: 4
This is continuous from previous date

Date: 2021-01-18 00:00:00
Count: 5
This is continuous from previous date

Date: 

[[Timestamp('2021-01-08 00:00:00'), Timestamp('2021-01-10 00:00:00')],
 [Timestamp('2021-01-13 00:00:00'), Timestamp('2021-02-15 00:00:00')],
 [Timestamp('2021-02-18 00:00:00'), Timestamp('2021-03-10 00:00:00')],
 [Timestamp('2021-03-13 00:00:00'), Timestamp('2021-04-12 00:00:00')]]