## SAS program to fill in gaps in a time series by group

```
***********************************************
*   Create a small data set to demonstrate    *
*   the concept. Three groups (A, B, C) with  *
*   monthly observations. Groups A and B skip *
*   some months and group C has one row only  *
***********************************************;
data data1;
	/* group is read as character, date with the MONYY7. informat, x as numeric */
	input group $ date monyy7. x;
	/* display the stored date value in MONYY7. form */
	format date monyy7.;
	datalines;
A Jan2020 1
A Mar2020 2
B Feb2020 3
B Jun2020 4
B Aug2020 5
C Apr2020 6
;

***********************************************
***********************************************
*   Here is a method to fill in the gaps in   *
*   the time series by group using base SAS   *
***********************************************
***********************************************;

/*---------------------------------------------------------------
  Date range per group.
  Writes one row per GROUP to MIN_MAX_DATES holding the earliest
  (min_date) and latest (max_date) value of DATE found in DATA1.
  PROC SUMMARY is PROC MEANS with printed output off by default,
  so NOPRINT is not needed. NWAY keeps only the rows for the full
  CLASS combination, i.e. no overall-total row is written.
---------------------------------------------------------------*/
proc summary data=data1 nway;
	class group;
	var date;
	output out=min_max_dates
		min(date)=min_date
		max(date)=max_date;
run;

/*---------------------------------------------------------------
  Template of every month in each group's range.
  For each row of MIN_MAX_DATES, emit one row per month from
  min_date through max_date inclusive. Only GROUP and DATE are
  kept, so the result can be merged back onto DATA1 by those keys.
---------------------------------------------------------------*/
data min_max_dates2 (keep=group date);
	set min_max_dates;
	format date monyy7.;
	/* INTCK counts the month boundaries between the two dates, so
	   offsets 0..count cover both end points of the range */
	do month_offset = 0 to intck('month', min_date, max_date);
		date = intnx('month', min_date, month_offset);
		output;
	end;
run;

***********************************************
*   Merge the original data set with the      *
*   template and forward fill the x variable  *
*   within each group                         *
***********************************************;
data data2;
	/* Template rows with no match in DATA1 arrive with x missing */
	merge min_max_dates2 data1;
		by group date;
	/* ffill carries the last nonmissing x from one row to the next */
	retain ffill;
	/* Reset at the start of every group so that a group whose first
	   observation has a missing x does not inherit the last value of
	   the previous group */
	if first.group then ffill = .;
	if x ne . then ffill=x;
	/* Replace the gappy x with the filled series under the same name
	   (DROP is applied before RENAME) */
	drop x;
	rename ffill=x;
run;

/*---------------------------------------------------------------
  Alternative that requires SAS/ETS.
  PROC TIMESERIES does the same gap filling in a single step:
  INTERVAL=MONTH puts each group's series on a monthly grid, and
  SETMISSING=PREVIOUS assigns the previous nonmissing value to
  the months that have no observation. Result goes to DATA3.
---------------------------------------------------------------*/
proc timeseries data=data1 out=data3;
	by group;
	var x;
	id date interval=month setmissing=previous;
run;
```

## Python code to produce the same data set

In [1]:
# Import packages/libraries
import pandas as pd

In [2]:
# Create the data frame that was used in the SAS program above.
# The dates are month-year strings such as 'Jan2020'; an explicit format is
# passed so parsing does not depend on pandas guessing the layout (which can
# emit a warning or vary between pandas versions). Each date resolves to the
# first day of its month.
data1 = (pd.DataFrame({'group':list('AABBBC'),
                       'date':pd.to_datetime(['Jan2020','Mar2020','Feb2020','Jun2020','Aug2020','Apr2020'],
                                             format='%b%Y'),
                       'x':range(1,7)}))

data1

Unnamed: 0,group,date,x
0,A,2020-01-01,1
1,A,2020-03-01,2
2,B,2020-02-01,3
3,B,2020-06-01,4
4,B,2020-08-01,5
5,C,2020-04-01,6


In [3]:
# Set the date variable to the period of month
data1['date'] = data1['date'].dt.to_period('M')

# Use the date column as the index
data1.set_index('date',inplace=True)

# Fill in the missing months by group and forward fill the x column.
# Each group's x series is reindexed onto every month between that group's
# own first and last date, and the forward fill is done inside the group, so:
#   * a value can never leak from the end of one group into the next one
#   * a group with a single month (C here) is kept, matching the SAS output
#   * no deprecated fillna(method=...) / PeriodIndex resample call is needed
# NOTE(review): reindex requires at most one row per group and month - confirm
# this holds for real input, or aggregate duplicates first.
(pd.concat({grp: (ser.reindex(pd.period_range(ser.index.min(),
                                              ser.index.max(),
                                              freq='M'))
                     .ffill())
            for grp, ser in data1.groupby('group')['x']},
           names=['group', 'date'])
   .reset_index()
)

Unnamed: 0,group,date,x
0,A,2020-01,1.0
1,A,2020-02,1.0
2,A,2020-03,2.0
3,B,2020-02,3.0
4,B,2020-03,3.0
5,B,2020-04,3.0
6,B,2020-05,3.0
7,B,2020-06,4.0
8,B,2020-07,4.0
9,B,2020-08,5.0
