## SAS program to expand each row into one row per comma-separated part of a string

```
***********************************************
*   Create a small data set to demonstrate    *
*   the concept: one row per group, where x   *
*   holds a comma-separated list of parts     *
***********************************************;
data data1;
	* group is read with list input and x as a fixed 10-character field;
	input group $ x $10.;
	* Data records start in column 1 because list input does not treat a tab
	  as a delimiter by default, so a tab-indented record may carry the tab
	  character into group;
	datalines;
A a1
B b1,b2
C c1,c2,c3
;
run;
	
***********************************************
*   Create a new row for each part of the     *
*   string for each value of x                *
*   Input:  data1 (group, x)                  *
*   Output: data2 (group, part), one row per  *
*           comma-separated part of x         *
***********************************************;
data data2;
	set data1;
	* A part can never be longer than x, which is read as 10 characters.
	  Without this statement part gets the 200-character default length
	  of a SCAN result;
	length part $10;
	* Number of parts is the number of commas plus one, so a blank x still
	  produces a single row with a blank part;
	n = countc(x, ',') + 1;
	do i = 1 to n;
		* The comma is named as the only delimiter. The default SCAN delimiter
		  list also contains blank, period, hyphen, slash and others, which
		  would split a part such as a-1 in the wrong place and disagree with
		  the comma count. The M modifier keeps empty parts in position and
		  STRIP removes blanks that surround a comma;
		part = strip(scan(x, i, ',', 'm'));
		output;
	end;
	keep group part;
run;
```

## Python code to produce the same data sets

In [1]:
# Import packages/libraries
import pandas as pd

In [2]:
# Rebuild the demo data from the SAS program above as a data frame: one row per
# category (the SAS variable is called group), with x holding the
# comma-separated parts
data1 = pd.DataFrame.from_records(
    [
        ('A', 'a1'),
        ('B', 'b1,b2'),
        ('C', 'c1,c2,c3'),
    ],
    columns=['category', 'x'],
)

data1

Unnamed: 0,category,x
0,A,a1
1,B,"b1,b2"
2,C,"c1,c2,c3"


In [3]:
# Split x on commas into one column per part (columns 0, 1, 2, ...).
# The vectorized .str.split(expand=True) is used instead of
# apply(lambda v: pd.Series(v.split(','))): that form builds a Series for every
# row and raises AttributeError when x is missing, whereas here a missing x
# simply yields a row of missing parts.
# Rows with fewer parts than the longest one are padded with missing values
# (None/NaN), which the later reshape step drops.
x_split = data1['x'].str.split(',', expand=True)

# Join these new columns with data1 and set the index to 'category'
data2 = pd.concat([data1, x_split], axis=1).set_index('category')
data2

Unnamed: 0_level_0,x,0,1,2
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,a1,a1,,
B,"b1,b2",b1,b2,
C,"c1,c2,c3",c1,c2,c3


In [4]:
# Reshape from wide to long: one row per (category, part) pair
data3 = (
    data2.drop(columns='x')           # keep only the split-out part columns
    .stack()                          # part columns -> inner index level
    .dropna()                         # padding cells carry no part
    .reset_index(level=1, drop=True)  # discard the part-position level
    .rename('part')                   # name the values column
    .reset_index()                    # category back to a regular column
)
data3

Unnamed: 0,category,part
0,A,a1
1,B,b1
2,B,b2
3,C,c1
4,C,c2
5,C,c3
