## SAS program to calculate multiple summary statistics per variable

```
***********************************************
*   Create a small data set to demonstrate    *
*   the use of proc means                     *
***********************************************;
/* data1 receives nine observations from the in-stream records below.
   category is read as a character variable (the $ modifier); x and y
   are numeric. Values on each record are separated by blanks. */
data data1;
  input category $ x y;
  /* In-stream records follow; the line holding only a semicolon ends them. */
  datalines;
  A 1 10
  A 2 11
  B 3 12
  B 4 13
  B 5 14
  C 6 15
  C 7 16
  C 8 17
  C 9 18  
  ;

**********************************************
*   Summarise x and y within each category   *
*   by taking the mean, minimum and maximum. *
*   Results go to the summary_stats data set *
*   and no report is printed.                *
**********************************************;
proc means data=data1 noprint nway;
  class category;
  var x y;
  /* autoname builds the output variable names from the analysis
     variable and the statistic, so no names are listed by hand. */
  output out=summary_stats(drop=_type_ _freq_)
         mean= min= max=
         / autoname;
run;
```

## Python code to produce the same summary

In [1]:
# Import packages/libraries
import numpy as np
import pandas as pd

In [2]:
# Rebuild the demonstration data set from the SAS program as a DataFrame:
# two 'A' rows, three 'B' rows and four 'C' rows, with x running 1..9
# and y running 10..18.
data1 = pd.DataFrame({
    'category': ['A'] * 2 + ['B'] * 3 + ['C'] * 4,
    'x': range(1, 10),
    'y': range(10, 19),
})
data1

Unnamed: 0,category,x,y
0,A,1,10
1,A,2,11
2,B,3,12
3,B,4,13
4,B,5,14
5,C,6,15
6,C,7,16
7,C,8,17
8,C,9,18


In [3]:
# Per-category mean, min and max of every remaining column; reset_index
# turns the group key back into an ordinary 'category' column.
summary_stats = (
    data1
    .groupby('category')
    .agg(['mean', 'min', 'max'])
    .reset_index()
)

# The aggregation leaves two-level column labels such as ('x', 'mean').
# Join the non-empty parts with '_' so they become 'x_mean', while
# ('category', '') collapses to plain 'category'.
summary_stats.columns = [
    '_'.join(part for part in labels if part)
    for labels in summary_stats.columns.values
]

summary_stats

Unnamed: 0,category,x_mean,x_min,x_max,y_mean,y_min,y_max
0,A,1.5,1,2,10.5,10,11
1,B,4.0,3,5,13.0,12,14
2,C,7.5,6,9,16.5,15,18
