##### Bring Your SAS and Python Worlds Together With SASPy!
A Microsoft Visual Studio Code Python notebook that demonstrates how SASPy can be used to:
 <ol>
 <li>Prepare a SAS dataset of NBA player salary info using the Cloud-based SAS OnDemand for Analytics
 <li>Download the SAS dataset into a Pandas dataframe on a PC-based Python VS Code session
 <li>Use Python and Plotly to visualize the NBA salary data in an interactive Sunburst Chart
 </ol>

In [1]:
# Open a connection from this PC-based Python kernel to Cloud-based SAS OnDemand for Analytics (ODA).
# Prerequisites (both files are in C:\Users\<CurrentUserName>):
#   - sascfg_personal.py : connection info
#   - _authinfo          : authentication info
import saspy

# Start a SAS session; the handle is named 'sas_session' and is reused by later cells.
sas_session = saspy.SASsession()

# Assign libname 'L' to the HOME directory on the SAS side.
# The trailing ';' keeps any return value from being echoed as the cell result.
sas_session.saslib(libref='L', path="/home/ted.conway");

Using SAS Config named: oda
SAS Connection established. Subprocess id is 4936


5                                                          The SAS System                       Sunday, October 20, 2024 05:27:00 AM

24         
25         libname L    '/home/ted.conway'  ;
26         
27         
28         

6                                                          The SAS System                       Sunday, October 20, 2024 05:27:00 AM

29         


In [2]:
%%SAS sas_session
* The cell magic on the line above runs the SAS code in this cell in the session sas_session;

* PROC MEANS - summary statistics for SALARY, closed by its own RUN statement;
proc means data=l.nba_salary_data;
  var salary;
run;

* PROC PRINT - list only the first observation as a quick look at the columns;
proc print data=l.nba_salary_data(obs=1);
run;

Analysis Variable : SALARY,Analysis Variable : SALARY,Analysis Variable : SALARY,Analysis Variable : SALARY,Analysis Variable : SALARY
N,Mean,Std Dev,Minimum,Maximum
475,9924594.54,11295475.05,28954.0,51915615.0

Obs,RK,NAME,TEAM,DIVISION,CONFERENCE,POSITION,SALARY,PLAYERS
1,1,"Stephen Curry, PG",Golden State Warriors,Pacific,Western,PG,"$51,915,615",1


In [3]:
%%SAS sas_session
* SAS box plus scatter plots of NBA salaries by position;

* Size the ODS Graphics output at 4.5in high by 10in wide;
ods graphics / height=4.5in width=10in;

proc sgplot data=l.nba_salary_data;
  * Box plot showing the distribution of salaries by position;
  hbox salary / category=position;
  * Jittered scatter overlay to show additional distribution detail;
  scatter y=position x=salary / jitter;
* End the SGPLOT step explicitly instead of relying on later submitted code to close it;
run;

In [4]:
import pandas as pd

# Download SAS dataset L.NBA_SALARY_DATA from the SAS session into a Pandas dataframe on the PC.
df = sas_session.sasdata2dataframe(table='nba_salary_data', libref='l')

# SALARY arrives as float; convert it to an integer dtype ('Int64').
df['SALARY'] = df['SALARY'].astype('Int64')

# Display the dataframe as the cell result. The recorded output is abbreviated to the
# first and last rows, i.e. the highest- and lowest-paid players in this data.
df

Unnamed: 0,RK,NAME,TEAM,DIVISION,CONFERENCE,POSITION,SALARY,PLAYERS
0,1.0,"Stephen Curry, PG",Golden State Warriors,Pacific,Western,PG,51915615,1.0
1,2.0,"Kevin Durant, PF",Phoenix Suns,Pacific,Western,PF,47649433,1.0
2,3.0,"LeBron James, SF",Los Angeles Lakers,Pacific,Western,SF,47607350,1.0
3,4.0,"Nikola Jokic, C",Denver Nuggets,Northwest,Western,C,47607350,1.0
4,5.0,"Joel Embiid, C",Philadelphia 76ers,Atlantic,Eastern,C,46900000,1.0
...,...,...,...,...,...,...,...,...
470,471.0,"Patty Mills, PG",Miami Heat,Southeast,Eastern,PG,475908,1.0
471,472.0,"Dominick Barlow, F",San Antonio Spurs,Southwest,Western,F,455620,1.0
472,473.0,"Taj Gibson, F",Detroit Pistons,Central,Eastern,F,348225,1.0
473,474.0,"Onuralp Bitim, SF",Chicago Bulls,Central,Eastern,SF,334582,1.0


In [5]:
# Select just the Golden State Warriors players.
df_gs = df.loc[df["TEAM"].eq("Golden State Warriors")]

# df2sd() creates a SAS data set in the SAS session from the dataframe.
my_sas_dataset = sas_session.df2sd(df_gs)

# describe() on the SAS data set returns SAS PROC MEANS output.
my_sas_dataset.describe()



Unnamed: 0,Variable,N,NMiss,Median,Mean,StdDev,Min,P25,P50,P75,Max
0,RK,16.0,0.0,211.0,227.5,163.214,1.0,61.0,211.0,380.0,467.0
1,SALARY,16.0,0.0,6245920.0,13456370.0,16232730.0,548815.0,2019706.0,6245920.0,23325893.0,51915615.0
2,PLAYERS,16.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0


In [6]:
import plotly.express as px

# Interactive Plotly Sunburst chart of NBA salaries (2023-24).

# A constant column provides the root of the hierarchy: the center wedge (title + grand total).
df['STAT'] = '<b>2023-24<br>NBA SALARIES</b>'

# Hierarchy levels from the center outward: title > conference > division > team > player.
# Wedge values come from SALARY; colors are keyed on TEAM.
fig = px.sunburst(
    df,
    path=['STAT', 'CONFERENCE', 'DIVISION', 'TEAM', 'NAME'],
    values='SALARY',
    color='TEAM',
)

# Make the margins smaller (all four set to 0) and set the figure width.
fig.update_layout(autosize=True, width=800, margin={'l': 0, 'r': 0, 't': 0, 'b': 0})

# Show the label and the salary (formatted with $ and thousands separators)
# both inside the wedges and in the hover text.
fig.update_traces(
    texttemplate='%{label}<br>%{value:$,}',
    hovertemplate='%{label}<br>%{value:$,}',
)

# Add the NBA logo (hosted on Wikipedia) at the top-left corner, positioned in paper coordinates.
fig.add_layout_image(
    {
        'source': "https://upload.wikimedia.org/wikipedia/en/thumb/0/03/National_Basketball_Association_logo.svg/903px-National_Basketball_Association_logo.svg.png",
        'xref': "paper",
        'yref': "paper",
        'x': 0,
        'y': 1,
        'sizex': .2,
        'sizey': .2,
    }
)

# Add a description at the bottom-right corner (paper coordinates, no arrow).
fig.add_annotation(
    text="2023-24<br>NBA Salaries",
    xref="paper",
    yref="paper",
    x=1,
    y=0,
    showarrow=False,
    font={'family': "Calibri", 'size': 32},
)

# Open the Sunburst chart in a new browser window.
fig.show(renderer="browser")

# Display some basic descriptive stats about the data as the cell result.
df.describe()

Unnamed: 0,RK,SALARY,PLAYERS
count,475.0,475.0,475.0
mean,238.0,9924594.536842,1.0
std,137.264951,11295475.0508,0.0
min,1.0,28954.0,1.0
25%,119.5,2019760.0,1.0
50%,238.0,5000000.0,1.0
75%,356.5,12452500.0,1.0
max,475.0,51915615.0,1.0


In [15]:
# Adjust the image size (make it smaller for the notebook).
fig.update_layout(width=750, height=650)

# Also display the Sunburst chart in the notebook itself, for documentation purposes.
fig.show()