---
# Activity: p104a07.sas
---

In [None]:
# Standard imports.
import saspy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import HTML
from datetime import datetime
from numpy import nan as NA
import math

In [None]:
# Get a SAS session object.
# cfgname="oda" picks the connection profile named "oda" from the saspy
# configuration file (presumably SAS OnDemand for Academics; confirm locally).
sas = saspy.SASsession(cfgname="oda")

In [None]:
# Assign the PG1 libref (BASE engine) so PG1.STORM_SUMMARY can be read below.
sas.saslib(
    libref='pg1',
    engine='base',
    path='~/PG1/Data/data/data',
)

---
# SAS Code
```
/* Build STORM_CAT from PG1.STORM_SUMMARY, keeping five columns and adding PressureGroup. */
data storm_cat;
	set pg1.storm_summary;
	keep Name Basin MinPressure StartDate PressureGroup;
	*add ELSE keyword and remove final condition;
	* The IF / ELSE IF / ELSE chain assigns exactly one PressureGroup per row:
	  missing when MinPressure is missing, 1 when MinPressure<=920, 0 otherwise;
	* The missing test comes first because SAS orders a missing numeric below
	  any number, so it would otherwise satisfy MinPressure<=920;
	if MinPressure=. then PressureGroup=.;
	else if MinPressure<=920 then PressureGroup=1;
	else PressureGroup=0;
	
run;

* One-way frequency table of PressureGroup;
proc freq data=storm_cat;
	tables PressureGroup;
run;
```
---

In [None]:
# Create a new DataFrame by pulling the PG1.STORM_SUMMARY table from SAS.
storm_summary_df = (
    sas.sasdata(table='STORM_SUMMARY', libref='PG1')
    .to_df()
)

In [None]:
# Mirror the SAS KEEP statement by selecting the wanted columns by name.
# (PressureGroup is also in the SAS KEEP list, but it does not exist yet; it is
# added in a later step.)
# Selecting explicitly, rather than dropping everything else, keeps the result
# stable if STORM_SUMMARY ever gains extra columns. The .copy() makes
# storm_cat_df an independent frame, so adding a column to it later neither
# warns nor touches storm_summary_df.
KeepList = ['Name', 'Basin', 'MinPressure', 'StartDate']
storm_cat_df = storm_summary_df[KeepList].copy()

In [None]:
# Vectorized equivalent of the SAS IF / ELSE IF / ELSE chain.
# np.select checks the conditions in order and the first match wins, so the
# missing-value test must come first: NaN <= 920 is False in pandas, which
# would otherwise place storms with no pressure reading into group 0.
# The result is a float column (NaN, 1.0, 0.0), the same values the
# row-by-row loop produced, without a Python-level loop over the index.
min_pressure = storm_cat_df['MinPressure']
storm_cat_df['PressureGroup'] = np.select(
    [min_pressure.isna(), min_pressure <= 920],
    [np.nan, 1.0],
    default=0.0,
)

In [None]:
# Show the resulting frame to check the new PressureGroup column.
storm_cat_df

In [None]:
# Frequency report: number of rows per PressureGroup value.
# dropna=True (the default) leaves rows with a missing PressureGroup out of the counts.
storm_cat_df['PressureGroup'].value_counts(dropna=True)

In [None]:
# One-way cross tabulation: PressureGroup values as rows under a single 'count' column.
pd.crosstab(
    index=storm_cat_df['PressureGroup'],
    columns='count',
)