## Setting up a SWAT Connection for Python

In [None]:
import os

import pandas as pd
import swat

In [None]:
# Tell the CAS client which CA certificate file to use when verifying the
# server's TLS certificate. Set this before the connection is created.
os.environ["CAS_CLIENT_SSL_CA_LIST"] = r"C:\SAS Viya\CAS_demo_ex_cert.crt"

In [None]:
# Connect to the CAS server. Credentials are read from the authinfo file
# instead of being written into the notebook.
conn = swat.CAS(
    hostname="sas-cas-server-default-bin-demo.uksouth.cloudapp.azure.com",
    port=5570,
    authinfo=r"C:\SAS Viya\.authinfo",
)

In [None]:
# Run the server status action as a quick check that the connection works.
conn.serverstatus()

## Create a reference to in-memory tables

In [None]:
# CASTable only creates a client-side reference to the "cirrhosis" table in
# the "public" caslib; it does not load or download any data by itself.
cirrhosis = conn.CASTable("cirrhosis", caslib="public")

## Use pandas code and functions on the CAS tables as if they were standard dataframes

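These operations are translated into CAS actions that run on the server. Assigning a new column (as in the example below) adds a computed column to the table reference, which CAS evaluates on the fly; it does not rewrite the table stored in CAS.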

In [None]:
# Preview the first rows of the CAS table; head() mirrors the pandas method
# of the same name.
cirrhosis.head()

In [None]:
# Work with the tables as if they were pandas dataframes

# Add a boolean flag column that is true where Bilirubin is above 1.2.
# NOTE(review): 1.2 is presumably a clinical cut-off for elevated bilirubin;
# confirm the source of the threshold and the units of the column.
cirrhosis["Bilirubin_high"] = cirrhosis["Bilirubin"] > 1.2

## Use the full capabilities of CAS

You can load action sets to run any CAS action, giving you access to powerful in-memory, distributed analysis.

In [None]:
# Load the "regression" action set so that its actions (such as the glm
# action used in the next cell) can be called on this session.
conn.loadactionset("regression")

In [None]:
# Where and how the scored rows are written on the CAS server.
scored_table_spec = {
    # Output table; overwritten if it already exists.
    "casOut": {"name": "PlateletsPrediction", "replace": True},
    # Carry the columns of the input table through to the output table.
    "copyvars": "all",
    # Column names for the predicted value and the residual.
    "pred": "PredictedPlatelets",
    "resid": "ResidualPlatelets",
}

# Fit a linear model for Platelets on the CAS server and score the table.
# NOTE(review): only "Stage" is declared nominal here, whereas the
# scikit-learn version later in this notebook also dummy-encodes "Status"
# and "Drug"; confirm whether they should be listed in nominals as well.
output = cirrhosis.glm(
    target="Platelets",
    inputs=["Stage", "Status", "Drug", "Bilirubin_high", "Albumin", "Copper"],
    nominals=["Stage"],
    output=scored_table_spec,
)


In [None]:
# Reference the "PlateletsPrediction" table that the glm call wrote through
# its casOut setting.
result = conn.CASTable("PlateletsPrediction")

In [None]:
# Preview the scored rows, which should include the PredictedPlatelets and
# ResidualPlatelets columns requested in the glm output settings.
result.head()

In [None]:
# Predicted (x-axis) against actual (y-axis) platelet values; points close
# to the diagonal are the ones the model predicts well.
result.plot.scatter("PredictedPlatelets", "Platelets")

Alternatively, I could have done the same analysis locally in Python with pandas and scikit-learn:

In [None]:
# Pull the CAS table down into a local data frame.
# NOTE: the next cell assigns cirrhosis_pdf again from a CSV file, so running
# both cells replaces this frame with the CSV data.
cirrhosis_pdf = cirrhosis.to_frame()

In [None]:
# Alternative to to_frame(): read the data from a local CSV file instead of
# from CAS. This rebinds cirrhosis_pdf, replacing any frame created by the
# previous cell.
cirrhosis_pdf = pd.read_csv("../create_data/cirrhosis_clean.csv")

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Define the features and target variable once and reuse the names below, so
# the column lists cannot drift apart.
features = ['Stage', 'Status', 'Drug', 'Bilirubin', 'Albumin', 'Copper']
categorical = ['Stage', 'Status', 'Drug']
target = 'Platelets'

# LinearRegression does not accept missing values, so keep only the rows in
# which every model column is present.
cirrhosis_complete = cirrhosis_pdf.dropna(subset=features + [target])

X = cirrhosis_complete[features]
y = cirrhosis_complete[target]

# Convert categorical variables to dummy variables; drop_first removes one
# level per variable so the dummy columns are not perfectly collinear.
X = pd.get_dummies(X, columns=categorical, drop_first=True)

# Split the data into training and testing sets (80/20, fixed seed so the
# split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create the linear regression model and train it on the training rows only
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions for every complete row (training and test rows alike) so
# that y_pred lines up with y in the scatter plot at the end of the notebook.
y_pred = model.predict(X)

In [None]:
# Coefficient of determination (R^2) of the model on the held-out test rows.
model.score(X_test, y_test)

In [None]:
# Plot actual against predicted platelet values for every row in X.
actual_vs_predicted = pd.DataFrame({"Actual": y, "Predicted": y_pred})
actual_vs_predicted.plot.scatter(x="Predicted", y="Actual")