In [1]:
# (Re)load the nb_black IPython extension for this kernel session.
# NOTE(review): presumably this auto-formats executed cells with black — confirm against the nb_black docs.
%reload_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

from umap import UMAP

from prince import MCA, FAMD

# plotly and ipywidgets (imported just below) are the subject of today.
# Everything else in this cell is a usual suspect from the
# dimensionality-reduction and clustering weeks.
import plotly.express as px
from ipywidgets import interact

import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

<IPython.core.display.Javascript object>

## ipywidget recipes:

A good resource with examples is available [here](https://towardsdatascience.com/interactive-controls-for-jupyter-notebooks-f5c94829aee6).

### Very general process

In [3]:
@interact
def do_stuff_interactively(
    text="text input",
    select=["option 1", "option2"],
    num=5,
    num_range=(3, 8),
    bool_tf=True,
):
    """
    Minimal `@interact` demo: echo the current value of every widget.

    * The function name and the argument names are arbitrary
    * `@interact` builds one widget per argument, picking the widget
      type from that argument's default value
    * The body can do anything; here it only prints
    * The body is re-run each time any widget value changes
    """
    # Fixed banner separating the widgets (rendered above) from the output.
    banner = (
        "^Above widgets auto-generated by `@interact`",
        "\n----------------------------\n",
        "Below are the outputs of the function:\n",
    )

    # One line per argument, in signature order.
    current_values = (
        f"text: {text}",
        f"select: {select}",
        f"num: {num}",
        f"num_range: {num_range}",
        f"bool_tf: {bool_tf}",
    )

    for line in banner + current_values:
        print(line)

interactive(children=(Text(value='text input', description='text'), Dropdown(description='select', options=('o…

<IPython.core.display.Javascript object>

### Use widgets with plotting

#### Homemade pair plot for numeric columns

In [4]:
# Load data
df = sns.load_dataset("iris")
df.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


<IPython.core.display.Javascript object>

In [5]:
# Keep only the numeric columns; their names feed the x / y dropdowns below
nums = df.select_dtypes(include="number")


@interact
def scatterplot(x=nums.columns, y=nums.columns, hue=df.columns):
    """
    Interactive scatter plot of `df`.

    `@interact` renders a dropdown for each argument from its default:
    `x` and `y` offer the numeric column names, `hue` offers every
    column name. The plot is redrawn whenever a selection changes.
    """
    column_choices = {"x": x, "y": y, "hue": hue}
    sns.scatterplot(data=df, **column_choices)
    plt.show()

interactive(children=(Dropdown(description='x', options=('sepal_length', 'sepal_width', 'petal_length', 'petal…

<IPython.core.display.Javascript object>

----

### Play time

Use either dataset below, or any other (maybe your capstone data if you have some in mind 👀).

Let's explore using plotly and ipywidgets.

In [6]:
marathon_data_url = "https://github.com/jhl126/bostonmarathon/blob/master/results/2014/results.csv?raw=true"


def _to_numeric_if_possible(column):
    """
    Return `column` converted with `pd.to_numeric`, or unchanged on failure.

    Explicit replacement for `pd.to_numeric(column, errors="ignore")`,
    which pandas has deprecated: a column that cannot be parsed as
    numbers is handed back untouched instead of raising.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Load and clean in a single chain so the cell builds `marathon` once,
# without repeatedly reassigning the same name.
marathon = (
    pd.read_csv(marathon_data_url)
    .drop(columns=["ctz"])
    # Dashes represent missing values in the time columns;
    # replace them with NaN.
    .replace("-", np.nan)
    # A missing state isn't really missing data: it just indicates
    # the runner is not from the USA or Canada, so label it instead
    # of dropping it.
    .assign(state=lambda frame: frame["state"].fillna("Not US/CAN"))
    # Drop rows with NAs
    # (time columns and city are the only columns with NA at this point).
    .dropna()
    # Try to change every column to numeric; columns that can't be
    # converted are left as they are.
    .apply(_to_numeric_if_possible)
)

marathon.head(3)

Unnamed: 0,10k,name,division,25k,gender,age,official,bib,genderdiv,35k,overall,pace,state,30k,5k,half,20k,country,city,40k
0,17.37,"Yamamoto, Hiroyuki",8,47.67,M,47,85.25,W1,8,71.4,8,3.27,Not US/CAN,59.18,8.02,39.72,37.65,JPN,Fukuoka,80.43
1,32.58,"Jeptoo, Rita",1,82.43,F,33,138.95,F1,1,116.37,21,5.3,Not US/CAN,99.33,16.22,69.47,65.83,KEN,Eldoret,132.1
2,16.62,"Van Dyk, Ernst F.",1,45.8,M,41,80.6,W2,1,67.42,1,3.08,Not US/CAN,56.45,7.75,38.03,36.1,RSA,Paarl,76.1


<IPython.core.display.Javascript object>

In [7]:
data_url = "https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Clustering/nba_player_seasons.csv"

# Read the player-season table from the URL ...
nba = pd.read_csv(data_url)
# ... and keep only the rows that have no missing values.
nba = nba.dropna()

nba.head(3)

Unnamed: 0,Seas,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,2019,Álex Abrines,SG,25,OKC,31,2,19.0,1.8,5.1,...,0.923,0.2,1.4,1.5,0.6,0.5,0.2,0.5,1.7,5.3
1,2019,Quincy Acy,PF,28,PHO,10,0,12.3,0.4,1.8,...,0.7,0.3,2.2,2.5,0.8,0.1,0.4,0.4,2.4,1.7
2,2019,Jaylen Adams,PG,22,ATL,34,1,12.6,1.1,3.2,...,0.778,0.3,1.4,1.8,1.9,0.4,0.1,0.8,1.3,3.2


<IPython.core.display.Javascript object>