In [21]:
# Widen the notebook container to the full browser width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display



In [22]:
import cuml

import pandas as pd
import numpy as np
import tqdm, glob
import plotly
from plotly.offline import iplot
import plotly.graph_objs as go
import plotly.express as px

from IPython.display import Javascript

from ipywidgets import IntProgress

from sentence_transformers import SentenceTransformer

In [23]:
# Get baseline data for ethnicity per state.
# The CSV is indexed by 'Label (Grouping)' and transposed, columns that are
# empty for every row are dropped, and the long source labels are replaced by
# short column codes.
RacePerState_path = "DO2022_additional_data/race_per_state_clean.csv"
df_RPS = (
    pd.read_csv(RacePerState_path)
    .set_index('Label (Grouping)')
    .T
    .dropna(axis='columns', how='all')
    .reset_index()
    .rename_axis(None, axis=1)
    .rename(columns={
        'index': 'State',
        'Total': 'eftotlt',
        'Hispanic or Latino total': 'efhispt',
        'White total': 'efwhitt',
        'Black or African American total': 'efbkaat',
        'American Indian and Alaska Native total': 'efaiant',
        'Asian total': 'efasiat',
        'Native Hawaiian and Other Pacific Islander total': 'efnhpit',
        'Some Other Race total': 'efunknt',
        'Two or more races total': 'ef2mort',
    })
)

# The count columns are strings; remove every ',' before casting to float.
colsS = ['eftotlt', 'efhispt', 'efwhitt', 'efbkaat', 'efaiant', 'efasiat', 'efnhpit', 'efunknt', 'ef2mort']
df_RPS[colsS] = (
    df_RPS[colsS]
    .apply(lambda col: col.str.replace(',', ''))
    .astype("float")
)

# Baseline ratios for ethnicity per state.
# Each ratio column is the corresponding count column of df_RPS divided by the
# 'eftotlt' column. The division is done column-wise (vectorised) instead of
# with one row-by-row `apply` per ratio; the mapping below also fixes the
# output column order.
ratio_to_count_col = {
    'airatioS': 'efaiant',
    'asratioS': 'efasiat',
    'bkratioS': 'efbkaat',
    'hiratioS': 'efhispt',
    'nhratioS': 'efnhpit',
    'whratioS': 'efwhitt',
    'tmratioS': 'ef2mort',
    'unratioS': 'efunknt',
}
df_ratiosS = pd.DataFrame({'state': df_RPS['State']}).assign(**{
    ratio_col: df_RPS[count_col] / df_RPS['eftotlt']
    for ratio_col, count_col in ratio_to_count_col.items()
})
df_ratiosS.head()

Unnamed: 0,state,airatioS,asratioS,bkratioS,hiratioS,nhratioS,whratioS,tmratioS,unratioS
0,Alabama,0.004601,0.01511,0.256387,0.052554,0.00052,0.631205,0.036745,0.002877
1,Alaska,0.148404,0.059244,0.028267,0.067936,0.016983,0.575079,0.097848,0.006238
2,Arizona,0.036906,0.034795,0.044349,0.306544,0.002003,0.533671,0.037312,0.00442
3,Arkansas,0.006823,0.017005,0.149387,0.085288,0.004742,0.685218,0.048865,0.002672
4,California,0.003948,0.151216,0.053601,0.39404,0.003495,0.346869,0.041168,0.005664


In [24]:
# Shuffle the ratio table. SN is the requested sample size; `min` keeps it
# from exceeding the number of available rows. With SN equal to the table
# length, every row is kept and only the order changes.
# `random_state` fixes the shuffle so a fresh Restart & Run All produces the
# same row order.
SN = len(df_ratiosS)
sdf = df_ratiosS.sample(n=min(SN, len(df_ratiosS)), random_state=42)

In [25]:
# Inspect the column labels of sdf from the second column onward; this same
# selection is used below as the UMAP input features.
sdf.columns[1:]

Index(['airatioS', 'asratioS', 'bkratioS', 'hiratioS', 'nhratioS', 'whratioS',
       'tmratioS', 'unratioS'],
      dtype='object')

In [26]:
# sdf[sdf.columns[1:]].values

In [27]:

# Embed the ratio vectors in 2-D with UMAP and attach the coordinates to the
# shuffled table as columns 'px' and 'py'.
TN = len(df_ratiosS)
tdf = sdf[:TN]
encs = sdf[sdf.columns[1:]].values  # all columns after the first one
tencs = encs[:TN, :]

# `random_state` seeds the embedding so the layout is repeatable between runs.
# NOTE(review): cuML documents seeded runs as consistent only to a few digits
# of precision, and possibly slower — confirm this is acceptable.
trans = cuml.UMAP(
    n_components=2,
    min_dist=0.02,
    n_neighbors=10,
    random_state=42,
    output_type='numpy',
).fit_transform(tencs)
trans = np.asarray(trans)
tdf = tdf.assign(px=trans[:, 0], py=trans[:, 1])


In [28]:
def fr(pts):
    """Return a padded (low, high) range covering the bulk of `pts`.

    The range starts from the 5th and 95th percentiles of `pts` and is widened
    on each side by 30% of the distance between those two percentiles.
    """
    low, high = np.quantile(pts, [0.05, 0.95])
    margin = 0.3 * (high - low)
    return (low - margin, high + margin)

# Padded plotting ranges for each embedding axis.
xr, yr = fr(tdf['px']), fr(tdf['py'])

# Keep only the points that lie strictly inside both ranges.
inside_x = (tdf['px'] > xr[0]) & (tdf['px'] < xr[1])
inside_y = (tdf['py'] > yr[0]) & (tdf['py'] < yr[1])
tdf = tdf[inside_x & inside_y]

In [29]:

# Upper bound on the number of rows taken for the scatter plot (`tdf[:PN]`).
PN = 6000

# Number of grid edges per axis, growing with sqrt(n / 2). At least two edges
# are needed to define a single histogram bin, so the count is clamped for
# very small tables instead of handing np.histogram2d an unusable edge array.
bins = max(2, int((len(tdf) / 2) ** 0.5))
xs = np.linspace(xr[0], xr[1], num=bins)
ys = np.linspace(yr[0], yr[1], num=bins)

# 2-D point counts over the grid. The result is indexed rather than unpacked
# into `_`, because assigning to `_` overwrites IPython's "last output"
# variable in the notebook namespace.
azs = np.histogram2d(tdf.px, tdf.py, bins=(xs, ys))[0]
# Cap the counts at their 90th percentile.
azs = np.minimum(azs, np.quantile(azs, 0.90))

In [30]:
# Rows to draw: at most the first PN rows of the filtered embedding table.
pdf = tdf[:PN]

# Background layer: the capped 2-D counts as a faint grey contour, positioned
# at the midpoints between consecutive grid edges.
x_mid = 0.5 * (xs[:-1] + xs[1:])
y_mid = 0.5 * (ys[:-1] + ys[1:])
fig = go.Figure()
fig.add_contour(x=x_mid, y=y_mid, z=azs.T, colorscale="Greys", showscale=False, opacity=0.4)

# Foreground layer: the embedded points, coloured by 'state'.
state_scatter = px.scatter(pdf, x='px', y='py', color='state', hover_data=['state'])
fig.add_traces(state_scatter.data)

# fig.write_html("plots/final/umap_xtra.html".format(PN), include_plotlyjs='cdn')
# Fixed-size canvas (autosizing disabled).
fig.update_layout(autosize=False, width=2000, height=2000)
iplot(fig)