In [11]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import adjusted_rand_score
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA

In [None]:
# Load the evaluation dump; every later cell reads its columns from `df`.
df = pd.read_csv(filepath_or_buffer='adaptive_speak_eval_all.csv')

# Peek at the first two rows as a quick sanity check.
df.head(n=2)

## Diagnostic classification

In [None]:
# Number of adaptation steps to analyse; the cells below read the columns
# 'adapted h0 s0' ... 'adapted h0 s{N_STEPS - 1}'.
N_STEPS = 1

# Diagnostic classifier passed to train_and_classify: 'LR' or 'MLP'.
CLF_TYPE = "LR"

In [27]:
def train_and_classify(X, Y, classifier_type='MLP'):
  """Train a diagnostic classifier and report its held-out performance.

  The data is split 80/20 (stratified on ``Y``, ``random_state=1``). Features
  are standardised with statistics fitted on the training portion only, and
  the selected classifier is fitted on the scaled training data.

  Args:
    X: Feature matrix, one row per sample.
    Y: Class labels aligned with the rows of ``X``.
    classifier_type: ``'MLP'`` for ``MLPClassifier`` or ``'LR'`` for
      ``LogisticRegression``.

  Returns:
    The text produced by ``classification_report`` on the test split.

  Raises:
    ValueError: If ``classifier_type`` is neither ``'MLP'`` nor ``'LR'``.
  """
  # Validate the requested model first so a typo fails immediately with a
  # helpful message, before any splitting, scaling, or training happens.
  if classifier_type == 'MLP':
    clf = MLPClassifier(random_state=1, max_iter=1000)
  elif classifier_type == 'LR':
    clf = LogisticRegression(random_state=1, max_iter=1000)
  else:
    raise ValueError(
      f"Unknown classifier_type {classifier_type!r}; expected 'MLP' or 'LR'."
    )

  X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, stratify=Y, test_size=0.2, random_state=1
  )

  # Fit the scaler on the training split only to avoid leaking test statistics.
  scaler = preprocessing.StandardScaler().fit(X_train)
  X_train_scaled = scaler.transform(X_train)
  X_test_scaled = scaler.transform(X_test)

  clf.fit(X_train_scaled, Y_train)
  Y_pred = clf.predict(X_test_scaled)

  return classification_report(Y_test, Y_pred)

### Predict target domain from non-adapted hidden representation

In [None]:
# NOTE(security): eval() executes whatever expression is stored in each CSV
# cell, so only run this on a trusted file. If the cells are plain list
# literals, ast.literal_eval would be the safe parser (TODO confirm format).
# Iterating the column directly avoids building a Series per row via iterrows().
X = np.array([eval(cell) for cell in df['original h0']])
Y = np.array(df['target domain'].tolist())

print(train_and_classify(X, Y, CLF_TYPE))

### Predict target domain from adapted hidden representation

In [None]:
# NOTE(security): eval() executes whatever expression is stored in each CSV
# cell, so only run this on a trusted file. If the cells are plain list
# literals, ast.literal_eval would be the safe parser (TODO confirm format).

# The labels do not depend on the adaptation step, so extract them once.
Y = np.array(df['target domain'].tolist())

for step in range(N_STEPS):

  print(f'Step {step}')

  # Iterating the column directly avoids building a Series per row via iterrows().
  X = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])

  print(train_and_classify(X, Y, CLF_TYPE))

### Predict listener domain from adapted hidden representation

In [None]:
# NOTE(security): eval() executes whatever expression is stored in each CSV
# cell, so only run this on a trusted file. If the cells are plain list
# literals, ast.literal_eval would be the safe parser (TODO confirm format).

# The labels do not depend on the adaptation step, so extract them once.
Y = np.array(df['listener domain'].tolist())

for step in range(N_STEPS):

  print(f'Step {step}')

  # Iterating the column directly avoids building a Series per row via iterrows().
  X = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])

  print(train_and_classify(X, Y, CLF_TYPE))

### Predict listener domain from the difference between adapted and non-adapted hidden representations

In [None]:
# NOTE(security): eval() executes whatever expression is stored in each CSV
# cell, so only run this on a trusted file. If the cells are plain list
# literals, ast.literal_eval would be the safe parser (TODO confirm format).

# The non-adapted representations and the labels are the same for every step,
# so parse them once instead of once per row per step.
original_h0 = np.array([eval(cell) for cell in df['original h0']])
Y = np.array(df['listener domain'].tolist())

for step in range(N_STEPS):

  print(f'Step {step}')

  adapted_h0 = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])
  # Row-wise difference: how far adaptation moved each hidden state.
  X = adapted_h0 - original_h0

  print(train_and_classify(X, Y, CLF_TYPE))

## Diagnostic clustering

In [30]:
def cluster_and_plot(X, Y, k=5):
  """Cluster ``X`` with k-means, score it against ``Y``, and plot a 2-D PCA view.

  Prints the adjusted Rand score between the k-means assignments and ``Y``,
  then shows a scatter plot of the first two principal components of ``X``,
  coloured by cluster assignment.

  Args:
    X: Feature matrix, one row per sample.
    Y: Reference labels aligned with the rows of ``X``; used only for the
      adjusted Rand score.
    k: Number of k-means clusters.

  Returns:
    Tuple ``(kmeans, plt)``: the fitted ``KMeans`` estimator and the
    ``matplotlib.pyplot`` module (kept so existing callers keep working).
  """
  kmeans = KMeans(n_clusters=k, random_state=1).fit(X)
  cluster_ids = kmeans.labels_

  print(f'Adjusted rand score: {adjusted_rand_score(cluster_ids, Y)}')

  # Seed PCA as well: scikit-learn may choose a randomized SVD solver for
  # large inputs, which would make the projection vary between runs.
  pca = PCA(n_components=2, random_state=1)
  X_2dim = pca.fit_transform(X)

  for cluster_id in np.unique(cluster_ids):
    members = cluster_ids == cluster_id
    plt.scatter(X_2dim[members, 0], X_2dim[members, 1], label=cluster_id)

  # Label the axes so the figure can be read on its own.
  plt.xlabel('PC 1')
  plt.ylabel('PC 2')
  plt.legend()
  plt.show()

  return kmeans, plt

### Cluster non-adapted hidden representations (vs. target domains)

In [None]:
# NOTE(security): eval() executes whatever expression is stored in each CSV
# cell, so only run this on a trusted file. If the cells are plain list
# literals, ast.literal_eval would be the safe parser (TODO confirm format).
# Iterating the column directly avoids building a Series per row via iterrows().
X = np.array([eval(cell) for cell in df['original h0']])
Y = np.array(df['target domain'].tolist())

# Trailing ';' keeps the returned (KMeans, pyplot) tuple out of the cell output.
cluster_and_plot(X, Y);

### Cluster adapted hidden representations (vs. target domains)



In [None]:
# NOTE(security): eval() executes whatever expression is stored in each CSV
# cell, so only run this on a trusted file. If the cells are plain list
# literals, ast.literal_eval would be the safe parser (TODO confirm format).

# The labels do not depend on the adaptation step, so extract them once.
Y = np.array(df['target domain'].tolist())

for step in range(N_STEPS):

  print(f'Step {step}')

  # Iterating the column directly avoids building a Series per row via iterrows().
  X = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])

  cluster_and_plot(X, Y)

### Cluster adapted hidden representations (vs. listener domains)


In [None]:
# NOTE(security): eval() executes whatever expression is stored in each CSV
# cell, so only run this on a trusted file. If the cells are plain list
# literals, ast.literal_eval would be the safe parser (TODO confirm format).

# The labels do not depend on the adaptation step, so extract them once.
Y = np.array(df['listener domain'].tolist())

for step in range(N_STEPS):

  print(f'Step {step}')

  # Iterating the column directly avoids building a Series per row via iterrows().
  X = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])

  cluster_and_plot(X, Y)

### Cluster difference between adapted and non-adapted hidden representations (vs. listener domains)


In [None]:
# NOTE(security): eval() executes whatever expression is stored in each CSV
# cell, so only run this on a trusted file. If the cells are plain list
# literals, ast.literal_eval would be the safe parser (TODO confirm format).

# The non-adapted representations and the labels are the same for every step,
# so parse them once instead of once per row per step.
original_h0 = np.array([eval(cell) for cell in df['original h0']])
Y = np.array(df['listener domain'].tolist())

for step in range(N_STEPS):

  print(f'Step {step}')

  adapted_h0 = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])
  # Row-wise difference: how far adaptation moved each hidden state.
  X = adapted_h0 - original_h0

  cluster_and_plot(X, Y)