In [4]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import adjusted_rand_score
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA

In [3]:
# Load the validation output table that every cell below reads from.
# NOTE(review): the path is absolute ('/content/...'); confirm the file exists in the
# current runtime before running the rest of the notebook.
df = pd.read_csv(filepath_or_buffer='/content/output_val')
df.head(n=2)

FileNotFoundError: ignored

## Diagnostic classification

In [75]:
CLF_TYPE = 'MLP'  # classifier passed to train_and_classify by the classification cells: 'MLP' or 'LR'

def train_and_classify(X, Y, classifier_type='MLP', test_size=0.2, random_state=1):
  """Fit a diagnostic classifier on a stratified split and report held-out metrics.

  Args:
    X: feature matrix, one row per sample.
    Y: labels, one per row of X; also used to stratify the split.
    classifier_type: 'MLP' (MLPClassifier, max_iter=300) or 'LR' (LogisticRegression).
    test_size: fraction of samples held out for evaluation.
    random_state: seed used for both the split and the classifier.

  Returns:
    The text produced by sklearn's classification_report on the held-out split.

  Raises:
    ValueError: if classifier_type is neither 'MLP' nor 'LR'.
  """
  # Fail fast, with a message, before any splitting or printing happens.
  if classifier_type not in ('MLP', 'LR'):
    raise ValueError(
      f"Unknown classifier_type {classifier_type!r}; expected 'MLP' or 'LR'."
    )

  X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, stratify=Y, test_size=test_size, random_state=random_state
  )
  print(f'X_train: {X_train.shape}  X_test: {X_test.shape}  Y_train: {Y_train.shape}  Y_test: {Y_test.shape}')

  if classifier_type == 'MLP':
    clf = MLPClassifier(random_state=random_state, max_iter=300)
  else:
    clf = LogisticRegression(random_state=random_state)
  clf.fit(X_train, Y_train)

  Y_pred = clf.predict(X_test)

  return classification_report(Y_test, Y_pred)

### Predict target domain from non-adapted hidden representation

In [76]:
# Features: the stored 'original h0' vectors; labels: the target domain of each row.
# NOTE(review): eval() executes whatever expression the CSV cell contains. Only run this
# on trusted files; ast.literal_eval may be a safer drop-in if the cells are plain
# list literals (confirm against the data).
X = np.array([eval(cell) for cell in df['original h0']])
Y = np.array(list(df['target domain']))

print(train_and_classify(X, Y, CLF_TYPE))

X_train: (13, 512)  X_test: (4, 512)  Y_train: (13,)  Y_test: (4,)
              precision    recall  f1-score   support

        food       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4



### Predict target domain from adapted hidden representation

In [57]:
# One classification run per adaptation step (columns 'adapted h0 s0' .. 'adapted h0 s2').
for step in range(3):
  print(f'Step {step}')

  # NOTE(review): eval() executes whatever expression the CSV cell contains. Only run
  # this on trusted files.
  X = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])
  Y = np.array(list(df['target domain']))

  print(train_and_classify(X, Y, CLF_TYPE))

Step 0
X_train: (13, 512)  X_test: (4, 512)  Y_train: (13,)  Y_test: (4,)
              precision    recall  f1-score   support

        food       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Step 1
X_train: (13, 512)  X_test: (4, 512)  Y_train: (13,)  Y_test: (4,)
              precision    recall  f1-score   support

        food       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Step 2
X_train: (13, 512)  X_test: (4, 512)  Y_train: (13,)  Y_test: (4,)
              precision    recall  f1-score   support

        food       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg  

### Predict listener domain from adapted hidden representation

In [None]:
# One classification run per adaptation step, this time predicting the listener domain.
for step in range(3):
  print(f'Step {step}')

  # NOTE(review): eval() executes whatever expression the CSV cell contains. Only run
  # this on trusted files.
  X = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])
  Y = np.array(list(df['listener domain']))

  print(train_and_classify(X, Y, CLF_TYPE))

### Predict listener domain from difference between adapted and non-adapted hidden representation

In [58]:
# Features are the element-wise difference (adapted - original) for each adaptation step.
for step in range(3):
  print(f'Step {step}')

  # NOTE(review): eval() executes whatever expression the CSV cell contains. Only run
  # this on trusted files.
  X = np.array([
    np.array(eval(adapted)) - np.array(eval(original))
    for adapted, original in zip(df[f'adapted h0 s{step}'], df['original h0'])
  ])
  Y = np.array(list(df['listener domain']))

  print(train_and_classify(X, Y, CLF_TYPE))

Step 0
X_train: (13, 512)  X_test: (4, 512)  Y_train: (13,)  Y_test: (4,)




              precision    recall  f1-score   support

        food       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Step 1
X_train: (13, 512)  X_test: (4, 512)  Y_train: (13,)  Y_test: (4,)




              precision    recall  f1-score   support

        food       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Step 2
X_train: (13, 512)  X_test: (4, 512)  Y_train: (13,)  Y_test: (4,)
              precision    recall  f1-score   support

        food       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4





## Diagnostic clustering

In [5]:
def cluster_and_plot(X, Y, k=5):
  """Cluster X with k-means, print its agreement with Y, and plot the clusters in 2-D.

  Args:
    X: feature matrix, one row per sample.
    Y: reference labels, one per row of X, compared against the cluster assignment.
    k: number of k-means clusters.

  Returns:
    A tuple (kmeans, plt): the fitted KMeans estimator and the matplotlib.pyplot
    module (returned as-is so existing callers keep working).
  """
  kmeans = KMeans(n_clusters=k, random_state=1).fit(X)
  Y_pred = kmeans.labels_

  # Reference labels go first by sklearn convention; the score is symmetric, so the
  # printed value is the same either way.
  print(f'Adjusted rand score: {adjusted_rand_score(Y, Y_pred)}')

  # Seed PCA so the 2-D projection is reproducible even if sklearn picks its
  # randomized SVD solver for this input size.
  pca = PCA(n_components=2, random_state=1)
  X_2dim = pca.fit_transform(X)

  # One scatter series per cluster id so the legend maps colours to clusters.
  for i in np.unique(Y_pred):
    in_cluster = Y_pred == i
    plt.scatter(X_2dim[in_cluster, 0], X_2dim[in_cluster, 1], label=i)
  plt.xlabel('PC 1')
  plt.ylabel('PC 2')
  plt.title(f'k-means clusters (k={k}) in PCA space')
  plt.legend()
  plt.show()

  return kmeans, plt

### Cluster non-adapted hidden representations (vs. target domains)

In [1]:
# Cluster the stored 'original h0' vectors and compare the clusters with target domains.
# NOTE(review): eval() executes whatever expression the CSV cell contains. Only run this
# on trusted files.
X = np.array([eval(cell) for cell in df['original h0']])
Y = np.array(list(df['target domain']))

cluster_and_plot(X, Y)

NameError: ignored

### Cluster adapted hidden representations (vs. target domains)



In [82]:
# One clustering run per adaptation step, compared against target domains.
for step in range(3):
  print(f'Step {step}')

  # NOTE(review): eval() executes whatever expression the CSV cell contains. Only run
  # this on trusted files.
  X = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])
  Y = np.array(list(df['target domain']))

  cluster_and_plot(X, Y)

17

### Cluster adapted hidden representations (vs. listener domains)


In [None]:
# One clustering run per adaptation step, compared against listener domains.
for step in range(3):
  print(f'Step {step}')

  # NOTE(review): eval() executes whatever expression the CSV cell contains. Only run
  # this on trusted files.
  X = np.array([eval(cell) for cell in df[f'adapted h0 s{step}']])
  Y = np.array(list(df['listener domain']))

  cluster_and_plot(X, Y)

### Cluster difference between adapted and non-adapted hidden representations (vs. listener domains)


In [None]:
# Cluster the element-wise difference (adapted - original) for each adaptation step and
# compare the clusters with listener domains.
for step in range(3):
  print(f'Step {step}')

  # NOTE(review): eval() executes whatever expression the CSV cell contains. Only run
  # this on trusted files.
  X = np.array([
    np.array(eval(adapted)) - np.array(eval(original))
    for adapted, original in zip(df[f'adapted h0 s{step}'], df['original h0'])
  ])
  Y = np.array(list(df['listener domain']))

  cluster_and_plot(X, Y)