In [None]:
import numpy as np 
import pandas as pd 
import tensorflow as tf 
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Single Label

## emotion-twitter

In [None]:
# Base directory of the emotion-twitter task; the CSV splits are read from here.
emotion_twitter = 'downstream/e5-small/emot_emotion-twitter'

In [None]:
# Load the emotion-twitter splits, both sets of precomputed embeddings, and
# turn the string labels into integer class ids.

# The distilled embeddings mirror the E5-Small layout under a sibling directory.
emotion_twitter_distill = 'downstream/distill-e5-small/emot_emotion-twitter'

train_emotion_twitter = pd.read_csv(f'{emotion_twitter}/train_preprocess.csv')
valid_emotion_twitter = pd.read_csv(f'{emotion_twitter}/valid_preprocess.csv')
test_emotion_twitter = pd.read_csv(f'{emotion_twitter}/test_preprocess.csv')

train_emotion_twitter_embedding_e5 = np.load(f'{emotion_twitter}/train.npy')
valid_emotion_twitter_embedding_e5 = np.load(f'{emotion_twitter}/valid.npy')
test_emotion_twitter_embedding_e5 = np.load(f'{emotion_twitter}/test.npy')

train_emotion_twitter_embedding_distill = np.load(f'{emotion_twitter_distill}/train.npy')
valid_emotion_twitter_embedding_distill = np.load(f'{emotion_twitter_distill}/valid.npy')
test_emotion_twitter_embedding_distill = np.load(f'{emotion_twitter_distill}/test.npy')

# Class ids follow the order in which labels first appear in the training split.
label_list = train_emotion_twitter['label'].unique().tolist()
label_to_id = {name: idx for idx, name in enumerate(label_list)}

# Fail early, with a readable message, if valid/test contain a label that the
# training split never showed (the id mapping is defined by train only).
for split_name, split_df in (('valid', valid_emotion_twitter), ('test', test_emotion_twitter)):
    unseen = set(split_df['label']) - label_to_id.keys()
    if unseen:
        raise ValueError(
            f"emotion-twitter {split_name} split has labels missing from train: "
            f"{sorted(map(str, unseen))}"
        )

# Dict lookup replaces the repeated list.index scan; the resulting ids are identical.
train_emotion_twitter_label = [label_to_id[name] for name in train_emotion_twitter['label']]
valid_emotion_twitter_label = [label_to_id[name] for name in valid_emotion_twitter['label']]
test_emotion_twitter_label = [label_to_id[name] for name in test_emotion_twitter['label']]



In [None]:
# Sanity check: show the shape of the training embeddings loaded from the distill-e5-small directory.
train_emotion_twitter_embedding_distill.shape

In [None]:
# Logistic-regression probe on emotion-twitter: fit one classifier per embedding
# set (E5-Small, Distill E5-Small) and report test accuracy for each.
# NOTE(review): LogisticRegression runs with its default iteration budget; if a
# ConvergenceWarning shows up, consider raising max_iter (left unchanged here so
# the reported numbers stay comparable).

# Flatten everything after the sample axis so each row is one feature vector.
X_train_emot_e5 = train_emotion_twitter_embedding_e5.reshape(train_emotion_twitter_embedding_e5.shape[0], -1)
X_test_emot_e5 = test_emotion_twitter_embedding_e5.reshape(test_emotion_twitter_embedding_e5.shape[0], -1)
X_train_emot_distill = train_emotion_twitter_embedding_distill.reshape(train_emotion_twitter_embedding_distill.shape[0], -1)
X_test_emot_distill = test_emotion_twitter_embedding_distill.reshape(test_emotion_twitter_embedding_distill.shape[0], -1)

model_e5 = LogisticRegression(random_state=42)
model_e5.fit(X_train_emot_e5, train_emotion_twitter_label)
y_pred = model_e5.predict(X_test_emot_e5)

# accuracy_score expects (y_true, y_pred); keep that order so the call stays
# correct if the metric is ever swapped for a non-symmetric one.
accuracy = accuracy_score(test_emotion_twitter_label, y_pred)
print(f"Accuracy on E5-Small: {accuracy:.4f}")

model_distill = LogisticRegression(random_state=42)
model_distill.fit(X_train_emot_distill, train_emotion_twitter_label)
y_pred = model_distill.predict(X_test_emot_distill)

accuracy = accuracy_score(test_emotion_twitter_label, y_pred)
print(f"Accuracy on Distill E5-Small: {accuracy:.4f}")



In [None]:
# Gaussian Naive Bayes probe on emotion-twitter: same comparison as the
# logistic-regression cell, with a different classifier on the same embeddings.

# Flatten everything after the sample axis so each row is one feature vector.
X_train_emot_e5 = train_emotion_twitter_embedding_e5.reshape(train_emotion_twitter_embedding_e5.shape[0], -1)
X_test_emot_e5 = test_emotion_twitter_embedding_e5.reshape(test_emotion_twitter_embedding_e5.shape[0], -1)
X_train_emot_distill = train_emotion_twitter_embedding_distill.reshape(train_emotion_twitter_embedding_distill.shape[0], -1)
X_test_emot_distill = test_emotion_twitter_embedding_distill.reshape(test_emotion_twitter_embedding_distill.shape[0], -1)

model_e5 = GaussianNB()
model_e5.fit(X_train_emot_e5, train_emotion_twitter_label)
y_pred = model_e5.predict(X_test_emot_e5)

# accuracy_score expects (y_true, y_pred); keep that order so the call stays
# correct if the metric is ever swapped for a non-symmetric one.
accuracy = accuracy_score(test_emotion_twitter_label, y_pred)
print(f"Accuracy on E5-Small: {accuracy:.4f}")

model_distill = GaussianNB()
model_distill.fit(X_train_emot_distill, train_emotion_twitter_label)
y_pred = model_distill.predict(X_test_emot_distill)

accuracy = accuracy_score(test_emotion_twitter_label, y_pred)
print(f"Accuracy on Distill E5-Small: {accuracy:.4f}")



## sentiment-prosa

In [None]:
# Base directory of the sentiment-prosa task; the TSV splits are read from here.
sentiment_prosa = 'downstream/e5-small/smsa_doc-sentiment-prosa'

In [None]:
# Load the sentiment-prosa splits, both sets of precomputed embeddings, and
# turn the labels into integer class ids.

# The distilled embeddings mirror the E5-Small layout under a sibling directory.
sentiment_prosa_distill = 'downstream/distill-e5-small/smsa_doc-sentiment-prosa'

# The TSV files are read without a header row, so column names are assigned explicitly.
train_sentiment_prosa = pd.read_csv(f'{sentiment_prosa}/train_preprocess.tsv', sep='\t', header=None)
train_sentiment_prosa.columns = ['text', 'label']

valid_sentiment_prosa = pd.read_csv(f'{sentiment_prosa}/valid_preprocess.tsv', sep='\t', header=None)
valid_sentiment_prosa.columns = ['text', 'label']

test_sentiment_prosa = pd.read_csv(f'{sentiment_prosa}/test_preprocess.tsv', sep='\t', header=None)
test_sentiment_prosa.columns = ['text', 'label']

train_sentiment_prosa_embedding_e5 = np.load(f'{sentiment_prosa}/train.npy')
valid_sentiment_prosa_embedding_e5 = np.load(f'{sentiment_prosa}/valid.npy')
test_sentiment_prosa_embedding_e5 = np.load(f'{sentiment_prosa}/test.npy')

train_sentiment_prosa_embedding_distill = np.load(f'{sentiment_prosa_distill}/train.npy')
valid_sentiment_prosa_embedding_distill = np.load(f'{sentiment_prosa_distill}/valid.npy')
test_sentiment_prosa_embedding_distill = np.load(f'{sentiment_prosa_distill}/test.npy')

# Class ids follow the order in which labels first appear in the training split.
label_list_prosa = train_sentiment_prosa['label'].unique().tolist()
label_to_id_prosa = {name: idx for idx, name in enumerate(label_list_prosa)}

# Fail early, with a readable message, if valid/test contain a label that the
# training split never showed (the id mapping is defined by train only).
for split_name, split_df in (('valid', valid_sentiment_prosa), ('test', test_sentiment_prosa)):
    unseen = set(split_df['label']) - label_to_id_prosa.keys()
    if unseen:
        raise ValueError(
            f"sentiment-prosa {split_name} split has labels missing from train: "
            f"{sorted(map(str, unseen))}"
        )

# Dict lookup replaces the repeated list.index scan; the resulting ids are identical.
train_sentiment_prosa_label = [label_to_id_prosa[name] for name in train_sentiment_prosa['label']]
valid_sentiment_prosa_label = [label_to_id_prosa[name] for name in valid_sentiment_prosa['label']]
test_sentiment_prosa_label = [label_to_id_prosa[name] for name in test_sentiment_prosa['label']]

In [None]:
# Logistic-regression probe on sentiment-prosa: fit one classifier per embedding
# set (E5-Small, Distill E5-Small) and report test accuracy for each.
# NOTE(review): LogisticRegression runs with its default iteration budget; if a
# ConvergenceWarning shows up, consider raising max_iter (left unchanged here so
# the reported numbers stay comparable).

# Flatten everything after the sample axis so each row is one feature vector.
X_train_prosa_e5 = train_sentiment_prosa_embedding_e5.reshape(train_sentiment_prosa_embedding_e5.shape[0], -1)
X_test_prosa_e5 = test_sentiment_prosa_embedding_e5.reshape(test_sentiment_prosa_embedding_e5.shape[0], -1)
X_train_prosa_distill = train_sentiment_prosa_embedding_distill.reshape(train_sentiment_prosa_embedding_distill.shape[0], -1)
X_test_prosa_distill = test_sentiment_prosa_embedding_distill.reshape(test_sentiment_prosa_embedding_distill.shape[0], -1)

model_e5 = LogisticRegression(random_state=42)
model_e5.fit(X_train_prosa_e5, train_sentiment_prosa_label)
y_pred = model_e5.predict(X_test_prosa_e5)

# accuracy_score expects (y_true, y_pred); keep that order so the call stays
# correct if the metric is ever swapped for a non-symmetric one.
accuracy = accuracy_score(test_sentiment_prosa_label, y_pred)
print(f"Accuracy on E5-Small: {accuracy:.4f}")

model_distill = LogisticRegression(random_state=42)
model_distill.fit(X_train_prosa_distill, train_sentiment_prosa_label)
y_pred = model_distill.predict(X_test_prosa_distill)

accuracy = accuracy_score(test_sentiment_prosa_label, y_pred)
print(f"Accuracy on Distill E5-Small: {accuracy:.4f}")



In [None]:
# Gaussian Naive Bayes probe on sentiment-prosa: same comparison as the
# logistic-regression cell, with a different classifier on the same embeddings.

# Flatten everything after the sample axis so each row is one feature vector.
X_train_prosa_e5 = train_sentiment_prosa_embedding_e5.reshape(train_sentiment_prosa_embedding_e5.shape[0], -1)
X_test_prosa_e5 = test_sentiment_prosa_embedding_e5.reshape(test_sentiment_prosa_embedding_e5.shape[0], -1)
X_train_prosa_distill = train_sentiment_prosa_embedding_distill.reshape(train_sentiment_prosa_embedding_distill.shape[0], -1)
X_test_prosa_distill = test_sentiment_prosa_embedding_distill.reshape(test_sentiment_prosa_embedding_distill.shape[0], -1)

model_e5 = GaussianNB()
model_e5.fit(X_train_prosa_e5, train_sentiment_prosa_label)
y_pred = model_e5.predict(X_test_prosa_e5)

# accuracy_score expects (y_true, y_pred); keep that order so the call stays
# correct if the metric is ever swapped for a non-symmetric one.
accuracy = accuracy_score(test_sentiment_prosa_label, y_pred)
print(f"Accuracy on E5-Small: {accuracy:.4f}")

model_distill = GaussianNB()
model_distill.fit(X_train_prosa_distill, train_sentiment_prosa_label)
y_pred = model_distill.predict(X_test_prosa_distill)

accuracy = accuracy_score(test_sentiment_prosa_label, y_pred)
print(f"Accuracy on Distill E5-Small: {accuracy:.4f}")

