In [14]:
import os

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from gensim.models import KeyedVectors
import joblib

from data_utils import ml_read_data



In [15]:
# Load the two pre-trained embedding tables from binary word2vec-format files.
# The generator is consumed left to right, so the GloVe file is read first and
# the emoji2vec file second.
glove_model, emoji2vec_model = (
    KeyedVectors.load_word2vec_format(embedding_path, binary=True)
    for embedding_path in ('glove.twitter.27B.200d.bin', 'emoji2vec_twitter.bin')
)

In [16]:
def _read_split(sentence_csv, label_csv):
    """Read one data split via ml_read_data, using the two loaded embedding models.

    Returns whatever ml_read_data returns; the callers below unpack it as
    (X, y, X_emoji, y_emoji).
    """
    return ml_read_data(sentence_csv, label_csv, glove_model, emoji2vec_model)


# Each split yields a plain feature/label pair and an "emoji" feature/label pair.
X_train, y_train, X_emoji_train, y_emoji_train = _read_split("train_sentence.csv", "train_label.csv")
X_test, y_test, X_emoji_test, y_emoji_test = _read_split("test_sentence.csv", "test_label.csv")
X_subtest, y_subtest, X_emoji_subtest, y_emoji_subtest = _read_split("subtest_sentence.csv", "subtest_label.csv")

## Support Vector Machine (SVM)

In [17]:
# Reuse the cached SVM baselines when both pickles exist; otherwise fit them
# from scratch and cache the result for the next run.
# NOTE: joblib.load unpickles arbitrary objects -- only load model files that
# were produced by this notebook.
try:
	svm_classifier = joblib.load('baseline_models/svm_classifier.pkl')
	svm_classifier_we = joblib.load('baseline_models/svm_classifier_we.pkl')
	print("Loaded SVM models from files successfully.")
except FileNotFoundError:
	# At least one pickle is missing: retrain both models so the pair stays in sync.
	print("Training single-modal models:")
	svm_classifier = SVC()
	svm_classifier.fit(X_train, y_train)
	print("Training multi-modal models:")
	svm_classifier_we = SVC()
	svm_classifier_we.fit(X_emoji_train, y_emoji_train)
	print("Saving models")
	# joblib.dump does not create missing parent directories; without this the
	# cell would fail with FileNotFoundError right after training.
	os.makedirs('baseline_models', exist_ok=True)
	joblib.dump(svm_classifier, 'baseline_models/svm_classifier.pkl')
	joblib.dump(svm_classifier_we, 'baseline_models/svm_classifier_we.pkl')
	print("Saved models to files successfully.")

Loaded SVM models from files successfully.


In [18]:
# Predict on the test and "subtest" splits with both SVM variants.
# Predictions are cast to int before being compared with the labels.
svm_y_pred = svm_classifier.predict(X_test).astype(int)
svm_y_emoji_pred = svm_classifier_we.predict(X_emoji_test).astype(int)
svm_y_sub_pred = svm_classifier.predict(X_subtest).astype(int)
svm_y_sub_emoji_pred = svm_classifier_we.predict(X_emoji_subtest).astype(int)

# Score each model against the labels that ml_read_data returned together with
# the features it was given: the multi-modal model is fit on y_emoji_train, so
# its predictions are compared with y_emoji_test / y_emoji_subtest.
svm_accuracy = accuracy_score(y_test, svm_y_pred)
svm_accuracy_emoji = accuracy_score(y_emoji_test, svm_y_emoji_pred)
svm_accuracy_sub = accuracy_score(y_subtest, svm_y_sub_pred)
svm_accuracy_sub_emoji = accuracy_score(y_emoji_subtest, svm_y_sub_emoji_pred)
print("SVM Accuracy single-modal:", svm_accuracy)
print("SVM Accuracy multi-modal:", svm_accuracy_emoji)
print("SVM Accuracy single-modal in subtest:", svm_accuracy_sub)
print("SVM Accuracy multi-modal in subtest:", svm_accuracy_sub_emoji)

SVM Accuracy single-modal: 0.769
SVM Accuracy multi-modal: 0.763
SVM Accuracy single-modal in subtest: 0.8129496402877698
SVM Accuracy multi-modal in subtest: 0.8237410071942446


## Decision Tree (DT)

In [19]:
# Reuse the cached decision-tree baselines when both pickles exist; otherwise
# fit them from scratch and cache the result for the next run.
# NOTE: joblib.load unpickles arbitrary objects -- only load model files that
# were produced by this notebook.
try:
	dt_classifier = joblib.load('baseline_models/dt_classifier.pkl')
	dt_classifier_we = joblib.load('baseline_models/dt_classifier_we.pkl')
	print("Loaded dt models from files successfully.")
except FileNotFoundError:
	# At least one pickle is missing: retrain both models so the pair stays in sync.
	print("Training single-modal models:")
	dt_classifier = DecisionTreeClassifier()
	dt_classifier.fit(X_train, y_train)
	print("Training multi-modal models:")
	# Must be a decision tree as well (this was previously an SVC by mistake),
	# otherwise the "dt" multi-modal baseline would silently be an SVM.
	dt_classifier_we = DecisionTreeClassifier()
	dt_classifier_we.fit(X_emoji_train, y_emoji_train)
	print("Saving models")
	# joblib.dump does not create missing parent directories; without this the
	# cell would fail with FileNotFoundError right after training.
	os.makedirs('baseline_models', exist_ok=True)
	joblib.dump(dt_classifier, 'baseline_models/dt_classifier.pkl')
	joblib.dump(dt_classifier_we, 'baseline_models/dt_classifier_we.pkl')
	print("Saved models to files successfully.")

Loaded dt models from files successfully.


In [20]:
# Predict on the test and "subtest" splits with both decision-tree variants.
# Predictions are cast to int before being compared with the labels.
dt_y_pred = dt_classifier.predict(X_test).astype(int)
dt_y_emoji_pred = dt_classifier_we.predict(X_emoji_test).astype(int)
dt_y_sub_pred = dt_classifier.predict(X_subtest).astype(int)
dt_y_sub_emoji_pred = dt_classifier_we.predict(X_emoji_subtest).astype(int)

# Score each model against the labels that ml_read_data returned together with
# the features it was given: the multi-modal model is fit on y_emoji_train, so
# its predictions are compared with y_emoji_test / y_emoji_subtest.
dt_accuracy = accuracy_score(y_test, dt_y_pred)
dt_accuracy_emoji = accuracy_score(y_emoji_test, dt_y_emoji_pred)
dt_accuracy_sub = accuracy_score(y_subtest, dt_y_sub_pred)
dt_accuracy_sub_emoji = accuracy_score(y_emoji_subtest, dt_y_sub_emoji_pred)
print("Decision Tree Accuracy single-modal:", dt_accuracy)
print("Decision Tree Accuracy multi-modal:", dt_accuracy_emoji)
print("Decision Tree Accuracy single-modal in subtest:", dt_accuracy_sub)
print("Decision Tree Accuracy multi-modal in subtest:", dt_accuracy_sub_emoji)

Decision Tree Accuracy single-modal: 0.7265
Decision Tree Accuracy multi-modal: 0.7295
Decision Tree Accuracy single-modal in subtest: 0.7769784172661871
Decision Tree Accuracy multi-modal in subtest: 0.7985611510791367


## Random Forest (RF)

In [21]:
# Reuse the cached random-forest baselines when both pickles exist; otherwise
# fit them from scratch and cache the result for the next run.
# NOTE: joblib.load unpickles arbitrary objects -- only load model files that
# were produced by this notebook.
try:
	rf_classifier = joblib.load('baseline_models/rf_classifier.pkl')
	rf_classifier_we = joblib.load('baseline_models/rf_classifier_we.pkl')
	print("Loaded rf models from files successfully.")
except FileNotFoundError:
	# At least one pickle is missing: retrain both models so the pair stays in sync.
	print("Training single-modal models:")
	# Must be a random forest (this was previously an SVC by mistake),
	# otherwise the "rf" single-modal baseline would silently be an SVM.
	rf_classifier = RandomForestClassifier()
	rf_classifier.fit(X_train, y_train)
	print("Training multi-modal models:")
	rf_classifier_we = RandomForestClassifier()
	rf_classifier_we.fit(X_emoji_train, y_emoji_train)
	print("Saving models")
	# joblib.dump does not create missing parent directories; without this the
	# cell would fail with FileNotFoundError right after training.
	os.makedirs('baseline_models', exist_ok=True)
	joblib.dump(rf_classifier, 'baseline_models/rf_classifier.pkl')
	joblib.dump(rf_classifier_we, 'baseline_models/rf_classifier_we.pkl')
	print("Saved models to files successfully.")

Loaded rf models from files successfully.


In [22]:
# Predict on the test and "subtest" splits with both random-forest variants.
# Predictions are cast to int before being compared with the labels.
rf_y_pred = rf_classifier.predict(X_test).astype(int)
rf_y_emoji_pred = rf_classifier_we.predict(X_emoji_test).astype(int)
rf_y_sub_pred = rf_classifier.predict(X_subtest).astype(int)
rf_y_sub_emoji_pred = rf_classifier_we.predict(X_emoji_subtest).astype(int)

# Score each model against the labels that ml_read_data returned together with
# the features it was given: the multi-modal model is fit on y_emoji_train, so
# its predictions are compared with y_emoji_test / y_emoji_subtest.
rf_accuracy = accuracy_score(y_test, rf_y_pred)
rf_accuracy_emoji = accuracy_score(y_emoji_test, rf_y_emoji_pred)
rf_accuracy_sub = accuracy_score(y_subtest, rf_y_sub_pred)
rf_accuracy_sub_emoji = accuracy_score(y_emoji_subtest, rf_y_sub_emoji_pred)
print("Random Forest Accuracy single-modal:", rf_accuracy)
print("Random Forest Accuracy multi-modal:", rf_accuracy_emoji)
print("Random Forest Accuracy single-modal in subtest:", rf_accuracy_sub)
print("Random Forest Accuracy multi-modal in subtest:", rf_accuracy_sub_emoji)

Random Forest Accuracy single-modal: 0.8145
Random Forest Accuracy multi-modal: 0.818
Random Forest Accuracy single-modal in subtest: 0.8057553956834532
Random Forest Accuracy multi-modal in subtest: 0.8525179856115108


## Gradient Boosting (GBT)

In [23]:
# Reuse the cached gradient-boosting baselines when both pickles exist;
# otherwise fit them from scratch and cache the result for the next run.
# NOTE: joblib.load unpickles arbitrary objects -- only load model files that
# were produced by this notebook.
try:
	gbt_classifier = joblib.load('baseline_models/gbt_classifier.pkl')
	gbt_classifier_we = joblib.load('baseline_models/gbt_classifier_we.pkl')
	print("Loaded gbt models from files successfully.")
except FileNotFoundError:
	# At least one pickle is missing: retrain both models so the pair stays in sync.
	print("Training single-modal models:")
	# Must be gradient boosting (this was previously an SVC by mistake),
	# otherwise the "gbt" single-modal baseline would silently be an SVM.
	gbt_classifier = GradientBoostingClassifier()
	gbt_classifier.fit(X_train, y_train)
	print("Training multi-modal models:")
	gbt_classifier_we = GradientBoostingClassifier()
	gbt_classifier_we.fit(X_emoji_train, y_emoji_train)
	print("Saving models")
	# joblib.dump does not create missing parent directories; without this the
	# cell would fail with FileNotFoundError right after training.
	os.makedirs('baseline_models', exist_ok=True)
	joblib.dump(gbt_classifier, 'baseline_models/gbt_classifier.pkl')
	joblib.dump(gbt_classifier_we, 'baseline_models/gbt_classifier_we.pkl')
	print("Saved models to files successfully.")

Loaded gbt models from files successfully.


In [24]:
# Predict on the test and "subtest" splits with both gradient-boosting variants.
# Predictions are cast to int before being compared with the labels.
gbt_y_pred = gbt_classifier.predict(X_test).astype(int)
gbt_y_emoji_pred = gbt_classifier_we.predict(X_emoji_test).astype(int)
gbt_y_sub_pred = gbt_classifier.predict(X_subtest).astype(int)
gbt_y_sub_emoji_pred = gbt_classifier_we.predict(X_emoji_subtest).astype(int)

# Score each model against the labels that ml_read_data returned together with
# the features it was given: the multi-modal model is fit on y_emoji_train, so
# its predictions are compared with y_emoji_test / y_emoji_subtest.
gbt_accuracy = accuracy_score(y_test, gbt_y_pred)
gbt_accuracy_emoji = accuracy_score(y_emoji_test, gbt_y_emoji_pred)
gbt_accuracy_sub = accuracy_score(y_subtest, gbt_y_sub_pred)
gbt_accuracy_sub_emoji = accuracy_score(y_emoji_subtest, gbt_y_sub_emoji_pred)
print("Gradient Boosting Accuracy single-modal:", gbt_accuracy)
print("Gradient Boosting Accuracy multi-modal:", gbt_accuracy_emoji)
print("Gradient Boosting Accuracy single-modal in subtest:", gbt_accuracy_sub)
print("Gradient Boosting Accuracy multi-modal in subtest:", gbt_accuracy_sub_emoji)

Gradient Boosting Accuracy single-modal: 0.746
Gradient Boosting Accuracy multi-modal: 0.7475
Gradient Boosting Accuracy single-modal in subtest: 0.7949640287769785
Gradient Boosting Accuracy multi-modal in subtest: 0.7949640287769785
