<a href="https://colab.research.google.com/github/mamonalsalihy/Emotion_Detection/blob/main/Models/SVM_github_copy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Imports

In [None]:
import  numpy as np
import  os
import  pandas as pd
from    sklearn.feature_extraction.text import TfidfVectorizer
from    sklearn.metrics import precision_recall_fscore_support, accuracy_score, confusion_matrix, classification_report
from    sklearn.model_selection import train_test_split 
from    sklearn.preprocessing import LabelEncoder
from    sklearn.svm import LinearSVC
import  seaborn as sns
import  matplotlib.pyplot as plt
from    tqdm.auto import tqdm
from    sklearn.pipeline import Pipeline

#Setup paths

In [None]:
# locations of the three dataset splits (relative to the working directory)
train_path, valid_path, test_path = './train.csv', './valid.csv', './test.csv'

# load every split into its own dataframe, in train / valid / test order
train, valid, test = (
    pd.read_csv(csv_path) for csv_path in (train_path, valid_path, test_path)
)

#SVM classifier

In [None]:
class SVM_Classifier:

  """
  linear SVM classifier that is fit as soon as it is constructed.

  by default the input is raw text: it is vectorized with tf-idf and
  passed to a LinearSVC through a single sklearn Pipeline. with
  liwc=True the tf-idf step is skipped and the LinearSVC is fit
  directly on the features that are handed in.
  """

  def __init__(self, X_train, y_train, liwc=False):

    """
    build the model and fit it on the provided training data.
      parameters:
        X_train: type: iterable(str), or feature matrix if liwc=True
          the input data to train the svm on. unless liwc=True, each
          sample is expected to be a raw string.
        y_train: type: iterable(str or int)
          the labels the svm should target during training.
        liwc: type: bool
          when True, X_train is passed to the svm as-is (no tf-idf
          vectorization is applied).
      return: none
    """

    _svm = LinearSVC()

    if liwc:
      # features are used as provided, so the bare svm is the model
      self.svm = _svm
    else:
      # raw text: vectorize with tf-idf inside the same pipeline so
      # predict() can later be called on raw strings as well
      _tfidf_vec = TfidfVectorizer(sublinear_tf=True,
                                   norm='l1',
                                   encoding='utf-8',
                                   stop_words=None)
      self.svm = Pipeline([('tfidf', _tfidf_vec),
                           ('svm', _svm)])

    # fit on provided training data
    self.svm.fit(X_train, y_train)

  @staticmethod
  def _ordered_labels(y_true, y_pred):

    """
    return the sorted list of labels found in the targets or in the
    predictions. sorting keeps the confusion-matrix axes in the same
    order on every run.
    """

    return sorted(set(y_true) | set(y_pred))

  def evaluate_svm(self, X_test, y_test, accuracy=True,
                   confusion=True,
                   title='SVM Confusion on Utterances Test'):

    """
    evaluate the fitted SVM on test data. optionally prints sklearn's
    classification_report and optionally plots a confusion matrix.
      parameters:
        X_test: type: iterable(str), or feature matrix if liwc=True
          test data in the same form the model was trained on.
        y_test: type: iterable(str or int)
          the targets of the test data.
        accuracy: type: bool
          specify whether to print the classification report.
        confusion: type: bool
          specify whether to plot the confusion matrix for the
          provided test set.
        title: type: str
          title of the confusion-matrix plot. the default matches the
          previously hard-coded title.
      return: none
    """

    # predict on provided test data
    y_test_pred = self.svm.predict(X_test)

    # report performance on test data
    if accuracy:
      print(classification_report(y_test, y_test_pred))

    # produce confusion matrix on test
    if confusion:
      # labels from both targets and predictions, in sorted order, so
      # that no predicted class is left out of the matrix
      labels = self._ordered_labels(y_test, y_test_pred)

      conf_mat = confusion_matrix(y_test, y_test_pred, labels=labels)

      # draw the heatmap on an explicit axis and name the axes
      _, ax = plt.subplots(figsize=(10, 10))
      sns.heatmap(conf_mat, annot=True, fmt='d',
                  xticklabels=labels, yticklabels=labels, ax=ax)
      ax.set_title(title)
      ax.set_ylabel('Actual')
      ax.set_xlabel('Predicted')
      plt.show()

#Method for extracting text, targets

In [None]:
def extract_instances(data, in_col, out_col):

  """
  pull an (input, output) column pair out of a dataframe for SVM
  training or eval. only the two requested columns are looked at:
  duplicate (input, output) rows are removed first, then every row
  with a missing value in either column is removed.
    data: type: pd.df
      dataframe holding the desired input and output columns.
    in_col: type: str
      name of the column holding the input data.
    out_col: type: str
      name of the column holding the output data.
    return:
      in_data: type: pd.Series
        the input column of the filtered rows.
      out_data: type: pd.Series
        the output column of the filtered rows.
  """

  # restrict to the two columns of interest
  pair = data[[in_col, out_col]]

  # de-duplicate first, then discard incomplete rows
  pair = pair.drop_duplicates()
  pair = pair.dropna()

  return pair[in_col], pair[out_col]


#SVM from prompts to 32 emotion labels

In [None]:
# cleaned prompts paired with their 'context' label, for train and test
train_prompt, train_prompt_labels = extract_instances(
    data=train, in_col='clean_prompt', out_col='context')
# validation split is currently not used
#valid_prompt, valid_prompt_labels = extract_instances(valid, 'clean_prompt', 'context')
test_prompt, test_prompt_labels = extract_instances(
    data=test, in_col='clean_prompt', out_col='context')

In [None]:
# fit the classifier on the training prompts
clf = SVM_Classifier(X_train=train_prompt, y_train=train_prompt_labels)

In [None]:
# evaluate on the test prompts (validation call kept for reference)
#clf.evaluate_svm(valid_prompt, valid_prompt_labels)
clf.evaluate_svm(X_test=test_prompt, y_test=test_prompt_labels)

#SVM from utterances to 32 emotion labels

In [None]:
# cleaned utterances paired with their 'context' label, for train and test
train_utter, train_utter_labels = extract_instances(
    data=train, in_col='clean_utterance', out_col='context')
# validation split is currently not used
#valid_utter, valid_utter_labels = extract_instances(valid, 'clean_utterance', 'context')
test_utter, test_utter_labels = extract_instances(
    data=test, in_col='clean_utterance', out_col='context')

In [None]:
# fit the classifier on the training utterances
clf = SVM_Classifier(X_train=train_utter, y_train=train_utter_labels)

In [None]:
# evaluate on the test utterances (validation call kept for reference)
#clf.evaluate_svm(valid_utter, valid_utter_labels)
clf.evaluate_svm(X_test=test_utter, y_test=test_utter_labels)

#SVM from prompts to sentiment

In [None]:
# cleaned prompts paired with their 'emotion_category' (sentiment) label
train_prompt, train_sent = extract_instances(
    data=train, in_col='clean_prompt', out_col='emotion_category')
# validation split is currently not used
#valid_prompt, valid_sent = extract_instances(valid, 'clean_prompt', 'emotion_category')
test_prompt, test_sent = extract_instances(
    data=test, in_col='clean_prompt', out_col='emotion_category')

In [None]:
# fit the classifier on the training prompts with sentiment targets
clf = SVM_Classifier(X_train=train_prompt, y_train=train_sent)

In [None]:
# evaluate on the test prompts (validation call kept for reference)
#clf.evaluate_svm(valid_prompt, valid_sent)
clf.evaluate_svm(X_test=test_prompt, y_test=test_sent)

#SVM from utterances to sentiment

In [None]:
# cleaned utterances paired with their 'emotion_category' (sentiment) label
train_utter, train_sent = extract_instances(
    data=train, in_col='clean_utterance', out_col='emotion_category')
# validation split is currently not used
#valid_utter, valid_sent = extract_instances(valid, 'clean_utterance', 'emotion_category')
test_utter, test_sent = extract_instances(
    data=test, in_col='clean_utterance', out_col='emotion_category')

In [None]:
# fit the classifier on the training utterances with sentiment targets
clf = SVM_Classifier(X_train=train_utter, y_train=train_sent)

In [None]:
# evaluate on the test utterances (validation call kept for reference)
#clf.evaluate_svm(valid_utter, valid_sent)
clf.evaluate_svm(X_test=test_utter, y_test=test_sent)

#SVM from speaker utterances to emotion

In [None]:
# keep only the rows whose speaker_label is "speaker" in every split
train_speaker = train.loc[train["speaker_label"] == "speaker"]
valid_speaker = valid.loc[valid["speaker_label"] == "speaker"]
test_speaker  = test.loc[test["speaker_label"] == "speaker"]

# cleaned speaker utterances paired with their 'context' label
train_utter, train_utter_labels = extract_instances(
    data=train_speaker, in_col='clean_utterance', out_col='context')
# validation split is currently not used
#valid_utter, valid_utter_labels = extract_instances(valid_speaker, 'clean_utterance', 'context')
test_utter, test_utter_labels = extract_instances(
    data=test_speaker, in_col='clean_utterance', out_col='context')

In [None]:
# fit the classifier on the utterances extracted in the previous cell
clf = SVM_Classifier(X_train=train_utter, y_train=train_utter_labels)

In [None]:
# evaluate on the matching test utterances (validation call kept for reference)
#clf.evaluate_svm(valid_utter, valid_utter_labels)
clf.evaluate_svm(X_test=test_utter, y_test=test_utter_labels)

#SVM from listener utterances to emotion

In [None]:
# keep only the rows whose speaker_label is "listener" in every split
train_listener = train.loc[train["speaker_label"] == "listener"]
valid_listener = valid.loc[valid["speaker_label"] == "listener"]
test_listener  = test.loc[test["speaker_label"] == "listener"]

# cleaned listener utterances paired with their 'context' label
train_utter, train_utter_labels = extract_instances(
    data=train_listener, in_col='clean_utterance', out_col='context')
# validation split is currently not used
#valid_utter, valid_utter_labels = extract_instances(valid_listener, 'clean_utterance', 'context')
test_utter, test_utter_labels = extract_instances(
    data=test_listener, in_col='clean_utterance', out_col='context')

In [None]:
# fit the classifier on the utterances extracted in the previous cell
clf = SVM_Classifier(X_train=train_utter, y_train=train_utter_labels)

In [None]:
# evaluate on the matching test utterances (validation call kept for reference)
#clf.evaluate_svm(valid_utter, valid_utter_labels)
clf.evaluate_svm(X_test=test_utter, y_test=test_utter_labels)

#SVM from listener utterances to sentiment

In [None]:
# keep only the rows whose speaker_label is "listener" in every split
train_listener = train.loc[train["speaker_label"] == "listener"]
valid_listener = valid.loc[valid["speaker_label"] == "listener"]
test_listener  = test.loc[test["speaker_label"] == "listener"]

# cleaned listener utterances paired with their 'emotion_category' label
train_utter, train_sent = extract_instances(
    data=train_listener, in_col='clean_utterance', out_col='emotion_category')
# validation split is currently not used
#valid_utter, valid_sent = extract_instances(valid_listener, 'clean_utterance', 'emotion_category')
test_utter, test_sent = extract_instances(
    data=test_listener, in_col='clean_utterance', out_col='emotion_category')

In [None]:
# fit the classifier on the utterances extracted in the previous cell
clf = SVM_Classifier(X_train=train_utter, y_train=train_sent)

In [None]:
# evaluate on the matching test utterances (validation call kept for reference)
#clf.evaluate_svm(valid_utter, valid_sent)
clf.evaluate_svm(X_test=test_utter, y_test=test_sent)

#SVM from speaker utterances to sentiment

In [None]:
# extract speaker data
# NOTE: this section is the speaker -> sentiment experiment; it previously
# filtered on "listener" and therefore repeated the listener experiment.
train_speaker = train[train["speaker_label"] == "speaker"]
valid_speaker = valid[valid["speaker_label"] == "speaker"]
test_speaker  = test[test["speaker_label"] == "speaker"]

# extract text, labels: cleaned speaker utterances with 'emotion_category'
train_utter, train_sent = extract_instances(train_speaker, 'clean_utterance', 'emotion_category')
#valid_utter, valid_sent = extract_instances(valid_speaker, 'clean_utterance', 'emotion_category')
test_utter, test_sent = extract_instances(test_speaker, 'clean_utterance', 'emotion_category')

In [None]:
# fit the classifier on the utterances extracted in the previous cell
clf = SVM_Classifier(X_train=train_utter, y_train=train_sent)

In [None]:
# evaluate on the matching test utterances (validation call kept for reference)
#clf.evaluate_svm(valid_utter, valid_sent)
clf.evaluate_svm(X_test=test_utter, y_test=test_sent)

#SVM from LIWC to emotions

In [None]:
# columns that are removed before the frame is used as LIWC input
non_liwc = [
    'conv_id',
    'utterance_idx',
    'prompt',
    'speaker_idx',
    'utterance',
    'selfeval',
    'tags',
    'clean_prompt',
    'clean_utterance',
    'speaker_label',
]

In [None]:
# per split: remove the non-LIWC columns, then discard rows with missing values
train_liwc, valid_liwc, test_liwc = (
    split.drop(columns=non_liwc).dropna() for split in (train, valid, test)
)

In [None]:
# targets for this experiment are the 'context' labels.
# both label columns ('context' and 'emotion_category') are removed from the
# feature frame: leaving the other task's label in would leak target
# information into the features.
train_labels = train_liwc['context']
train_liwc_feat = train_liwc.drop(columns=['context', 'emotion_category'])
#valid_labels = valid_liwc['context']; valid_liwc_feat = valid_liwc.drop(columns=['context', 'emotion_category'])
test_labels = test_liwc['context']
test_liwc_feat = test_liwc.drop(columns=['context', 'emotion_category'])

In [None]:
 # train svm 
# liwc=True: the features are passed to the svm without tf-idf vectorization
clf = SVM_Classifier(X_train=train_liwc_feat, y_train=train_labels, liwc=True)

KeyboardInterrupt: ignored

In [None]:
# evaluate on the test LIWC features (validation call kept for reference)
#clf.evaluate_svm(valid_liwc_feat, valid_labels)
clf.evaluate_svm(X_test=test_liwc_feat, y_test=test_labels)

              precision    recall  f1-score   support

      afraid       0.05      0.14      0.08        63
       angry       0.28      0.11      0.16       451
     annoyed       0.01      0.05      0.02        42
anticipating       0.52      0.51      0.51       172
     anxious       0.11      0.31      0.16        59
apprehensive       0.09      0.30      0.14        47
     ashamed       0.02      0.11      0.04        28
      caring       0.02      1.00      0.04         4
   confident       0.43      0.29      0.34       254
     content       0.32      0.28      0.30       193
  devastated       0.12      0.14      0.13       133
disappointed       0.00      0.00      0.00         5
   disgusted       0.06      0.61      0.11        18
 embarrassed       0.01      0.12      0.01         8
     excited       0.11      0.44      0.18        52
    faithful       0.32      0.47      0.39        80
     furious       0.13      0.05      0.07       403
    grateful       0.06    

#SVM from LIWC features to sentiment

In [None]:
# columns that are removed before the frame is used as LIWC input
non_liwc = [
    'conv_id',
    'utterance_idx',
    'prompt',
    'speaker_idx',
    'utterance',
    'selfeval',
    'tags',
    'clean_prompt',
    'clean_utterance',
    'speaker_label',
]

In [None]:
# per split: remove the non-LIWC columns, then discard rows with missing values
train_liwc, valid_liwc, test_liwc = (
    split.drop(columns=non_liwc).dropna() for split in (train, valid, test)
)

In [None]:
# targets for this experiment are the 'emotion_category' labels.
# both label columns ('emotion_category' and 'context') are removed from the
# feature frame: 'context' is the other task's label and must not be fed to
# the svm as a feature. train and test features are both kept as dataframes
# so the model is fit and evaluated on the same kind of input.
train_labels = train_liwc['emotion_category']
train_liwc_feat = train_liwc.drop(columns=['emotion_category', 'context'])
#valid_labels = valid_liwc['emotion_category']; valid_liwc_feat = valid_liwc.drop(columns=['emotion_category', 'context'])
test_labels = test_liwc['emotion_category']
test_liwc_feat = test_liwc.drop(columns=['emotion_category', 'context'])

In [None]:
 # train svm 
# liwc=True: the features are passed to the svm without tf-idf vectorization
clf = SVM_Classifier(X_train=train_liwc_feat, y_train=train_labels, liwc=True)

In [None]:
# evaluate on the test LIWC features (validation call kept for reference)
#clf.evaluate_svm(valid_liwc_feat, valid_labels)
clf.evaluate_svm(X_test=test_liwc_feat, y_test=test_labels)