# Importing Neccessary Libraries

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import emoji
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Setting Data Paths

In [41]:
data_path_train = 'Train\eRisk2023-T3_Subject'
no_of_training_instances = 46
data_path_test = 'Test\eRisk2022-T3_Subject'
no_of_testing_instances = 28
number_of_questions = 22

# Data Cleaning Functions

1. **Making the name of the Columns Same**

In [3]:
def make_coloumns_same(data_path, no_of_instances):

    for i in range(1, no_of_instances+1):
        df = pd.read_csv(f'{data_path}{i}.csv')
        
        # If a column is called "WRITING.TEXT", rename it to "WRITING.TEXT.Element:Text"
        if "WRITING.TEXT" in df.columns:
            df.rename(columns={"WRITING.TEXT": "WRITING.TEXT.Element:Text"}, inplace=True)
            df.to_csv(f'{data_path}{i}.csv', index=False)
        else:
            continue

2. **Following Data Cleaning operations are performed:**


    1. Remove the trailing b" and the ending "
    2. Replacing unicode characters with their actual characters
    3. Replacing emojis with their text descriptions

In [4]:
def data_cleaning(data_path, no_of_instances):

    for i in range(1, no_of_instances + 1):

        df = pd.read_csv(f'{data_path}{i}.csv')

        for index, row in df.iterrows():

            text_value = row['WRITING.TEXT.Element:Text']

            if isinstance(text_value, str):

                if ((text_value[2] == "'" or text_value[2] == '"') and (text_value[-2] == "'" or text_value[-2] == '"') and (text_value[0] == ' ' and text_value[1] == 'b' and text_value[-1] == ' ')):
                    df.at[index, 'WRITING.TEXT.Element:Text'] = text_value[3:-2]
                    text_value = text_value[3:-2]

                # Replace specific substrings
                text_value = text_value.replace('\\xe2\\x80\\x99', "'")
                text_value = text_value.replace('\\xe2\\x80\\x94', "-")
                text_value = text_value.replace('\\n', " ")

                # Define the regular expression pattern to match words starting with \xf0\x9f
                pattern = r'\\xf0\\x9f\S*'
                # Replace matched words with a single space
                text_value = re.sub(pattern, ' ', text_value)

                # Replace emoji with their text description
                text_value = emoji.demojize(text_value)
                df.at[index, 'WRITING.TEXT.Element:Text'] = text_value
                
            elif isinstance(text_value, float) and np.isnan(text_value):
                # Handle NaN values if needed
                pass
        
        df.to_csv(f'{data_path}{i}.csv', index=False)

3. **Making the final DataFrame before applying models to it**


In [6]:
def make_dataframe(data_path, no_of_instances, type):
    
    data_list = []
    df_final = pd.DataFrame(columns=['Subject', 'text'])

    if type == 'train':
        subject_name = 'eRisk2023-T3_Subject'
    
    else:
        subject_name = 'eRisk2022-T3_Subject'

    for i in range(1, no_of_instances+1):
        # Read the CSV file
        df = pd.read_csv(f'{data_path}{i}.csv')
        
        # Append the data to df
        data_to_append = ""

        for index, row in df.iterrows():
            if isinstance(row['WRITING.TEXT.Element:Text'], str):
                data_to_append = data_to_append + row['WRITING.TEXT.Element:Text'] + " "

        final_data = {'Subject': f'{subject_name}{i}', 'text': data_to_append}
        data_list.append(final_data)

    df_final = pd.DataFrame(data_list)
    return df_final

# Data Preprocessing Functions

1. Lowercasing
2. Removing URLs
3. Removing Punctuations
4. Removing Stopwords
5. Lemmatization

In [7]:
def remove_urls(text):
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    return url_pattern.sub(r'', text)

In [8]:
def data_preprocessing(df):

    for index, row in df.iterrows():
        text_value = row['text']

        # lowercasing
        text_value = text_value.lower()

        # Remove URLs
        text_value = remove_urls(text_value)

        # Remove punctuation
        text_value = re.sub(r'[^\w\s]', '', text_value)

        # remove stopwords
        stop_words = set(stopwords.words('english'))
        text_value = ' '.join([word for word in text_value.split() if word not in stop_words])

        # lemmatization
        lemmatizer = WordNetLemmatizer()
        text_value = ' '.join([lemmatizer.lemmatize(word) for word in text_value.split()])
        
        # store the cleaned text
        df.at[index, 'text'] = text_value

    return df

# Storing all the labels in one Pandas DataFrame

In [9]:
def extract_labels(type):

    if type == 'train':

        label_list = []
        
        with open('golden-truth_eRisk2023_T3.txt', 'r') as file:
            for line in file:
                # go till the first space or tab is found
                index = 0
                while line[index] != ' ' and line[index] != '\t':
                    index += 1

                temp_list = []
                total_questions = 0

                # there are 22 questions, and each have an integer value separated by a space. store all these values sequentially in temp_list
                for i in range(22):
                    index += 1
                    while (line[index] == ' ' or line[index] == '\t'):
                        index += 1
                    temp_list.append(int(line[index]))
                    total_questions += 1

                label_list.append(temp_list)

    else:

        label_list = []
        
        with open('ground-truth_eRisk2022_T3.txt', 'r') as file:
            for line in file:
                # go till the first space or tab is found
                index = 0
                while line[index] != ' ' and line[index] != '\t':
                    index += 1

                temp_list = []
                total_questions = 0

                # there are 22 questions, and each have an integer value separated by a space. store all these values sequentially in temp_list
                for i in range(22):
                    index += 1
                    while (line[index] == ' ' or line[index] == '\t'):
                        index += 1
                    temp_list.append(int(line[index]))
                    total_questions += 1

                label_list.append(temp_list)

    return label_list

In [15]:
labels_train = extract_labels('train')
labels_test = extract_labels('test')

# Defining the Accuracy Metrics

In [63]:
def mean_zero_one_error(y_true, y_pred):

    individual_errors = []

    for i in range(len(y_true)):
        no_of_misclassifications = 0
        
        for j in range(len(y_true[i])):
            if y_true[i][j] != y_pred[i][j]:
                no_of_misclassifications += 1

        individual_errors.append(no_of_misclassifications/22.0)

    return np.mean(individual_errors)

In [65]:
def mean_absolute_error(y_true, y_pred):

    individual_errors = []

    for i in range(len(y_true)):
        total_error = 0
        
        for j in range(len(y_true[i])):
            total_error += abs(y_true[i][j] - y_pred[i][j])
            
        individual_errors.append(total_error/22.0)

    return np.mean(individual_errors)

In [67]:
def macroaveraged_mean_absolute_error(y_true, y_pred):

    individual_errors = []

    for i in range(len(y_true)):
        total_error = [0, 0, 0, 0, 0, 0, 0]
        total_points = [0, 0, 0, 0, 0, 0, 0]
        
        for j in range(len(y_true[i])):
            total_error[y_true[i][j]] += abs(y_true[i][j] - y_pred[i][j])
            total_points[y_true[i][j]] += 1

        final_error = 0

        for k in range(7):
            if total_points[k] != 0:
                final_error += total_error[k]/(total_points[k]*1.0)
        
        individual_errors.append(final_error/7.0)

    return np.mean(individual_errors)

In [69]:
def restrained_subscale(y_true, y_pred):
    
    indices = [0,1,2,3,4]
    total_error = 0

    for i in range(len(y_true)):

        system_score = 0
        real_score = 0

        for j in indices:
            system_score += y_pred[i][j]
            real_score += y_true[i][j]
        
        system_score = system_score/(len(indices)*1.0)
        real_score = real_score/(len(indices)*1.0)

        total_error += (system_score - real_score)**2

    total_error = total_error/(len(y_true)*1.0)
    total_error = np.sqrt(total_error)

    return total_error
    

In [70]:
def eating_concern_subscale(y_true, y_pred):
    
    indices = [6,8,12,13,14]
    total_error = 0

    for i in range(len(y_true)):

        system_score = 0
        real_score = 0

        for j in indices:
            system_score += y_pred[i][j]
            real_score += y_true[i][j]
        
        system_score = system_score/(len(indices)*1.0)
        real_score = real_score/(len(indices)*1.0)

        total_error += (system_score - real_score)**2

    total_error = total_error/(len(y_true)*1.0)
    total_error = np.sqrt(total_error)

    return total_error

In [72]:
def shape_concern_subscale(y_true, y_pred):
    
    indices = [5,7,9,10,16,19,20,21]
    total_error = 0

    for i in range(len(y_true)):

        system_score = 0
        real_score = 0

        for j in indices:
            system_score += y_pred[i][j]
            real_score += y_true[i][j]
        
        system_score = system_score/(len(indices)*1.0)
        real_score = real_score/(len(indices)*1.0)

        total_error += (system_score - real_score)**2

    total_error = total_error/(len(y_true)*1.0)
    total_error = np.sqrt(total_error)

    return total_error

In [73]:
def weight_concern_subscale(y_true, y_pred):

    indices = [7,11,15,17,18]
    total_error = 0

    for i in range(len(y_true)):

        system_score = 0
        real_score = 0

        for j in indices:
            system_score += y_pred[i][j]
            real_score += y_true[i][j]
        
        system_score = system_score/(len(indices)*1.0)
        real_score = real_score/(len(indices)*1.0)

        total_error += (system_score - real_score)**2

    total_error = total_error/(len(y_true)*1.0)
    total_error = np.sqrt(total_error)

    return total_error

In [74]:
def global_ED(y_true, y_pred):

    indices_r = [0,1,2,3,4]
    indices_e = [6,8,12,13,14]
    indices_s = [5,7,9,10,16,19,20,21]
    indices_w = [7,11,15,17,18]

    size_lists = [5,5,8,5]

    total_error = 0

    for i in range(len(y_true)):

        system_score = [0, 0, 0, 0]
        real_score = [0, 0, 0, 0]
        total_error_system = 0
        total_error_real = 0

        for j in indices_r:
            system_score[0] += y_pred[i][j]
            real_score[0] += y_true[i][j]
        
        for j in indices_e:
            system_score[1] += y_pred[i][j]
            real_score[1] += y_true[i][j]
        
        for j in indices_s:
            system_score[2] += y_pred[i][j]
            real_score[2] += y_true[i][j]
        
        for j in indices_w:
            system_score[3] += y_pred[i][j]
            real_score[3] += y_true[i][j]

        for k in range(4):
            system_score[k] = system_score[k]/(size_lists[k]*1.0)
            real_score[k] = real_score[k]/(size_lists[k]*1.0)

            total_error_system += system_score[k]
            total_error_real += real_score[k]
        
        total_error_system = total_error_system/(4.0)
        total_error_real = total_error_real/(4.0)

        total_error += (total_error_system - total_error_real)**2

    total_error = total_error/(len(y_true)*1.0)
    total_error = np.sqrt(total_error)

    return total_error

In [75]:
def find_all_metrics(y_true, y_pred):

    print("Mean Zero-One Error: ", mean_zero_one_error(y_true, y_pred))
    print("Mean Absolute Error: ", mean_absolute_error(y_true, y_pred))
    print("Macroaveraged Mean Absolute Error: ", macroaveraged_mean_absolute_error(y_true, y_pred))
    print("Restrained Subscale: ", restrained_subscale(y_true, y_pred))
    print("Eating Concern Subscale: ", eating_concern_subscale(y_true, y_pred))
    print("Shape Concern Subscale: ", shape_concern_subscale(y_true, y_pred))
    print("Weight Concern Subscale: ", weight_concern_subscale(y_true, y_pred))
    print("Global ED: ", global_ED(y_true, y_pred))

# Machine Learning Models

**Multinomial Naive Bayes**

In [48]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer


def multinomial_naves_bayes(question_number, labels_train, labels_test, df_train, df_test):

    nb = Pipeline([('vect', CountVectorizer()),
               ('tfidf', TfidfTransformer()),
               ('clf', MultinomialNB()),
              ])
    
    # Train the model
    X_train = df_train['text']
    y_train = []

    for i in range(len(labels_train)):
        y_train.append(labels_train[i][question_number])

    nb.fit(X_train, y_train)

    # Test the model
    X_test = df_test['text']
    y_pred = nb.predict(X_test)

    return y_pred

**Linear Support Vector Machine**

In [77]:
from sklearn.linear_model import SGDClassifier

def linear_SVM(question_number, labels_train, labels_test, df_train, df_test):

    sgd = Pipeline([('vect', CountVectorizer()),
                ('tfidf', TfidfTransformer()),
                ('clf', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, random_state=42, max_iter=5, tol=None)),
               ])

    # Train the model
    X_train = df_train['text']
    y_train = []

    for i in range(len(labels_train)):
        y_train.append(labels_train[i][question_number])
    
    sgd.fit(X_train, y_train)

    # Test the model
    X_test = df_test['text']
    y_pred = sgd.predict(X_test)

    return y_pred

**Logistic Regression**

In [83]:
from sklearn.linear_model import LogisticRegression

def logistic_regression(question_number, labels_train, labels_test, df_train, df_test):

    logreg = Pipeline([('vect', CountVectorizer()),
                ('tfidf', TfidfTransformer()),
                ('clf', LogisticRegression(n_jobs=1, C=1e5)),
               ])

    # Train the model
    X_train = df_train['text']
    y_train = []

    for i in range(len(labels_train)):
        y_train.append(labels_train[i][question_number])
    
    logreg.fit(X_train, y_train)

    # Test the model
    X_test = df_test['text']
    y_pred = logreg.predict(X_test)

    return y_pred

# Preprocessing Training Data

In [35]:
make_coloumns_same(data_path_train, no_of_training_instances)
data_cleaning(data_path_train, no_of_training_instances)
df_train = make_dataframe(data_path_train, no_of_training_instances, 'train')
df_train = data_preprocessing(df_train)

df_train.head()

Unnamed: 0,Subject,text
0,eRisk2023-T3_Subject1,idk mate since 1796 using vaccine still waitin...
1,eRisk2023-T3_Subject2,loved ideaconcept feel like film left many thi...
2,eRisk2023-T3_Subject3,dont need certification start working h motor ...
3,eRisk2023-T3_Subject4,leangains method rept fitness workout tracker ...
4,eRisk2023-T3_Subject5,think opinion system led anxiety havent change...


In [36]:
df_train

Unnamed: 0,Subject,text
0,eRisk2023-T3_Subject1,idk mate since 1796 using vaccine still waitin...
1,eRisk2023-T3_Subject2,loved ideaconcept feel like film left many thi...
2,eRisk2023-T3_Subject3,dont need certification start working h motor ...
3,eRisk2023-T3_Subject4,leangains method rept fitness workout tracker ...
4,eRisk2023-T3_Subject5,think opinion system led anxiety havent change...
5,eRisk2023-T3_Subject6,volunteer waited late 30 family took much ther...
6,eRisk2023-T3_Subject7,find self thinking food lot wanting eat etc ac...
7,eRisk2023-T3_Subject8,wale country catalonia 3xe2x98xbaxefxb8x8f def...
8,eRisk2023-T3_Subject9,function word everything task content word muc...
9,eRisk2023-T3_Subject10,em polypharmacy much 80yo gramma taking idea s...


In [37]:
df_train.to_csv('train.csv', index=False)

# Preprocessing Testing Data

In [38]:
make_coloumns_same(data_path_test, no_of_testing_instances)
data_cleaning(data_path_test, no_of_testing_instances)
df_test = make_dataframe(data_path_test, no_of_testing_instances, 'test')
df_test = data_preprocessing(df_test)

df_test.head()

Unnamed: 0,Subject,text
0,eRisk2022-T3_Subject1,thats thats post fucking utterly completely lo...
1,eRisk2022-T3_Subject2,shes wrong 25 warm rtightpussy thats spicy gir...
2,eRisk2022-T3_Subject3,feel way ill sometimes starve im feeling stres...
3,eRisk2022-T3_Subject4,yoga vvvv helpful slowly concentrate movement ...
4,eRisk2022-T3_Subject5,im experience ive owned patent leather suede b...


In [39]:
df_test

Unnamed: 0,Subject,text
0,eRisk2022-T3_Subject1,thats thats post fucking utterly completely lo...
1,eRisk2022-T3_Subject2,shes wrong 25 warm rtightpussy thats spicy gir...
2,eRisk2022-T3_Subject3,feel way ill sometimes starve im feeling stres...
3,eRisk2022-T3_Subject4,yoga vvvv helpful slowly concentrate movement ...
4,eRisk2022-T3_Subject5,im experience ive owned patent leather suede b...
5,eRisk2022-T3_Subject6,one find kidnapping inpregnation thing unsette...
6,eRisk2022-T3_Subject7,home rholup 175 favorite place visit paris wen...
7,eRisk2022-T3_Subject8,youre friend could honest directly tell making...
8,eRisk2022-T3_Subject9,oh fun ive lost equivalent beagle 16kg yup gnu...
9,eRisk2022-T3_Subject10,wash hair often greasy hair absorbs scent much...


In [40]:
df_test.to_csv('test.csv', index=False)

# Multinomial Naives Bayes

In [50]:
multinomial_naive_bayes_predictions = []

for i in range(0, number_of_questions):
    print(f'Question {i+1}')
    multinomial_naive_bayes_predictions.append(multinomial_naves_bayes(i, labels_train, labels_test, df_train, df_test))

Question 1
Question 2
Question 3
Question 4
Question 5
Question 6
Question 7
Question 8
Question 9
Question 10
Question 11
Question 12
Question 13
Question 14
Question 15
Question 16
Question 17
Question 18
Question 19
Question 20
Question 21
Question 22


In [61]:
multinomial_naive_bayes_predictions_np = np.array(multinomial_naive_bayes_predictions)
multinomial_naive_bayes_predictions_np = multinomial_naive_bayes_predictions_np.T
print(multinomial_naive_bayes_predictions_np.shape)

(28, 22)


In [62]:
labels_test_np = np.array(labels_test)
print(labels_test_np.shape)

(28, 22)


In [76]:
find_all_metrics(labels_test_np, multinomial_naive_bayes_predictions_np)

Mean Zero-One Error:  0.6753246753246753
Mean Absolute Error:  2.340909090909091
Macroaveraged Mean Absolute Error:  1.7449807990523618
Restrained Subscale:  2.5793133083937563
Eating Concern Subscale:  3.0173308924090034
Shape Concern Subscale:  1.9142324086394824
Weight Concern Subscale:  1.9142324086394824
Global ED:  1.8380655410065536


# Linear Support Vector Machine

In [78]:
linear_support_vector_machine_predictions = []

for i in range(0, number_of_questions):
    print(f'Question {i+1}')
    linear_support_vector_machine_predictions.append(linear_SVM(i, labels_train, labels_test, df_train, df_test))

Question 1
Question 2
Question 3
Question 4
Question 5
Question 6
Question 7
Question 8
Question 9
Question 10
Question 11
Question 12
Question 13
Question 14
Question 15
Question 16
Question 17
Question 18
Question 19
Question 20
Question 21
Question 22


In [79]:
linear_support_vector_machine_predictions_np = np.array(linear_support_vector_machine_predictions)
linear_support_vector_machine_predictions_np = linear_support_vector_machine_predictions_np.T
print(linear_support_vector_machine_predictions_np.shape)

(28, 22)


In [82]:
find_all_metrics(labels_test_np, linear_support_vector_machine_predictions_np)

Mean Zero-One Error:  0.7012987012987012
Mean Absolute Error:  1.974025974025974
Macroaveraged Mean Absolute Error:  1.560444493074058
Restrained Subscale:  1.9867417691141587
Eating Concern Subscale:  1.6274432182326448
Shape Concern Subscale:  1.4177446878757824
Weight Concern Subscale:  1.4177446878757824
Global ED:  1.360584437350624


# Logistic Regression

In [84]:
logistic_regression_predictions = []

for i in range(0, number_of_questions):
    print(f'Question {i+1}')
    logistic_regression_predictions.append(logistic_regression(i, labels_train, labels_test, df_train, df_test))

Question 1
Question 2
Question 3
Question 4
Question 5
Question 6
Question 7
Question 8
Question 9
Question 10
Question 11
Question 12
Question 13
Question 14
Question 15
Question 16
Question 17
Question 18
Question 19
Question 20
Question 21
Question 22


In [85]:
logistic_regression_predictions_np = np.array(logistic_regression_predictions)
logistic_regression_predictions_np = logistic_regression_predictions_np.T
print(logistic_regression_predictions_np.shape)

(28, 22)


In [86]:
find_all_metrics(labels_test_np, logistic_regression_predictions_np)

Mean Zero-One Error:  0.6720779220779222
Mean Absolute Error:  2.043831168831169
Macroaveraged Mean Absolute Error:  1.6271578180144235
Restrained Subscale:  2.431636720987973
Eating Concern Subscale:  2.1480888515807983
Shape Concern Subscale:  1.6234883078464981
Weight Concern Subscale:  1.6234883078464981
Global ED:  1.5417501686684796


# Summary Table

In [87]:
summary_df = pd.DataFrame(columns=['Model', 'Mean Zero-One Error', 'Mean Absolute Error', 'Macroaveraged Mean Absolute Error', 'Restrained Subscale', 'Eating Concern Subscale', 'Shape Concern Subscale', 'Weight Concern Subscale', 'Global ED'])

summary_df = summary_df.append({'Model': 'Multinomial Naive Bayes', 'Mean Zero-One Error': mean_zero_one_error(labels_test_np, multinomial_naive_bayes_predictions_np), 'Mean Absolute Error': mean_absolute_error(labels_test_np, multinomial_naive_bayes_predictions_np), 'Macroaveraged Mean Absolute Error': macroaveraged_mean_absolute_error(labels_test_np, multinomial_naive_bayes_predictions_np), 'Restrained Subscale': restrained_subscale(labels_test_np, multinomial_naive_bayes_predictions_np), 'Eating Concern Subscale': eating_concern_subscale(labels_test_np, multinomial_naive_bayes_predictions_np), 'Shape Concern Subscale': shape_concern_subscale(labels_test_np, multinomial_naive_bayes_predictions_np), 'Weight Concern Subscale': weight_concern_subscale(labels_test_np, multinomial_naive_bayes_predictions_np), 'Global ED': global_ED(labels_test_np, multinomial_naive_bayes_predictions_np)}, ignore_index=True)
summary_df = summary_df.append({'Model': 'Linear Support Vector Machine', 'Mean Zero-One Error': mean_zero_one_error(labels_test_np, linear_support_vector_machine_predictions_np), 'Mean Absolute Error': mean_absolute_error(labels_test_np, linear_support_vector_machine_predictions_np), 'Macroaveraged Mean Absolute Error': macroaveraged_mean_absolute_error(labels_test_np, linear_support_vector_machine_predictions_np), 'Restrained Subscale': restrained_subscale(labels_test_np, linear_support_vector_machine_predictions_np), 'Eating Concern Subscale': eating_concern_subscale(labels_test_np, linear_support_vector_machine_predictions_np), 'Shape Concern Subscale': shape_concern_subscale(labels_test_np, linear_support_vector_machine_predictions_np), 'Weight Concern Subscale': weight_concern_subscale(labels_test_np, linear_support_vector_machine_predictions_np), 'Global ED': global_ED(labels_test_np, linear_support_vector_machine_predictions_np)}, ignore_index=True)
summary_df = summary_df.append({'Model': 'Logistic Regression', 'Mean Zero-One Error': mean_zero_one_error(labels_test_np, logistic_regression_predictions_np), 'Mean Absolute Error': mean_absolute_error(labels_test_np, logistic_regression_predictions_np), 'Macroaveraged Mean Absolute Error': macroaveraged_mean_absolute_error(labels_test_np, logistic_regression_predictions_np), 'Restrained Subscale': restrained_subscale(labels_test_np, logistic_regression_predictions_np), 'Eating Concern Subscale': eating_concern_subscale(labels_test_np, logistic_regression_predictions_np), 'Shape Concern Subscale': shape_concern_subscale(labels_test_np, logistic_regression_predictions_np), 'Weight Concern Subscale': weight_concern_subscale(labels_test_np, logistic_regression_predictions_np), 'Global ED': global_ED(labels_test_np, logistic_regression_predictions_np)}, ignore_index=True)

  summary_df = summary_df.append({'Model': 'Multinomial Naive Bayes', 'Mean Zero-One Error': mean_zero_one_error(labels_test_np, multinomial_naive_bayes_predictions_np), 'Mean Absolute Error': mean_absolute_error(labels_test_np, multinomial_naive_bayes_predictions_np), 'Macroaveraged Mean Absolute Error': macroaveraged_mean_absolute_error(labels_test_np, multinomial_naive_bayes_predictions_np), 'Restrained Subscale': restrained_subscale(labels_test_np, multinomial_naive_bayes_predictions_np), 'Eating Concern Subscale': eating_concern_subscale(labels_test_np, multinomial_naive_bayes_predictions_np), 'Shape Concern Subscale': shape_concern_subscale(labels_test_np, multinomial_naive_bayes_predictions_np), 'Weight Concern Subscale': weight_concern_subscale(labels_test_np, multinomial_naive_bayes_predictions_np), 'Global ED': global_ED(labels_test_np, multinomial_naive_bayes_predictions_np)}, ignore_index=True)
  summary_df = summary_df.append({'Model': 'Linear Support Vector Machine', 'Mea

In [92]:
summary_df

Unnamed: 0,Model,Mean Zero-One Error,Mean Absolute Error,Macroaveraged Mean Absolute Error,Restrained Subscale,Eating Concern Subscale,Shape Concern Subscale,Weight Concern Subscale,Global ED
0,Multinomial Naive Bayes,0.675325,2.340909,1.744981,2.579313,3.017331,1.914232,1.914232,1.838066
1,Linear Support Vector Machine,0.701299,1.974026,1.560444,1.986742,1.627443,1.417745,1.417745,1.360584
2,Logistic Regression,0.672078,2.043831,1.627158,2.431637,2.148089,1.623488,1.623488,1.54175
