In [None]:
import pandas as pd
def load_embeddings(split, class_id):
    """Load the embedding matrix of one class from one corpus split.

    Reads ``../assets/annotated-corpus/<split>/<class_id>_emb.tsv`` as a
    header-less, tab-separated table.

    Args:
        split: Sub-directory of the corpus, ``"train"`` or ``"test"``.
        class_id: Number used as the file-name prefix (1-4 in this notebook).

    Returns:
        The table values as an ndarray with the first and the last column removed.
    """
    path = f"../assets/annotated-corpus/{split}/{class_id}_emb.tsv"
    # NOTE(review): column 0 is presumably a document identifier and the last
    # column an artifact of a trailing tab -- confirm against the code that
    # writes these files.
    return pd.read_csv(path, delimiter='\t', header=None).values[:, 1:-1]


# One embedding matrix per class (files 1..4), for each split.
train_1, train_2, train_3, train_4 = (load_embeddings("train", class_id) for class_id in range(1, 5))

test_1, test_2, test_3, test_4 = (load_embeddings("test", class_id) for class_id in range(1, 5))


In [None]:
import numpy as np

In [None]:
# Stack the per-class training matrices row-wise into one feature matrix.
# np.vstack is used because the np.row_stack alias is deprecated in NumPy 2.0;
# the result is identical.
X = np.vstack((train_1, train_2, train_3, train_4))

In [None]:
# Stack the per-class test matrices row-wise into one feature matrix.
# np.vstack replaces the deprecated np.row_stack alias (same result).
X_test = np.vstack((test_1, test_2, test_3, test_4))

# Labels 0..3 as float column vectors, one row per sample, in the same
# class order as the rows of X_test.
y_test1 = np.zeros((test_1.shape[0], 1))
y_test2 = np.ones((test_2.shape[0], 1))
y_test3 = np.full((test_3.shape[0], 1), 2.0)
y_test4 = np.full((test_4.shape[0], 1), 3.0)
y_test = np.vstack((y_test1, y_test2, y_test3, y_test4))

In [None]:
# Training labels 0..3 as float column vectors, one row per sample, stacked in
# the order train_1..train_4. np.vstack replaces the deprecated np.row_stack
# alias (same result).
y1 = np.zeros((train_1.shape[0], 1))
y2 = np.ones((train_2.shape[0], 1))
y3 = np.full((train_3.shape[0], 1), 2.0)
y4 = np.full((train_4.shape[0], 1), 3.0)
y = np.vstack((y1, y2, y3, y4))

In [None]:
def confusion_matrix(y_true, y_pred, num_classes):
    """Count how often each true class was predicted as each class.

    Args:
        y_true: Array-like of true class labels. Any shape is accepted (for
            example a column vector); it is flattened before counting.
        y_pred: Array-like of predicted class labels with exactly as many
            elements as ``y_true``.
        num_classes: Number of classes; every label must lie in
            ``[0, num_classes)`` after truncation to an integer.

    Returns:
        Integer ndarray of shape ``(num_classes, num_classes)`` where entry
        ``[i, j]`` is the number of samples with true label ``i`` that were
        predicted as ``j``.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` differ in element count
            (``zip`` would otherwise silently drop the surplus samples).
        IndexError: If any label falls outside ``[0, num_classes)``. Negative
            labels are rejected explicitly because plain indexing would wrap
            them around to the last rows/columns.
    """
    true_idx = np.asarray(y_true).ravel().astype(int)
    pred_idx = np.asarray(y_pred).ravel().astype(int)

    if true_idx.size != pred_idx.size:
        raise ValueError(
            f"y_true has {true_idx.size} labels but y_pred has {pred_idx.size}"
        )

    for arg_name, labels in (("y_true", true_idx), ("y_pred", pred_idx)):
        if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
            raise IndexError(
                f"{arg_name} contains labels outside [0, {num_classes})"
            )

    cm = np.zeros((num_classes, num_classes), dtype=int)
    # np.add.at accumulates repeated (true, pred) index pairs correctly,
    # unlike `cm[true_idx, pred_idx] += 1`, which counts each pair only once.
    np.add.at(cm, (true_idx, pred_idx), 1)
    return cm

In [None]:
from statistics import mean
def calculate_metrics(confusion_matrix):
    """Compute accuracy and macro-averaged precision, recall and F1.

    Args:
        confusion_matrix: Square 2-D array-like where entry ``[i, j]`` counts
            samples of true class ``i`` predicted as class ``j``.

    Returns:
        A list ``[accuracy, precision, recall, f1]`` of Python floats.
        Precision, recall and F1 are computed per class and then averaged
        with equal weight per class. Any ratio whose denominator is zero is
        reported as 0.0, including accuracy for an all-zero matrix.

    Raises:
        ValueError: If the input is not a square 2-D matrix.
    """
    # float64 throughout: storing the per-class ratios in float32 loses
    # several digits before the averages are taken.
    cm = np.asarray(confusion_matrix, dtype=np.float64)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError(f"confusion matrix must be square, got shape {cm.shape}")

    true_positive = np.diag(cm)
    predicted_total = cm.sum(axis=0)  # TP + FP for each class (column sums)
    actual_total = cm.sum(axis=1)     # TP + FN for each class (row sums)

    def safe_ratio(numerator, denominator):
        # Element-wise division that yields 0 wherever the denominator is 0.
        result = np.zeros_like(numerator)
        np.divide(numerator, denominator, out=result, where=denominator != 0)
        return result

    def macro_mean(values):
        # Equal-weight average over classes; 0.0 when there are no classes.
        return float(values.mean()) if values.size else 0.0

    precision = safe_ratio(true_positive, predicted_total)
    recall = safe_ratio(true_positive, actual_total)
    f1_score = safe_ratio(2 * precision * recall, precision + recall)

    total = cm.sum()
    accuracy = float(true_positive.sum() / total) if total != 0 else 0.0

    return [accuracy, macro_mean(precision), macro_mean(recall), macro_mean(f1_score)]

In [None]:
from timeit import default_timer as timer
from sklearn.multiclass import OneVsRestClassifier
all_tests = []
from sklearn import svm

# Sweep every kernel / multiclass setting / iteration cap combination and
# record (name, [accuracy, precision, recall, F1, fit time in seconds]).
for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
    for clf_type in ["ovr", "ovo"]:
        for iter_num in [100, 500, 1000]:
            name = f"{kernel}_{clf_type}_{iter_num}"

            # The timed section covers estimator construction and fitting only.
            start = timer()
            clf = svm.SVC(kernel=kernel, max_iter=iter_num)
            if clf_type == "ovr":
                # "ovr" wraps the SVC in an explicit one-vs-rest meta-estimator;
                # "ovo" uses the bare SVC as-is.
                clf = OneVsRestClassifier(clf, n_jobs=-1)
            clf.fit(X, y.ravel())
            end = timer()

            # Score on the held-out test set (4 classes).
            test = clf.predict(X_test)
            cm = confusion_matrix(y_test.ravel(), test, 4)
            test_res = calculate_metrics(cm) + [end - start]

            print((name, test_res))
            all_tests.append((name, test_res))

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Results of the kernel / strategy / iteration sweep: (name, metrics list) tuples
data = all_tests

# One row per model; expand each metrics list into its own named columns
df = pd.DataFrame(data, columns=['Model', 'Metrics'])
metric_values = pd.DataFrame(
    df['Metrics'].to_list(),
    columns=['accuracy', 'precision', 'recall', 'F1', 'Time'],
)
df = pd.concat([df, metric_values], axis=1)

# Long format (one row per model/metric pair) so seaborn can group bars by metric
df_metrics_melted = df.melt(
    id_vars=['Model'],
    value_vars=['accuracy', 'precision', 'recall', 'F1'],
    var_name='Metric',
    value_name='Value',
)

# Seaborn styling applied to both figures
sns.set(style="whitegrid")

# Figure 1: quality metrics, grouped per model
plt.figure(figsize=(40, 12))
ax1 = sns.barplot(data=df_metrics_melted, x='Model', y='Value', hue='Metric', palette='viridis')
plt.title('Bar Plot for Metrics by Model (excluding Time)')
plt.xlabel('Model')
plt.ylabel('Value')

# Figure 2: time per model, plotted separately from the metric values
df_time = df[['Model', 'Time']]
plt.figure(figsize=(42, 12))
ax2 = sns.barplot(data=df_time, x='Model', y='Time', color='orange')
plt.title('Bar Plot for Time by Model')
plt.xlabel('Model')
plt.ylabel('Time (seconds)')

plt.show()

In [None]:
# Fit a single RBF-kernel SVC with an iteration cap of 5000 and score it on
# the test set (4 classes).
clf = svm.SVC(kernel="rbf", max_iter=5000).fit(X, y.ravel())  # fit() returns the estimator itself
test = clf.predict(X_test)
cm = confusion_matrix(y_test.ravel(), test, 4)
test_res = calculate_metrics(cm)
print(test_res)

In [None]:
import numpy as np
from sklearn.decomposition import PCA
from timeit import default_timer as timer
from sklearn.multiclass import OneVsRestClassifier
from sklearn import svm

# Fixed seed: PCA's default solver selection may use a randomized SVD, so the
# projections (and every metric below) are only repeatable with random_state set.
PCA_SEED = 0

# NOTE: `all_tests` is intentionally NOT reset here -- it holds the results of
# the kernel sweep and is not used by this cell.
results = []
for num_comp in [256, 128, 64, 32, 16, 8, 4, 2]:
    for whiten in [True, False]:
        name = f"{num_comp}_{whiten}"

        # Fit the projection on the training data only, then apply the same
        # projection to the test data.
        pca = PCA(n_components=num_comp, whiten=whiten, random_state=PCA_SEED)
        new_X = pca.fit_transform(X)
        new_X_test = pca.transform(X_test)

        clf = svm.SVC(kernel="rbf", max_iter=5000)
        clf.fit(new_X, y.ravel())

        test = clf.predict(new_X_test)
        cm = confusion_matrix(y_test.ravel(), test, 4)
        test_res = calculate_metrics(cm)

        print((name, test_res))
        results.append((name, test_res))

In [None]:
# Show the collected (name, metrics list) tuples as this cell's output.
results

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Results of the PCA sweep: (name, [accuracy, precision, recall, F1]) tuples,
# where name is "<n_components>_<whiten flag>"
data2 = results

# One row per PCA configuration; expand each metrics list into named columns
df = pd.DataFrame(data2, columns=['Model', 'Metrics'])
df = pd.concat([df, pd.DataFrame(df['Metrics'].to_list(), columns=['accuracy', 'precision', 'recall', 'F1'])], axis=1)

# Melt the DataFrame for better plotting with seaborn
df_metrics_melted = pd.melt(df, id_vars=['Model'], value_vars=['accuracy', 'precision', 'recall', 'F1'], var_name='Metric', value_name='Value')

# Set seaborn style
sns.set(style="whitegrid")

# Plot the bar plot for metrics. The title and x label describe what is
# actually plotted: the previous "(excluding Time)" / "Model" wording was
# carried over from the kernel-sweep figure, but this data has no Time column
# and the x axis holds PCA configurations.
plt.figure(figsize=(40, 12))
ax1 = sns.barplot(x='Model', y='Value', hue='Metric', data=df_metrics_melted, palette='viridis')
plt.title('Bar Plot for Metrics by PCA Configuration')
plt.xlabel('PCA configuration (n_components_whiten)')
plt.ylabel('Value')

plt.show()

In [None]:
# Display the `results` list as this cell's output.
results