In [1]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score, balanced_accuracy_score
import random
from collections import Counter

# Multiclass

In [2]:
def _get_f1_score(y, ypred):
    score = f1_score(y, ypred, average='macro')
    print(score)

In [3]:
n_eng = 556
zero_eng = 365
one_eng = 15
two_eng = 63
three_eng = 113

n_ger = 503
zero_ger = 330
one_ger = 10
two_ger = 86
three_ger = 77

y_eng = zero_eng*[0] + one_eng*[1] + two_eng*[2] + three_eng*[3]
y_ger = zero_ger*[0] + one_ger*[1] + two_ger*[2] + three_ger*[3]

#### Classify every data point as 0 /not reviewed  (majority class)

In [4]:
y_pred_eng = n_eng*[0]
_get_f1_score(y_eng, y_pred_eng)

y_pred_ger = n_ger*[0]
_get_f1_score(y_ger, y_pred_ger)

0.1981541802388708
0.19807923169267708


#### Assign each class with possibility 1/4

In [5]:
classes = [0,1,2,3]
y_pred_eng = random.choices(classes, k=n_eng)
Counter(y_pred_eng)
_get_f1_score(y_eng, y_pred_eng)

y_pred_ger = random.choices(classes, k=n_ger)
Counter(y_pred_ger)
_get_f1_score(y_ger, y_pred_ger)

0.17915154787037185
0.19565850059534695


#### Class probability = class frequency

In [6]:
classes = [0,1,2,3]
weights_eng = [zero_eng/n_eng, one_eng/n_eng, two_eng/n_eng, three_eng/n_eng]
y_pred_eng = random.choices(classes, weights_eng, k=n_eng)
_get_f1_score(y_eng, y_pred_eng)

weights_ger = [zero_ger/n_ger, one_ger/n_ger, two_ger/n_ger, three_ger/n_ger]
y_pred_ger = random.choices(classes, weights_ger, k=n_ger)
_get_f1_score(y_ger, y_pred_ger)

0.252381851447272
0.22805956924754633


#### Class probability of two major classes

In [7]:
classes = [0,3]
weights_eng = [zero_eng/(zero_eng + three_eng), three_eng/(zero_eng + three_eng)]
y_pred_eng = random.choices(classes, weights_eng, k=n_eng)
_get_f1_score(y_eng, y_pred_eng)

classes = [0,2]
weights_ger = [zero_ger/(zero_ger + three_ger), two_ger/(zero_ger + two_ger)]
y_pred_ger = random.choices(classes, weights_ger, k=n_ger)
_get_f1_score(y_ger, y_pred_ger)

0.2309107465230903
0.21326667523087683


#### p(0) = 0.5, p(2)=p(3)=0.25

In [8]:
classes = [0,2,3]
weights_eng = [0.5, 0.25, 0.25]
y_pred_eng = random.choices(classes, weights_eng, k=n_eng)
_get_f1_score(y_eng, y_pred_eng)

weights_ger = [0.5, 0.25, 0.25]
y_pred_ger = random.choices(classes, weights_ger, k=n_ger)
_get_f1_score(y_ger, y_pred_ger)

0.24935878823073948
0.2636675283803529


# Accuracy

## Twoclass 

In [9]:
def _get_accuracy(y, ypred):
    print(accuracy_score(y, ypred))

In [10]:
n_eng = 556
zero_eng = 365
one_eng = 191

n_ger = 503
zero_ger = 330
one_ger = 173

y_eng = zero_eng*[0] + one_eng*[1]
y_ger = zero_ger*[0] + one_ger*[1]

#### Classify every data point as 0 /not reviewed  (majority class)

In [11]:
y_pred_eng = n_eng*[0]
_get_accuracy(y_eng, y_pred_eng)

y_pred_ger = n_ger*[0]
_get_accuracy(y_ger, y_pred_ger)

0.6564748201438849
0.6560636182902585


#### Assign each class with possibility 1/2

In [12]:
classes = [0,1]
y_pred_eng = random.choices(classes, k=n_eng)
Counter(y_pred_eng)
_get_accuracy(y_eng, y_pred_eng)

y_pred_ger = random.choices(classes, k=n_ger)
Counter(y_pred_ger)
_get_accuracy(y_ger, y_pred_ger)

0.5287769784172662
0.4970178926441352


#### Class probability = class frequency

In [13]:
classes = [0,1]
weights_eng = [zero_eng/n_eng, one_eng/n_eng]
y_pred_eng = random.choices(classes, weights_eng, k=n_eng)
_get_accuracy(y_eng, y_pred_eng)

weights_ger = [zero_ger/n_ger, one_ger/n_ger]
y_pred_ger = random.choices(classes, weights_ger, k=n_ger)
_get_accuracy(y_ger, y_pred_ger)

0.5701438848920863
0.5765407554671969


## Library

In [14]:
n_eng = 603
zero_eng = 146
one_eng = 457

n_ger = 546
zero_ger = 240
one_ger = 306

y_eng = zero_eng*[0] + one_eng*[1]
y_ger = zero_ger*[0] + one_ger*[1]

#### Classify every data point as 1/featured (majority class)

In [15]:
y_pred_eng = n_eng*[1]
_get_accuracy(y_eng, y_pred_eng)

y_pred_ger = n_ger*[1]
_get_accuracy(y_ger, y_pred_ger)

0.75787728026534
0.5604395604395604


#### Assign each class with possibility 1/2

In [16]:
classes = [0,1]
y_pred_eng = random.choices(classes, k=n_eng)
Counter(y_pred_eng)
_get_accuracy(y_eng, y_pred_eng)

y_pred_ger = random.choices(classes, k=n_ger)
Counter(y_pred_ger)
_get_accuracy(y_ger, y_pred_ger)

0.4560530679933665
0.48717948717948717


#### Class probability = class frequency

In [17]:
classes = [0,1]
weights_eng = [zero_eng/n_eng, one_eng/n_eng]
y_pred_eng = random.choices(classes, weights_eng, k=n_eng)
_get_accuracy(y_eng, y_pred_eng)

weights_ger = [zero_ger/n_ger, one_ger/n_ger]
y_pred_ger = random.choices(classes, weights_ger, k=n_ger)
_get_accuracy(y_ger, y_pred_ger)

0.6235489220563848
0.46703296703296704


# Balanced Accuracy

## Twoclass 

In [18]:
def _get_balanced_accuracy(y, ypred):
    print(balanced_accuracy_score(y, ypred))

zero_eng = 365
one_eng = 191
n_eng = 556

zero_ger = 332
one_ger = 171
n_ger = 503

y_eng = zero_eng*[0] + one_eng*[1]
y_ger = zero_ger*[0] + one_ger*[1]

#### Classify every data point as 0 /not reviewed  (majority class)

In [19]:
y_pred_eng = n_eng*[0]
_get_balanced_accuracy(y_eng, y_pred_eng)

y_pred_ger = n_ger*[0]
_get_balanced_accuracy(y_ger, y_pred_ger)

0.5
0.5


#### Assign each class with possibility 1/2

In [20]:
classes = [0,1]
y_pred_eng = random.choices(classes, k=n_eng)
print(Counter(y_pred_eng))
_get_balanced_accuracy(y_eng, y_pred_eng)

y_pred_ger = random.choices(classes, k=n_ger)
print(Counter(y_pred_ger))
_get_balanced_accuracy(y_ger, y_pred_ger)

Counter({0: 279, 1: 277})
0.5432403356522987
Counter({1: 253, 0: 250})
0.49109596279856266


#### Class probability = class frequency

In [21]:
classes = [0,1]
weights_eng = [zero_eng/n_eng, one_eng/n_eng]
y_pred_eng = random.choices(classes, weights_eng, k=n_eng)
_get_balanced_accuracy(y_eng, y_pred_eng)

weights_ger = [zero_ger/n_ger, one_ger/n_ger]
y_pred_ger = random.choices(classes, weights_ger, k=n_ger)
_get_balanced_accuracy(y_ger, y_pred_ger)

0.49904611633077534
0.4870975128584514


## Library

In [22]:
zero_eng = 146
one_eng = 457
n_eng = 603

zero_ger = 246
one_ger = 300
n_ger = 546

y_eng = zero_eng*[0] + one_eng*[1]
y_ger = zero_ger*[0] + one_ger*[1]

#### Classify every data point as 1/featured (majority class)

In [23]:
y_pred_eng = n_eng*[1]
_get_balanced_accuracy(y_eng, y_pred_eng)

y_pred_ger = n_ger*[1]
_get_balanced_accuracy(y_ger, y_pred_ger)

0.5
0.5


#### Assign each class with possibility 1/2

In [24]:
classes = [0,1]
y_pred_eng = random.choices(classes, k=n_eng)
Counter(y_pred_eng)
_get_balanced_accuracy(y_eng, y_pred_eng)

y_pred_ger = random.choices(classes, k=n_ger)
Counter(y_pred_ger)
_get_balanced_accuracy(y_ger, y_pred_ger)

0.4926036389796469
0.49926829268292683


#### Class probability = class frequency

In [25]:
classes = [0,1]
weights_eng = [zero_eng/n_eng, one_eng/n_eng]
y_pred_eng = random.choices(classes, weights_eng, k=n_eng)
_get_balanced_accuracy(y_eng, y_pred_eng)

weights_ger = [zero_ger/n_ger, one_ger/n_ger]
y_pred_ger = random.choices(classes, weights_ger, k=n_ger)
_get_balanced_accuracy(y_ger, y_pred_ger)

0.46212643505890105
0.5216666666666667
