In [28]:
import os
import random 

import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import matthews_corrcoef
import pandas as pd

def seed_everything(seed: int):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.RandomState(seed)
    np.random.seed(seed)
    # torch.manual_seed(seed)
    # torch.cuda.manual_seed(seed)
    # torch.backends.cudnn.deterministic = True

seed_everything(42)

In [31]:
class Baseline:

    def __init__(self, path: str, path_valid=None, seed=42):
        
        self.path = path
        self.train = pd.read_json(path_or_buf=path, lines=True)
        self.seed = seed
        if path_valid:
            self.valid = pd.read_json(path_or_buf=path_valid, lines=True)
        else:
            self.valid = None
    
    def all_methods(self):

        if self.valid is not None:
            test_size = len(self.valid)
            y_true = list(self.valid.label)
        else:
            print("There are no Validation/Test set in this task")
            print("Making Predictions for Train dataset")
            test_size = len(self.train)
            y_true = self.train.label
            
        print()
        print(f"Making Prediction based on Majority Class")
        y_pred = self.majority_class(test_size=test_size)
        self.show_report(y_true, y_pred)

        if 'lidirus' in self.path.lower():
            print(f" Matthews Correlation: {self.show_mc(y_true, y_pred)}")

        print()
        print(f"Making Prediction based on Random Choice")
        y_pred = self.random_choice(test_size=test_size)
        self.show_report(y_true, y_pred)

        if 'lidirus' in self.path.lower():
            print(f" Matthews Correlation: {self.show_mc(y_true, y_pred)}")
        
        print()
        print(f"Making Prediction based on Random Choice Considered Classes Distribution")
        y_pred = self.random_balanced_choice(test_size=test_size)
        self.show_report(y_true, y_pred)

        if 'lidirus' in self.path.lower():
            print(f" Matthews Correlation: {self.show_mc(y_true, y_pred)}")


    def show_report(self, y_true, y_pred):
        print(classification_report(y_true, y_pred))

    def show_mc(self, y_true, y_pred):
        return matthews_corrcoef(y_true, y_pred)
    
    
    def majority_class(self, test_size):
        """
        Make prediction based on majority class of train dataset
        test_size: how many predictions should be made
        return: List of predictions
        """

        prediction = self.train.label.mode()[0]
        y_pred = [prediction] * test_size
        return y_pred

    def random_choice(self, test_size):
        """
        Make random predictions
        label: label column in df (str)
        test_size: how many predictions should be made
        return: List of predictions
        """
        options = sorted(self.train.label.unique())
        np.random.seed(self.seed)
        y_pred = np.random.choice(options, size=test_size)
        return y_pred


    def random_balanced_choice(self, test_size):
        """
        Make random predictions with calculated probabilities
        label: label column in df (str)
        test_size: how many predictions should be made
        return: List of predictions
        """
        frequences = dict(self.train.label.value_counts(normalize=True))

        labels = []
        probs = []
        for key, value in frequences.items():
            labels.append(key)
            probs.append(value)
        np.random.seed(self.seed)
        y_pred = np.random.choice(labels, size=test_size, p=probs)
        return y_pred

# LiDiRus

In [23]:
%%capture
%%bash
# change url if you want to work with a different RSG dataset
wget -q --show-progress "https://russiansuperglue.com/ru/tasks/download/LiDiRus" -O temp.zip
unzip temp.zip -d data

# remove unnecessary directories and files
rm temp.zip
rm -r data/__MACOSX
rm -r sample_data/

Archive:  temp.zip
   creating: data/LiDiRus/
  inflating: data/LiDiRus/.DS_Store  
   creating: data/__MACOSX/
   creating: data/__MACOSX/LiDiRus/
  inflating: data/__MACOSX/LiDiRus/._.DS_Store  
  inflating: data/LiDiRus/LiDiRus.jsonl  
  inflating: data/__MACOSX/LiDiRus/._LiDiRus.jsonl  
  inflating: data/__MACOSX/._LiDiRus  



     0K .......... .......... .......... .......... ......    100%  340K=0.1srm: cannot remove 'sample_data/': No such file or directory


In [32]:
lidirus = Baseline(path='data/LiDiRus/LiDiRus.jsonl')
lidirus.all_methods()

There are no Validation/Test set in this task
Making Predictions for Train dataset

Making Prediction based on Majority Class
                precision    recall  f1-score   support

    entailment       0.00      0.00      0.00       459
not_entailment       0.58      1.00      0.74       645

      accuracy                           0.58      1104
     macro avg       0.29      0.50      0.37      1104
  weighted avg       0.34      0.58      0.43      1104

 Matthews Correlation: 0.0

Making Prediction based on Random Choice
                precision    recall  f1-score   support

    entailment       0.43      0.51      0.47       459
not_entailment       0.60      0.51      0.55       645

      accuracy                           0.51      1104
     macro avg       0.51      0.51      0.51      1104
  weighted avg       0.53      0.51      0.52      1104

 Matthews Correlation: 0.02389225560145845

Making Prediction based on Random Choice Considered Classes Distribution
          

  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


# RCB

In [33]:
%%capture
%%bash
# change url if you want to work with a different RSG dataset
wget -q --show-progress "https://russiansuperglue.com/tasks/download/RCB" -O temp.zip
unzip temp.zip -d data

# remove unnecessary directories and files
rm temp.zip
rm -r data/__MACOSX
rm -r sample_data/

In [34]:
rcb = Baseline(path='data/RCB/train.jsonl',
               path_valid='data/RCB/val.jsonl')
rcb.all_methods()


Making Prediction based on Majority Class
               precision    recall  f1-score   support

contradiction       0.00      0.00      0.00        30
   entailment       0.00      0.00      0.00        74
      neutral       0.53      1.00      0.69       116

     accuracy                           0.53       220
    macro avg       0.18      0.33      0.23       220
 weighted avg       0.28      0.53      0.36       220


Making Prediction based on Random Choice
               precision    recall  f1-score   support

contradiction       0.12      0.30      0.17        30
   entailment       0.35      0.32      0.34        74
      neutral       0.58      0.39      0.47       116

     accuracy                           0.35       220
    macro avg       0.35      0.34      0.33       220
 weighted avg       0.44      0.35      0.38       220


Making Prediction based on Random Choice Considered Classes Distribution
               precision    recall  f1-score   support

contradic

  _warn_prf(average, modifier, msg_start, len(result))


# PARus

In [35]:
%%capture
%%bash
# change url if you want to work with a different RSG dataset
wget -q --show-progress "https://russiansuperglue.com/tasks/download/PARus" -O temp.zip
unzip temp.zip -d data

# remove unnecessary directories and files
rm temp.zip
rm -r data/__MACOSX
rm -r sample_data/

In [36]:
parus = Baseline(path='data/PARus/train.jsonl',
               path_valid='data/PARus/val.jsonl')
parus.all_methods()


Making Prediction based on Majority Class
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        55
           1       0.45      1.00      0.62        45

    accuracy                           0.45       100
   macro avg       0.23      0.50      0.31       100
weighted avg       0.20      0.45      0.28       100


Making Prediction based on Random Choice
              precision    recall  f1-score   support

           0       0.50      0.40      0.44        55
           1       0.41      0.51      0.46        45

    accuracy                           0.45       100
   macro avg       0.46      0.46      0.45       100
weighted avg       0.46      0.45      0.45       100


Making Prediction based on Random Choice Considered Classes Distribution
              precision    recall  f1-score   support

           0       0.51      0.44      0.47        55
           1       0.42      0.49      0.45        45

    accuracy           

  _warn_prf(average, modifier, msg_start, len(result))


#TERRa

In [37]:
%%capture
%%bash
# change url if you want to work with a different RSG dataset
wget -q --show-progress "https://russiansuperglue.com/tasks/download/TERRa" -O temp.zip
unzip temp.zip -d data

# remove unnecessary directories and files
rm temp.zip
rm -r data/__MACOSX
rm -r sample_data/

In [38]:
terra = Baseline(path='data/TERRa/train.jsonl',
               path_valid='data/TERRa/val.jsonl')
terra.all_methods()


Making Prediction based on Majority Class
                precision    recall  f1-score   support

    entailment       0.50      1.00      0.67       153
not_entailment       0.00      0.00      0.00       154

      accuracy                           0.50       307
     macro avg       0.25      0.50      0.33       307
  weighted avg       0.25      0.50      0.33       307


Making Prediction based on Random Choice
                precision    recall  f1-score   support

    entailment       0.52      0.52      0.52       153
not_entailment       0.52      0.51      0.52       154

      accuracy                           0.52       307
     macro avg       0.52      0.52      0.52       307
  weighted avg       0.52      0.52      0.52       307


Making Prediction based on Random Choice Considered Classes Distribution
                precision    recall  f1-score   support

    entailment       0.47      0.48      0.47       153
not_entailment       0.47      0.46      0.47     

  _warn_prf(average, modifier, msg_start, len(result))


# RUSSE

In [39]:
%%capture
%%bash
# change url if you want to work with a different RSG dataset
wget -q --show-progress "https://russiansuperglue.com/tasks/download/RUSSE" -O temp.zip
unzip temp.zip -d data

# remove unnecessary directories and files
rm temp.zip
rm -r data/__MACOSX
rm -r sample_data/

In [40]:
russe = Baseline(path='data/RUSSE/train.jsonl',
               path_valid='data/RUSSE/val.jsonl')
russe.all_methods()


Making Prediction based on Majority Class
              precision    recall  f1-score   support

       False       0.63      1.00      0.77      5366
        True       0.00      0.00      0.00      3139

    accuracy                           0.63      8505
   macro avg       0.32      0.50      0.39      8505
weighted avg       0.40      0.63      0.49      8505


Making Prediction based on Random Choice
              precision    recall  f1-score   support

       False       0.63      0.50      0.56      5366
        True       0.37      0.50      0.42      3139

    accuracy                           0.50      8505
   macro avg       0.50      0.50      0.49      8505
weighted avg       0.53      0.50      0.51      8505


Making Prediction based on Random Choice Considered Classes Distribution
              precision    recall  f1-score   support

       False       0.63      0.64      0.63      5366
        True       0.36      0.35      0.35      3139

    accuracy           

  _warn_prf(average, modifier, msg_start, len(result))


# RWSD

In [41]:
%%capture
%%bash
# change url if you want to work with a different RSG dataset
wget -q --show-progress "https://russiansuperglue.com/tasks/download/RWSD" -O temp.zip
unzip temp.zip -d data

# remove unnecessary directories and files
rm temp.zip
rm -r data/__MACOSX
rm -r sample_data/

In [42]:
russe = Baseline(path='data/RWSD/train.jsonl',
               path_valid='data/RWSD/val.jsonl')
russe.all_methods()


Making Prediction based on Majority Class
              precision    recall  f1-score   support

       False       0.55      1.00      0.71       113
        True       0.00      0.00      0.00        91

    accuracy                           0.55       204
   macro avg       0.28      0.50      0.36       204
weighted avg       0.31      0.55      0.39       204


Making Prediction based on Random Choice
              precision    recall  f1-score   support

       False       0.59      0.54      0.56       113
        True       0.49      0.54      0.51        91

    accuracy                           0.54       204
   macro avg       0.54      0.54      0.54       204
weighted avg       0.54      0.54      0.54       204


Making Prediction based on Random Choice Considered Classes Distribution
              precision    recall  f1-score   support

       False       0.54      0.50      0.52       113
        True       0.43      0.46      0.44        91

    accuracy           

  _warn_prf(average, modifier, msg_start, len(result))


# DaNetQA

In [43]:
%%capture
%%bash
# change url if you want to work with a different RSG dataset
wget -q --show-progress "https://russiansuperglue.com/tasks/download/DaNetQA" -O temp.zip
unzip temp.zip -d data

# remove unnecessary directories and files
rm temp.zip
rm -r data/__MACOSX
rm -r sample_data/

In [44]:
danetqa = Baseline(path='data/DaNetQA/train.jsonl',
               path_valid='data/DaNetQA/val.jsonl')
danetqa.all_methods()


Making Prediction based on Majority Class
              precision    recall  f1-score   support

       False       0.00      0.00      0.00       409
        True       0.50      1.00      0.67       412

    accuracy                           0.50       821
   macro avg       0.25      0.50      0.33       821
weighted avg       0.25      0.50      0.34       821


Making Prediction based on Random Choice
              precision    recall  f1-score   support

       False       0.50      0.48      0.49       409
        True       0.50      0.51      0.51       412

    accuracy                           0.50       821
   macro avg       0.50      0.50      0.50       821
weighted avg       0.50      0.50      0.50       821


Making Prediction based on Random Choice Considered Classes Distribution
              precision    recall  f1-score   support

       False       0.50      0.40      0.44       409
        True       0.50      0.60      0.55       412

    accuracy           

  _warn_prf(average, modifier, msg_start, len(result))
