In [48]:
from transformers import AutoModelForMaskedLM, AutoModelForSequenceClassification,AutoTokenizer
import torch
from datasets import load_dataset,concatenate_datasets,Dataset
import math
from torch.utils.data import DataLoader
from transformers import default_data_collator
from torch.optim import AdamW
from transformers import get_scheduler
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import preprocessing
import argparse
import preprocessing
import pickle
from sklearn.preprocessing import LabelEncoder
from transformers import get_linear_schedule_with_warmup
import random
import numpy as np

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
model_checkpoint = "KBLab/bert-base-swedish-cased"
model =  AutoModelForSequenceClassification.from_pretrained(model_checkpoint)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at KBLab/bert-base-swedish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
model_finetuned = AutoModelForSequenceClassification.from_pretrained("finetuning_hugging_python-finetuned-imdb/checkpoint-920384")
model_finetuned=model_finetuned.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at finetuning_hugging_python-finetuned-imdb/checkpoint-920384 and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
tokenizer= AutoTokenizer.from_pretrained(model_checkpoint)

In [7]:
def filter_NaN(subset,example):
    return example[subset] is not None


In [65]:
def subset(dataset,nb_obs,label_name):
    df = dataset.to_pandas()

    # Calculer le nombre d'observations pour chaque étiquette
    grouped_data = df.groupby(label_name)

    # Calculer le nombre d'observations par étiquette pour obtenir une répartition uniforme
    total_samples = nb_obs
    samples_per_label = total_samples // len(grouped_data.groups)

    # Créer une liste pour stocker les observations échantillonnées
    sampled_data = []

    # Prélever aléatoirement les observations pour chaque groupe de label
    for group_label, group_data in grouped_data.groups.items():
        group_dataset=dataset.select(group_data)
        label_data = group_dataset.shuffle(seed=np.random.randint(1, 1000)).select(range(min(len(group_data), samples_per_label)))
        sampled_data.extend(label_data)

    # Mélanger les observations pour obtenir un ordre aléatoire
    np.random.shuffle(sampled_data)

    # Créer un Dataset Hugging Face à partir des observations échantillonnées
    sampled_dataset = Dataset.from_dict({key: [example[key] for example in sampled_data] for key in sampled_data[0]})
    
    return sampled_dataset

In [9]:
def tokenize_function(examples):
    result = tokenizer(examples["Note"],padding=True, truncation=True,max_length=512)
    return result

In [10]:
def evaluate(model, loader):
    loss, accuracy = 0.0, []
    model.eval()
    for batch in tqdm(loader, total=len(loader)):
        input_ids = batch["input_ids"].to(device)
        input_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        output = model(input_ids,
            token_type_ids=None, 
            attention_mask=input_mask, 
            labels=labels)
        loss += output.loss.item()
        preds_batch = torch.argmax(output.logits, axis=1)
        batch_acc = torch.mean((preds_batch == labels).float())
        accuracy.append(batch_acc)
        
    accuracy = torch.mean(torch.tensor(accuracy))
    return loss, accuracy


In [11]:
data_files = {"train": "swerick_data_party_train.pkl", "test": "swerick_data_party_test.pkl"}
party_dataset = load_dataset("pandas",data_files=data_files)
print(party_dataset)

DatasetDict({
    train: Dataset({
        features: ['protocole', 'Note', 'id', 'party', 'gender'],
        num_rows: 3378877
    })
    test: Dataset({
        features: ['protocole', 'Note', 'id', 'party', 'gender'],
        num_rows: 725974
    })
})


In [12]:
data_files = {"valid": "swerick_data_party_valid.pkl"}
party_valid_dataset = load_dataset("pandas",data_files=data_files)
print(party_valid_dataset)

DatasetDict({
    valid: Dataset({
        features: ['protocole', 'Note', 'id', 'party', 'gender'],
        num_rows: 725974
    })
})


In [50]:
party_dataset["train"]=party_dataset["train"].filter(lambda x : filter_NaN("party",x))
party_dataset["test"]=party_dataset["test"].filter(lambda x : filter_NaN("party",x))

Filter:   0%|          | 0/3167502 [00:00<?, ? examples/s]

Filter:   0%|          | 0/676000 [00:00<?, ? examples/s]

In [51]:
party_valid_dataset["valid"]=party_valid_dataset["valid"].filter(lambda x : filter_NaN("party",x))

Filter:   0%|          | 0/676000 [00:00<?, ? examples/s]

In [17]:
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(party_dataset["train"]["gender"])
label_names = label_encoder.classes_
label_dict={ i : label_names[i] for i in  range(len(label_names))}
print(label_dict)

{0: 'man', 1: 'woman'}


In [18]:
print(label_names)

['man' 'woman']


In [19]:
with open("labels_gender.pkl", "wb") as fp:   
   pickle.dump(label_names, fp)

In [22]:
with open("labels.pkl","rb") as f :
    label_names=pickle.load(f)

print(label_names.tolist())

['"vänstern"', 'Andra kammarens center', 'Andra kammarens frihandelsparti', 'Bondeförbundet', 'Centern (partigrupp 1873-1882)', 'Centern (partigrupp 1885-1887)', 'Centerpartiet', 'Det förenade högerpartiet', 'Ehrenheimska partiet', 'Folkpartiet', 'Folkpartiet (1895–1900)', 'Friesenska diskussionsklubben', 'Frihandelsvänliga centern', 'Frisinnade folkpartiet', 'Frisinnade försvarsvänner', 'Frisinnade landsföreningen', 'Första kammarens konservativa grupp', 'Första kammarens ministeriella grupp', 'Första kammarens minoritetsparti', 'Första kammarens moderata parti', 'Första kammarens nationella parti', 'Första kammarens protektionistiska parti', 'Gamla lantmannapartiet', 'Högerns riksdagsgrupp', 'Högerpartiet', 'Högerpartiet de konservativa', 'Jordbrukarnas fria grupp', 'Junkerpartiet', 'Kilbomspartiet', 'Kommunistiska partiet', 'Kristdemokraterna', 'Lantmanna- och borgarepartiet inom andrakammaren', 'Lantmannapartiet', 'Lantmannapartiets filial', 'Liberala riksdagspartiet', 'Liberala sa

In [12]:
party_dataset["train"]=party_dataset["train"].map(lambda example :{"party_labels" : label_encoder.transform([example["party"]])[0]})
party_dataset["test"]=party_dataset["test"].map(lambda example :{"party_labels" : label_encoder.transform([example["party"]])[0]})

Map:   0%|          | 0/3167750 [00:00<?, ? examples/s]

Map:   0%|          | 0/676215 [00:00<?, ? examples/s]

In [18]:
party_valid_dataset["valid"]=party_valid_dataset["valid"].map(lambda example :{"party_labels" : label_encoder.transform([example["party"]])[0]})

Map:   0%|          | 0/676215 [00:00<?, ? examples/s]

In [14]:
import train_party_detection

In [66]:
party_train_datasets = subset(party_dataset["train"],5000,"party")
party_test_datasets = subset(party_dataset["test"],5000,"party")
party_valid_datasets = subset(party_valid_dataset["valid"],5000,"party")

AttributeError: 'Dataset' object has no attribute 'groupby'

In [45]:
i,j,m=random.randint(0,len(party_train_datasets)),random.randint(0,len(party_test_datasets)),random.randint(0,len(party_valid_datasets))
train_set = party_train_datasets[i]
test_set = party_test_datasets[j]
valid_set = party_valid_datasets[m]


In [None]:
print(party_train_datasets)
print(party_test_datasets)
print(party_valid_datasets)

Dataset({
    features: ['protocole', 'Note', 'id', 'party', 'gender'],
    num_rows: 5000
})
Dataset({
    features: ['protocole', 'Note', 'id', 'party', 'gender'],
    num_rows: 5000
})
Dataset({
    features: ['protocole', 'Note', 'id', 'party', 'gender'],
    num_rows: 5000
})


In [17]:
party_dataset = concatenate_datasets([train_set,test_set,valid_set])

In [72]:
tokenized_train_datasets = train_set.map(tokenize_function,batched=True )
tokenized_test_datasets = test_set.map(tokenize_function,batched=True )
tokenized_valid_datasets = valid_set.map(tokenize_function,batched=True )
tokenized_train_datasets

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Dataset({
    features: ['protocole', 'Note', 'id', 'party', 'gender', 'party_labels', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 1000
})

In [73]:
tokenized_train_datasets=tokenized_train_datasets.remove_columns(["protocole","id","party","gender","Note"])
tokenized_test_datasets=tokenized_test_datasets.remove_columns(["protocole","id","party","gender","Note"])
tokenized_valid_datasets=tokenized_valid_datasets.remove_columns(["protocole","id","party","gender","Note"])

In [74]:
tokenized_train_datasets=tokenized_train_datasets.rename_column("party_labels","labels")
tokenized_test_datasets=tokenized_test_datasets.rename_column("party_labels","labels")
tokenized_valid_datasets=tokenized_valid_datasets.rename_column("party_labels","labels")





In [75]:
tokenized_train_datasets.set_format(type="torch",columns=["input_ids","labels","attention_mask"])
tokenized_test_datasets.set_format(type="torch",columns=["input_ids","labels","attention_mask"])
tokenized_valid_datasets.set_format(type="torch",columns=["input_ids","labels","attention_mask"])

In [76]:
batch_size = 64
num_workers=4

train_loader = DataLoader(
        tokenized_train_datasets,
        shuffle=True,
        batch_size = batch_size,
        num_workers = num_workers
    )

valid_loader = DataLoader(
        tokenized_valid_datasets,
        shuffle=False,
        batch_size = batch_size,
        num_workers = num_workers
    )

# Not used atm
test_loader = DataLoader(
        tokenized_test_datasets,
        shuffle=False,
        batch_size = batch_size,
        num_workers = num_workers
    )

In [80]:
n_epochs =10
model = AutoModelForSequenceClassification.from_pretrained(
        model_checkpoint,
        num_labels=len(label_dict),
        id2label=label_dict).to("cpu")

optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=2e-5)
num_training_steps = len(train_loader) * n_epochs
num_warmup_steps = num_training_steps // 10

# Linear warmup and step decay
scheduler = get_linear_schedule_with_warmup(
    optimizer = optimizer,
    num_warmup_steps = num_warmup_steps,
    num_training_steps = num_training_steps
    )

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at KBLab/bert-base-swedish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [82]:
device="cpu"
train_losses = []
valid_losses = []
best_valid_loss = float('inf')


for epoch in range(n_epochs):
    print(f"Epoch {epoch} starts!")
    train_loss = 0
    model.train()
    for batch in tqdm(train_loader, total=len(train_loader)):
        model.zero_grad()   

        input_ids = batch["input_ids"].to(device)
        input_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        output = model(input_ids,
                token_type_ids=None, 
                attention_mask=input_mask, 
                labels=labels)
        loss = output.loss
        train_loss += loss.item()

        loss.backward()
        optimizer.step()
        scheduler.step()
    
    # Evaluation
    valid_loss, valid_accuracy = evaluate(model, valid_loader)

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    train_loss_avg = train_loss * batch_size / len(train_loader)
    valid_loss_avg = valid_loss * batch_size / len(valid_loader)

    print(f'Training Loss: {train_loss_avg:.3f}')
    print(f'Validation Loss: {valid_loss_avg:.3f}')
    print(f'Validation accuracy: {valid_accuracy}')

    # Store best model

    if valid_loss < best_valid_loss:
        print("Best validation loss so far")
        best_valid_loss = valid_loss
 
    else:
            print("Not the best validation loss so far")

Epoch 0 starts!


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/16 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

KeyError: 0

In [85]:
valid_loss, valid_accuracy = evaluate(model, valid_loader)

train_losses.append(train_loss)
valid_losses.append(valid_loss)

train_loss_avg = train_loss * batch_size / len(train_loader)
valid_loss_avg = valid_loss * batch_size / len(valid_loader)

print(f'Training Loss: {train_loss_avg:.3f}')
print(f'Validation Loss: {valid_loss_avg:.3f}')
print(f'Validation accuracy: {valid_accuracy}')

# Store best model

if valid_loss < best_valid_loss:
    print("Best validation loss so far")
    best_valid_loss = valid_loss
else:
        print("Not the best validation loss so far")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

: 

In [3]:
import pandas as pd

df=pd.read_pickle("swerick_data_party_train.pkl")
df = df.rename(columns={"Note":"content","party" : "tag"})
df.to_csv("swerick_data_party_train.csv")

In [7]:
df=pd.read_csv("swerick_data_party_train.csv")
df =df.dropna(subset="tag")
df

KeyError: ['party']

In [11]:
df =df.dropna(subset="tag")
df.to_csv("swerick_data_party_train.csv")

In [47]:
df=train_set.to_pandas()
df = df.rename(columns={"Note":"content","gender" : "tag"})
df.to_csv("swerick_subsetdata_gender_train.csv")

In [10]:
label_names = sorted(list(set(df["tag"])))

In [39]:
label_names

array(['"vänstern"', 'Andra kammarens center',
       'Andra kammarens frihandelsparti', 'Bondeförbundet',
       'Centern (partigrupp 1873-1882)', 'Centern (partigrupp 1885-1887)',
       'Centerpartiet', 'Det förenade högerpartiet',
       'Ehrenheimska partiet', 'Folkpartiet', 'Folkpartiet (1895–1900)',
       'Friesenska diskussionsklubben', 'Frihandelsvänliga centern',
       'Frisinnade folkpartiet', 'Frisinnade försvarsvänner',
       'Frisinnade landsföreningen',
       'Första kammarens konservativa grupp',
       'Första kammarens ministeriella grupp',
       'Första kammarens minoritetsparti',
       'Första kammarens moderata parti',
       'Första kammarens nationella parti',
       'Första kammarens protektionistiska parti',
       'Gamla lantmannapartiet', 'Högerns riksdagsgrupp', 'Högerpartiet',
       'Högerpartiet de konservativa', 'Jordbrukarnas fria grupp',
       'Junkerpartiet', 'Kilbomspartiet', 'Kommunistiska partiet',
       'Kristdemokraterna',
       'Lantman

In [4]:
# Convertir la liste de noms de libellés en une chaîne séparée par des espaces avec chaque nom entouré de guillemets simples
label_names_str = " ".join([f'"{name}"' for name in label_names.tolist()])

$label_names_str

SyntaxError: invalid syntax (643114865.py, line 4)

In [6]:
!echo $label_names_str


vänstern Andra kammarens center Andra kammarens frihandelsparti Bondeförbundet Centern (partigrupp 1873-1882) Centern (partigrupp 1885-1887) Centerpartiet Det förenade högerpartiet Ehrenheimska partiet Folkpartiet Folkpartiet (1895–1900) Friesenska diskussionsklubben Frihandelsvänliga centern Frisinnade folkpartiet Frisinnade försvarsvänner Frisinnade landsföreningen Första kammarens konservativa grupp Första kammarens ministeriella grupp Första kammarens minoritetsparti Första kammarens moderata parti Första kammarens nationella parti Första kammarens protektionistiska parti Gamla lantmannapartiet Högerns riksdagsgrupp Högerpartiet Högerpartiet de konservativa Jordbrukarnas fria grupp Junkerpartiet Kilbomspartiet Kommunistiska partiet Kristdemokraterna Lantmanna- och borgarepartiet inom andrakammaren Lantmannapartiet Lantmannapartiets filial Liberala riksdagspartiet Liberala samlingspartiet Liberalerna Medborgerlig samling (1964–1968) Miljöpartiet Moderaterna Nationella framstegsparti

In [44]:
!python3 train_binary_bert.py --data_path "swerick_subsetdata_party_train.csv" --label_names $label_names_str 

['vänstern', 'Andra kammarens center', 'Andra kammarens frihandelsparti', 'Bondeförbundet', 'Centern (partigrupp 1873-1882)', 'Centern (partigrupp 1885-1887)', 'Centerpartiet', 'Det förenade högerpartiet', 'Ehrenheimska partiet', 'Folkpartiet', 'Folkpartiet (1895–1900)', 'Friesenska diskussionsklubben', 'Frihandelsvänliga centern', 'Frisinnade folkpartiet', 'Frisinnade försvarsvänner', 'Frisinnade landsföreningen', 'Första kammarens konservativa grupp', 'Första kammarens ministeriella grupp', 'Första kammarens minoritetsparti', 'Första kammarens moderata parti', 'Första kammarens nationella parti', 'Första kammarens protektionistiska parti', 'Gamla lantmannapartiet', 'Högerns riksdagsgrupp', 'Högerpartiet', 'Högerpartiet de konservativa', 'Jordbrukarnas fria grupp', 'Junkerpartiet', 'Kilbomspartiet', 'Kommunistiska partiet', 'Kristdemokraterna', 'Lantmanna- och borgarepartiet inom andrakammaren', 'Lantmannapartiet', 'Lantmannapartiets filial', 'Liberala riksdagspartiet', 'Liberala saml

In [4]:
!python3 train_binary_bert.py --model_filename "trained_hugging_face_party_classification" --base_model "finetuning_hugging_whitespace-finetuned-imdb/checkpoint-343500" --data_path "swerick_subsetdata_party_train.csv"

[32m14:31:33 [INFO] [37m(train-bert)[0m: Load and save tokenizer...[0m
[32m14:31:33 [INFO] [37m(train-bert)[0m: Preprocess datasets...[0m
[32m14:31:34 [INFO] [37m(train-bert)[0m: Labels: tensor([8, 8, 8, 8, 8, 3, 3, 3, 4, 3, 3, 8, 3, 3, 3, 3, 8, 3, 1, 8, 8, 8, 6, 8,
        8, 8, 8, 8, 3, 3, 3, 8, 2, 3, 8, 3, 3, 3, 8, 8, 8, 8, 3, 3, 8, 8, 3, 2,
        3, 8, 8, 8, 8, 8, 8, 3, 6, 8, 3, 3, 8, 3, 8, 3, 8, 8, 8, 3, 4, 8, 8, 8,
        2, 3, 8, 3, 3, 8, 3, 3, 8, 2, 8, 8, 3, 3, 8, 3, 8, 8, 2, 3, 7, 3, 4, 8,
        8, 8, 8, 3, 8, 8, 8, 3, 3, 8, 8, 8, 8, 8, 3, 3, 3, 8, 3, 4, 3, 3, 6, 8,
        4, 8, 3, 3, 8, 8, 8, 3, 3, 8, 8, 3, 3, 8, 8, 3, 6, 8, 3, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 4, 8, 4, 8, 8, 3, 3, 8, 3, 8, 3, 8, 8, 3, 3, 8, 3, 7, 8, 8,
        8, 3, 8, 3, 8, 4, 3, 2, 8, 8, 3, 3, 3, 8, 3, 2, 8, 8, 7, 8, 8, 2, 4, 8,
        3, 8, 8, 8, 3, 8, 8, 8, 3, 3, 7, 8, 3, 3, 8, 8, 3, 8, 3, 8, 3, 3, 4, 8,
        3, 8, 8, 8, 3, 8, 8, 3, 3, 3, 8, 8, 2, 8, 8, 2, 8, 8, 3, 8, 1, 6, 8, 8,
  