In [85]:
import pandas as pd
import numpy as np
import ast
from ftfy import fix_and_explain, apply_plan, fix_text, fix_encoding_and_explain, fix_encoding
from typing import List
from pathlib import Path
from tweets_cleaning import ekphrasis_clean
from confusion_matrix_utils import plot_confusion_matrix
from math import isnan

from IPython.display import display

In [154]:
files_path = Path('../labels/raw')
base_models_path = Path('/media/discoD/World_Bank/Nigeria/hate_speech')
multi_label_files = ['haaya_tweets_round_2_labeled.csv', 
                     'ibrahim_tweets_multi_labeled.csv', 
                     'manu_tweets_round_2_labelled.csv']
fourth_path = base_models_path / 'fourth_test'

In [164]:
def load_multilabel_annotations(source_path: Path, clean: bool, drop_labels: bool = True) -> pd.DataFrame:
    
    multi_label_columns = ['christian|christians', 'muslim|muslims|islam|islamic', 
                           'northern|northerner|northerners|arewa|almajiri', 'southern|southerner|southerners', 
                           'hausa|hausas', 'fulani|fulanis', 'yoruba|yorubas', 'igbo|ibo|ibos|igbos', 
                           'women|woman|girl|girls|female|females', 
                           'lgbt|lgbtq|lgbtq+|gay|gays|lesbian|lesbians|transgender|transgenders', 
                           'herdsmen|herdsman', 'eastern|easterner|easterners|biafra']
    use_cols = ['text', 'class', 'index'] + multi_label_columns
    
    dataframe = pd.read_csv(source_path, lineterminator='\n', usecols=use_cols)
    dataframe = dataframe.fillna(0)
    dataframe['original_file'] = source_path
    
    if clean:
        dataframe['raw_text'] = dataframe['text']
        dataframe['text'] = np.vectorize(ekphrasis_clean)(dataframe['text'], False)
    
    raw_len = len(dataframe)
    print(f'Loaded {len(dataframe)} from {source_path}')
    dropped = len(dataframe[dataframe["class"].isnull()])
    unsures = len(dataframe[dataframe["class"] == 3])
    dataframe = dataframe[dataframe["class"].isin([0,1,2])]
    try:
        assert (dropped + unsures + len(dataframe)) == raw_len
    except AssertionError as ae:
        print(f'Empty: {dropped}, Unsure: {unsures}, Remaining: {len(dataframe)}, Raw: {raw_len}')
        raise ae
    print(f'Dropping {dropped} empties from {source_path}')
    print(f'Dropping {unsures} unsures from {source_path}')

    dataframe["labels"] = dataframe.apply(lambda row: ','.join([str(label) for label in row[multi_label_columns].tolist()]), axis=1)
    if drop_labels:
        for column in multi_label_columns:
            del dataframe[column]
#     del dataframe['class']
    
    print(dataframe.columns)
    
    return dataframe

def load_multilabel_annotations_from_folder(files_folder: Path, output_path: Path, version: int, 
                                            clean: bool, drop_labels: bool = True) -> pd.DataFrame:
    dataframes = []
    output_path.mkdir(exist_ok=True)
    for file in multi_label_files:
        dataframes.append(load_multilabel_annotations(source_path=files_folder / file, clean=clean, 
                                                      drop_labels=drop_labels))
    full_df = pd.concat(dataframes, ignore_index=True)
    cleaned_str = '_cleaned' if clean else ''
    output_name = output_path / f'multilabels{cleaned_str}_v{version}.tsv'
    full_df.to_csv(output_name, index=None, sep='\t')
    print(f'Kept {len(full_df)} labels for training and testing.')
    return full_df

def load_annotations(output_path: Path, version: int, clean: bool) -> pd.DataFrame:
    dataframes = []
    output_path.mkdir(exist_ok=True)
    total_unsure = 0
    total_empty = 0
    total_raw = 0
    for file in files_path.glob("*.csv"):
        file_path = str(file)
        print(file_path)
        class_column = 'second_class' if 'niyati' in file_path else 'class'
        use_cols = ['text', class_column, 'index']
        try:
            dataframe = pd.read_csv(file_path, usecols=use_cols, header=0, lineterminator='\n')
        except UnicodeDecodeError:
            dataframe = pd.read_csv(file_path, usecols=use_cols, header=0, lineterminator='\n', encoding='iso-8859-1')
        dataframe = dataframe.rename({class_column: 'labels'}, axis=1)
        dataframe['original_file'] = file_path
        if clean:
            dataframe['raw_text'] = dataframe['text']
            dataframe['text'] = np.vectorize(ekphrasis_clean)(dataframe['text'], False)
        raw_len = len(dataframe)
        print(f'Loaded {len(dataframe)} from {file_path}')
        total_raw += len(dataframe)
        dropped = len(dataframe[dataframe["labels"].isnull()])
        unsures = len(dataframe[dataframe["labels"] == 3])
        total_unsure += unsures
        total_empty += dropped
        dataframe = dataframe[dataframe["labels"].isin([0,1,2])]
        try:
            assert (dropped + unsures + len(dataframe)) == raw_len
        except AssertionError as ae:
            print(f'Empty: {dropped}, Unsure: {unsures}, Remaining: {len(dataframe)}, Raw: {raw_len}')
            raise ae
        print(f'Dropping {dropped} empties from {file_path}')
        print(f'Dropping {unsures} unsures from {file_path}')
        dataframes.append(dataframe)
    full_df = pd.concat(dataframes, ignore_index=True)
    if clean:
        output_name = output_path / f'labels_cleaned_v{version}.csv'
    else:
        output_name = output_path / f'labels_v{version}.csv'
    full_df.to_csv(output_name, index=None)
    print(f'Dropped a total of {total_unsure} unsures and {total_empty} empty labels, keeping {len(full_df)} out of {total_raw} labels.')
    return full_df

def plot_scores_confusion_matrix(input_path: str, output_name: str):
    eval_scores = pd.read_csv(input_path)
    gold = eval_scores['labels']
    predicted = []
    for idx, row in eval_scores.iterrows():
        scores = ast.literal_eval(row['score'])
        predicted.append(np.argmax(scores))
    plot_confusion_matrix(gold, predicted, output_path=f'{output_name}.pdf', none_class=None, size=16)
    
def get_predicted_label(row) -> float:
    return np.argmax(ast.literal_eval(row['score']))

def load_scores(input_path: str) -> pd.DataFrame:
    eval_scores = pd.read_csv(input_path)
    eval_scores['predicted'] = eval_scores.apply(lambda row: get_predicted_label(row), axis=1)
    return eval_scores

def display_full(dataframe: pd.DataFrame):
    pd.set_option("display.max_colwidth", None)
    pd.set_option("display.max_columns", None)
    display(dataframe)
    pd.reset_option("display.max_colwidth")
    pd.reset_option("display.max_columns")

def display_text(dataframe: pd.DataFrame, index: int):
    display_full(dataframe.iloc[index]["text"])
    
def bundle_labels(dataframe: pd.DataFrame, label_list: List, output_path: Path) -> pd.DataFrame:
    bundled_df = dataframe.copy()
    bundled_df['labels'] = bundled_df.apply(lambda row: 1 if row['labels'] in label_list else 0, axis = 1)
    bundled_df.to_csv(output_path, index=None)
    return bundled_df

def dump_prediction_errors(scores_path: Path, output_path: str):
    scores_df = load_scores(scores_path / 'scores.csv')
    errors_df = scores_df[scores_df['predicted'] != scores_df['labels']]
    output_cols = ['labels', 'raw_text', 'score', 'predicted'] if 'cleaned' in str(scores_path) else ['labels', 'text', 'score', 'predicted']
    display_full(errors_df[output_cols])
    errors_df.to_csv(output_path, line_terminator='\n', index=None)

In [166]:
ml_df = pd.read_csv(fourth_path / 'multilabels_v4.tsv', sep='\t')
# ml_df['labels'] = ml_df.apply(lambda row: ast.literal_eval(row['labels']), axis=1) 
ml_df

Unnamed: 0,index,class,text,christian|christians,muslim|muslims|islam|islamic,northern|northerner|northerners|arewa|almajiri,southern|southerner|southerners,hausa|hausas,fulani|fulanis,yoruba|yorubas,igbo|ibo|ibos|igbos,women|woman|girl|girls|female|females,lgbt|lgbtq|lgbtq+|gay|gays|lesbian|lesbians|transgender|transgenders,herdsmen|herdsman,eastern|easterner|easterners|biafra,original_file,labels
0,420,0,RT @DrOlufunmilayo: What #EndSARS is NOT:\nIt ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../labels/raw/haaya_tweets_round_2_labeled.csv,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0"
1,104,2,@MaziNnamdiKanu Aturu hausa. I thought you sai...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../labels/raw/haaya_tweets_round_2_labeled.csv,"1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0"
2,226,2,@channelstv @sunrisedailynow @chamberlainusoh ...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../labels/raw/haaya_tweets_round_2_labeled.csv,"0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0"
3,66,2,Your one of the lazy almajiris Buhari talked a...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../labels/raw/haaya_tweets_round_2_labeled.csv,"0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0"
4,54,2,@olise_c @USEmbassyAbuja @Mazianozie @NigAirFo...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,../labels/raw/haaya_tweets_round_2_labeled.csv,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
704,78,2,@adamugarba Some of u the educated northerners...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../labels/raw/manu_tweets_round_2_labelled.csv,"0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0"
705,540,2,"“When it comes to relationships, Yoruba boys a...",0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,../labels/raw/manu_tweets_round_2_labelled.csv,"0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0"
706,168,1,@gloria_adagbon @MBuhari @OlumideIDOWU Lol... ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../labels/raw/manu_tweets_round_2_labelled.csv,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0"
707,26,0,@Nazirdanhajiya It happens all the time why do...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../labels/raw/manu_tweets_round_2_labelled.csv,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0"


In [165]:
all_ml_df = load_multilabel_annotations_from_folder(files_folder=files_path, output_path=fourth_path, 
                                                    version=4, clean=False, drop_labels=False)
all_ml_clean_df = load_multilabel_annotations_from_folder(files_folder=files_path, output_path=fourth_path, 
                                                    version=4, clean=True, drop_labels=False)

Loaded 279 from ../labels/raw/haaya_tweets_round_2_labeled.csv
Dropping 0 empties from ../labels/raw/haaya_tweets_round_2_labeled.csv
Dropping 56 unsures from ../labels/raw/haaya_tweets_round_2_labeled.csv
Index(['index', 'class', 'text', 'christian|christians',
       'muslim|muslims|islam|islamic',
       'northern|northerner|northerners|arewa|almajiri',
       'southern|southerner|southerners', 'hausa|hausas', 'fulani|fulanis',
       'yoruba|yorubas', 'igbo|ibo|ibos|igbos',
       'women|woman|girl|girls|female|females',
       'lgbt|lgbtq|lgbtq+|gay|gays|lesbian|lesbians|transgender|transgenders',
       'herdsmen|herdsman', 'eastern|easterner|easterners|biafra',
       'original_file', 'labels'],
      dtype='object')
Loaded 240 from ../labels/raw/ibrahim_tweets_multi_labeled.csv
Dropping 0 empties from ../labels/raw/ibrahim_tweets_multi_labeled.csv
Dropping 0 unsures from ../labels/raw/ibrahim_tweets_multi_labeled.csv
Index(['index', 'class', 'text', 'christian|christians',
    

In [150]:
all_ml_df = load_multilabel_annotations_from_folder(files_folder=files_path, output_path=fourth_path, 
                                                    version=4, clean=False, join_labels=False)
all_ml_clean_df = load_multilabel_annotations_from_folder(files_folder=files_path, output_path=fourth_path, 
                                                    version=4, clean=True, join_labels=False)

Loaded 279 from ../labels/raw/haaya_tweets_round_2_labeled.csv
Dropping 0 empties from ../labels/raw/haaya_tweets_round_2_labeled.csv
Dropping 55 unsures from ../labels/raw/haaya_tweets_round_2_labeled.csv
Index(['index', 'class', 'text', 'original_file', 'labels'], dtype='object')
Loaded 240 from ../labels/raw/ibrahim_tweets_multi_labeled.csv
Dropping 0 empties from ../labels/raw/ibrahim_tweets_multi_labeled.csv
Dropping 0 unsures from ../labels/raw/ibrahim_tweets_multi_labeled.csv
Index(['index', 'class', 'text', 'original_file', 'labels'], dtype='object')
Loaded 279 from ../labels/raw/manu_tweets_round_2_labelled.csv
Dropping 0 empties from ../labels/raw/manu_tweets_round_2_labelled.csv
Dropping 33 unsures from ../labels/raw/manu_tweets_round_2_labelled.csv
Index(['index', 'class', 'text', 'original_file', 'labels'], dtype='object')
Kept 710 labels for training and testing.
Loaded 279 from ../labels/raw/haaya_tweets_round_2_labeled.csv
Dropping 0 empties from ../labels/raw/haaya_twe

In [152]:
all_ml_df['class'].value_counts()

2    333
0    250
1    127
Name: class, dtype: int64

In [75]:
all_ml_df.iloc[0]['labels'], len(all_ml_df.iloc[0]['labels'])

([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 12)

In [96]:
df = pd.read_csv(fourth_path / 'multilabels_v4.csv')
df['labels'] = df.apply(lambda row: ast.literal_eval(row['labels']), axis=1)
num_labels = len(df.iloc[0]['labels'])
train_df = df.sample(frac=0.7, random_state=0)
eval_df = df.drop(train_df.index).reset_index(drop=True)

In [102]:
eval_df['labels'].to_list()

[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

In [107]:
_list = eval_df['labels'].to_list()[4]
np.argwhere(_list == np.amax(_list))

array([[8]])

In [127]:
[element[0] for element in np.argwhere(np.array(_list) == 1)]

[8]

In [128]:
_labels = []
for eval_labels in eval_df['labels'].to_list():
#     print(eval_labels)
#     print(np.amax(eval_labels))
#     print(np.argwhere(eval_labels == 1))
    _labels.append([element[0] for element in np.argwhere(np.array(eval_labels) == 1).tolist()])
_labels

[[],
 [],
 [],
 [],
 [8],
 [],
 [],
 [5, 10],
 [],
 [5, 10],
 [],
 [],
 [],
 [],
 [3],
 [],
 [],
 [2, 5],
 [5, 10],
 [5, 10],
 [],
 [],
 [],
 [5],
 [8],
 [],
 [],
 [],
 [],
 [4],
 [1, 5, 10],
 [8],
 [],
 [],
 [],
 [],
 [],
 [5, 10],
 [5, 10],
 [7],
 [5, 10],
 [],
 [5, 10],
 [5, 10],
 [],
 [7],
 [7, 8],
 [],
 [],
 [],
 [],
 [],
 [7],
 [8, 9],
 [],
 [1],
 [1],
 [],
 [],
 [],
 [2],
 [],
 [],
 [1, 2],
 [],
 [2],
 [],
 [8],
 [5],
 [],
 [],
 [],
 [],
 [],
 [6],
 [],
 [2],
 [],
 [1],
 [1],
 [2],
 [],
 [],
 [5],
 [5],
 [2],
 [],
 [2],
 [5],
 [],
 [5],
 [2],
 [4],
 [],
 [6],
 [],
 [],
 [8],
 [],
 [7],
 [],
 [],
 [],
 [10],
 [],
 [],
 [],
 [],
 [],
 [4],
 [],
 [7],
 [4],
 [],
 [],
 [],
 [],
 [8],
 [1],
 [2],
 [7],
 [],
 [],
 [],
 [2],
 [2],
 [],
 [],
 [],
 [5],
 [11],
 [],
 [7],
 [],
 [],
 [],
 [6],
 [],
 [6],
 [],
 [7],
 [],
 [],
 [6],
 [],
 [1],
 [0],
 [0],
 [],
 [],
 [5],
 [5],
 [],
 [],
 [5, 10],
 [5, 6],
 [],
 [],
 [],
 [],
 [],
 [],
 [6],
 [4],
 [4, 5],
 [],
 [],
 [],
 [4, 5],
 [4],
 [],
 

In [147]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score, precision_recall_fscore_support, label_ranking_average_precision_score

mlb = MultiLabelBinarizer()

In [137]:
mlb.fit_transform(eval_df['labels'].to_list())

array([[1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 1],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 0],
       [1, 1],
       [1, 0],
       [1, 1],
       [1,

In [130]:
_mlb_labels = mlb.fit_transform(_labels)
_mlb_labels.shape

(213, 12)

In [131]:
for _mlb_label in _mlb_labels:
    print(_mlb_label)

[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 1 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 1 0 0 0 0 1 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 1 0 0 0 0 1 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 1 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 1 0 0 1 0 0 0 0 0 0]
[0 0 0 0 0 1 0 0 0 0 1 0]
[0 0 0 0 0 1 0 0 0 0 1 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 1 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 1 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 1 0 0 0 0 0 0 0]
[0 1 0 0 0 1 0 0 0 0 1 0]
[0 0 0 0 0 0 0 0 1 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 1 0 0 0 0 1 0]
[0 0 0 0 0 1

In [138]:
y_true = [[1,2,0,1], [0,4], [3], [1,2]]
y_pred = [[1,1,0,1], [1,4], [2], [1,3]]

In [140]:
actual = mlb.fit_transform(y_true)
pred = mlb.fit_transform(y_pred)

In [141]:
print(actual)

[[1 1 1 0 0]
 [1 0 0 0 1]
 [0 0 0 1 0]
 [0 1 1 0 0]]


In [142]:
print(pred)

[[1 1 0 0 0]
 [0 1 0 0 1]
 [0 0 1 0 0]
 [0 1 0 1 0]]


In [144]:
f1_score(actual, pred, average = "samples")

0.45

In [146]:
precision_recall_fscore_support(
        y_true=actual,
        y_pred=pred,
        average='samples')

(0.5, 0.41666666666666663, 0.45, None)

In [148]:
label_ranking_average_precision_score(y_true, y_pred)

  array = np.asarray(array, order=order, dtype=dtype)
  y_true = check_array(y_true, ensure_2d=False)


ValueError: Unable to convert array of bytes/strings into decimal numbers with dtype='numeric'