## Setup

### Imports

In [None]:
import pandas as pd
from tqdm import tqdm
from sklearn.dummy import DummyClassifier

tqdm.pandas()

import sys
sys.path.append('../src/')
from models.classification_methods import process_classification 

### Definitions

In [None]:
# Pretrained model identifier (looks like a Hugging Face hub id -- TODO confirm).
# Not referenced by the cells visible in this notebook.
model_name = "neuralmind/bert-base-portuguese-cased"

In [None]:
# Presumably intended for seeding randomised steps; it is not referenced by
# the cells visible in this notebook.
random_seed: int = 42

In [None]:
# Input locations, relative to the notebook's working directory.
path_raw_data = "../data/raw/"
path_processed_data = "../data/processed/"

# Output locations for the CSV files written by the result cells.
path_results_cr = "../reports/classification_reports/"
path_test_results = "../reports/test_results/"

In [None]:
# Target identifiers; each one is interpolated into the train/test CSV file
# names and carried along in the (data_train, data_test, target) tuples.
list_target = ["ig", "bo", "cl", "co", "gl", "lu"]

## Classification

In [None]:
# Label used as a prefix in the names of the result files written below.
classifier_name = "dummy"

### Top mentioned timelines

In [None]:
# Corpus label embedded in the names of the result files of this section.
corpus_name = "top_mentioned_timelines"

In [None]:
# Load the train/test CSV pair of every target in list_target and collect
# them as (data_train, data_test, target) tuples.

# Both splits are parsed with the same options: ';' separated, UTF-8 with BOM.
read_csv_kwargs = dict(sep=';', encoding='utf-8-sig')

list_tuples_top_ment = []

for target in tqdm(list_target):

    train_file = f'train_r3_{target}_top_mentioned_timelines.csv'
    test_file = f'test_r3_{target}_top_mentioned_timelines.csv'

    # Tuple elements are evaluated left to right: train is read before test.
    list_tuples_top_ment.append(
        (
            pd.read_csv(path_raw_data + train_file, **read_csv_kwargs),
            pd.read_csv(path_raw_data + test_file, **read_csv_kwargs),
            target,
        )
    )

In [None]:
# Run the dummy baseline over every (train, test, target) tuple, save both
# result tables as CSV and show the macro-average rows sorted by f1-score.

classifier = DummyClassifier()

df_cr, df_test_results = process_classification(
    estimator=classifier,
    data_tuples=list_tuples_top_ment,
)

report_file = path_results_cr + f'{classifier_name}_classifier_{corpus_name}_classification_report.csv'
test_results_file = path_test_results + f'{classifier_name}_classifier_{corpus_name}_test_results.csv'

df_cr.to_csv(report_file)
df_test_results.to_csv(test_results_file)

# Kept as the last expression so the notebook renders it as the cell output.
is_macro_avg = df_cr['class'] == 'macro avg'
df_cr[is_macro_avg].sort_values('f1-score')

### Users

In [None]:
# Load the users train/test CSV pair of every target in list_target and
# collect them as (data_train, data_test, target) tuples.

list_tuples_users = []

for target in tqdm(list_target):

    path_data_train = path_raw_data + f'r3_{target}_train_users.csv'
    # The test split must come from its own file. Reading the train file here
    # makes the test set identical to the training set, so the reported scores
    # would not measure held-out performance.
    # NOTE(review): file name follows the train file's naming pattern --
    # confirm it matches the files in data/raw.
    path_data_test = path_raw_data + f'r3_{target}_test_users.csv'

    # Both splits share the same format: ';' separated, UTF-8 with BOM.
    data_train = pd.read_csv(
        path_data_train,
        sep=';',
        encoding='utf-8-sig',
    )
    data_test = pd.read_csv(
        path_data_test,
        sep=';',
        encoding='utf-8-sig',
    )

    list_tuples_users.append((data_train, data_test, target))

#### Timelines

In [None]:
# Corpus label embedded in the names of the result files of this subsection.
corpus_name = "users_timeline"

In [None]:
# Run the dummy baseline on the users data with the 'Timeline' column passed
# as X_cols, save both result tables as CSV and show the macro-average rows
# sorted by f1-score.

classifier = DummyClassifier()

df_cr, df_test_results = process_classification(
    estimator=classifier,
    data_tuples=list_tuples_users,
    X_cols=['Timeline'],
)

report_file = path_results_cr + f'{classifier_name}_classifier_{corpus_name}_classification_report.csv'
test_results_file = path_test_results + f'{classifier_name}_classifier_{corpus_name}_test_results.csv'

df_cr.to_csv(report_file)
df_test_results.to_csv(test_results_file)

# Kept as the last expression so the notebook renders it as the cell output.
is_macro_avg = df_cr['class'] == 'macro avg'
df_cr[is_macro_avg].sort_values('f1-score')

#### Stance

In [None]:
# Corpus label embedded in the names of the result files of this subsection.
corpus_name = "users_stance"

In [None]:
# Run the dummy baseline on the users data with the 'Stance' column passed as
# X_cols, save both result tables as CSV and show the macro-average rows
# sorted by f1-score.

classifier = DummyClassifier()

df_cr, df_test_results = process_classification(
        estimator = classifier,
        data_tuples = list_tuples_users,
        X_cols=['Stance']
)

# Build the file names from classifier_name, like the other result cells,
# instead of hard-coding the 'dummy' prefix. The resulting names are the same
# while classifier_name is 'dummy'.
df_cr.to_csv(path_results_cr + f'{classifier_name}_classifier_{corpus_name}_classification_report.csv')
df_test_results.to_csv(path_test_results + f'{classifier_name}_classifier_{corpus_name}_test_results.csv')

# Kept as the last expression so the notebook renders it as the cell output.
df_cr[df_cr['class'] == 'macro avg'].sort_values('f1-score')