In [2]:
import json
import pandas as pd
import numpy as np
import re
from sklearn.metrics import accuracy_score, f1_score, precision_score
from collections import Counter

In [None]:
# Load the raw M3 output to inspect its structure.
with open('/g100_work/IscrC_mental/data/user_classification/M3_results_de_bio_image.json') as f:
    r = json.load(f)
# Preview only the first few entries: echoing the whole mapping would flood the
# cell output with every record stored in the file.
dict(list(r.items())[:3])

In [10]:
# Load the three pickled frames used in this notebook. All of them carry a
# `user_id` column, which the cells below use to join and filter them.
# NOTE(review): unpickling can execute arbitrary code, so only load these files
# from a trusted location.
test_set = pd.read_pickle('/g100_work/IscrC_mental/data/user_classification/german_data/data_for_models_german_data.pkl')
german_all = pd.read_pickle('/g100_work/IscrC_mental/data/user_classification/trained_models/de/all_models_test.pkl')
xlm_de = pd.read_pickle('/g100_work/IscrC_mental/data/user_classification/trained_models/de/xlm_test.pkl')

# Position of an age bucket inside an M3 record -> suffix of the output column.
m3_label_dict = {0:'0_18',1:'19_29',2:'30_39',3:'40_100'}

def json_to_df(data_json):
    """Flatten M3 predictions into one DataFrame row per user.

    Parameters
    ----------
    data_json : dict
        Mapping ``user_id -> record``. Each record must hold a ``'gender'``
        dict (read via the ``'female'`` / ``'male'`` keys, defaulting to 0)
        and an ``'age'`` dict whose values are taken *in iteration order* and
        named after ``m3_label_dict``.

    Returns
    -------
    pandas.DataFrame
        Columns ``user_id`` (float64), ``m3_prob_female``, ``m3_prob_male`` and
        one ``m3_prob_<bucket>`` column per age value. An empty input yields an
        empty frame with the full set of expected columns.

    Raises
    ------
    KeyError
        If a record lacks ``'gender'`` / ``'age'`` or has more age values than
        ``m3_label_dict`` has entries.
    """
    rows = []
    for user_id, data in data_json.items():
        row = {
            'user_id': user_id,
            'm3_prob_female': data['gender'].get('female', 0),
            'm3_prob_male': data['gender'].get('male', 0),
        }
        # Age buckets are matched by position, not by key name.
        # NOTE(review): this assumes the JSON lists the buckets youngest-first - confirm.
        for idx, age_value in enumerate(data['age'].values()):
            row[f'm3_prob_{m3_label_dict[idx]}'] = age_value
        rows.append(row)

    if rows:
        df = pd.DataFrame(rows)
    else:
        # pd.DataFrame([]) has no columns at all, so the cast below would raise
        # KeyError('user_id'); build the expected (empty) schema instead.
        expected_columns = ['user_id', 'm3_prob_female', 'm3_prob_male']
        expected_columns += [f'm3_prob_{m3_label_dict[idx]}' for idx in sorted(m3_label_dict)]
        df = pd.DataFrame(columns=expected_columns)

    # NOTE(review): float64 represents integers exactly only up to 2**53; larger
    # ids would be rounded and could collide - confirm the ids stay below that.
    df['user_id'] = df['user_id'].astype('float64')
    return df

# Load the raw M3 output and flatten it to one row per user.
with open('/g100_work/IscrC_mental/data/user_classification/M3_results_de_bio_image.json') as f:
    r = json.load(f)
m3_de = json_to_df(r)

# Give the join key the same dtype in every prediction frame.
german_all.user_id = german_all.user_id.astype('float')
xlm_de.user_id = xlm_de.user_id.astype('float')
m3_de.user_id = m3_de.user_id.astype('float')

print(xlm_de.shape)
print(m3_de.shape)
print()

# Attach the columns of german_all to each system's predictions.
xlm_de = german_all.merge(xlm_de, on='user_id', how='inner')
m3_de = german_all.merge(m3_de, on='user_id', how='inner')

print(xlm_de.shape)
print(m3_de.shape)
print()

# Keep only users for which BOTH systems produced a prediction.
xlm_de = xlm_de[xlm_de.user_id.isin(m3_de.user_id)]
m3_de = m3_de[m3_de.user_id.isin(xlm_de.user_id)]

print(xlm_de.shape)
print(m3_de.shape)
print()

test_set = test_set[test_set.user_id.isin(m3_de.user_id)]

# The cells below stack these three frames positionally (row i of xlm_de, m3_de
# and test_set must describe the same user). Filtering with isin() keeps each
# frame in its own original order, so put all of them in user_id order and
# verify the ids really line up before anything is compared row by row.
xlm_de = xlm_de.sort_values('user_id', kind='mergesort').reset_index(drop=True)
m3_de = m3_de.sort_values('user_id', kind='mergesort').reset_index(drop=True)
test_set = test_set.iloc[np.argsort(np.asarray(test_set.user_id.astype('float')), kind='mergesort')]

aligned_ids = np.asarray(m3_de.user_id)
if not (
    np.array_equal(np.asarray(xlm_de.user_id), aligned_ids)
    and np.array_equal(np.asarray(test_set.user_id.astype('float')), aligned_ids)
):
    raise ValueError('xlm_de, m3_de and test_set do not contain the same user_id sequence')

print(test_set.shape)


(606, 7)
(420, 7)

(388, 21)
(384, 21)

(384, 21)
(384, 21)

(384, 5)


In [11]:
def twitter_features(
    df,
    include_bio=True,
    include_tweets=True,
    label_name='is_male',
    ):
    """Extract the gold labels for one task from a user DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``label_name`` for ``'is_male'`` / ``'age'``,
        or the column ``'age'`` for ``'age_interval'``.
    include_bio, include_tweets : bool
        Accepted for signature compatibility; not used by this function.
    label_name : {'is_male', 'age', 'age_interval'}
        ``'is_male'`` -> list of ``'male'`` / ``'female'`` strings,
        ``'age'`` -> list of ints,
        ``'age_interval'`` -> list of class indices 0..3.

    Returns
    -------
    tuple
        ``(None, gold_labels)``. The first slot is a placeholder kept so that
        callers can keep unpacking two values.

    Raises
    ------
    ValueError
        If ``label_name`` is not one of the supported values.
    """
    if label_name == 'is_male':
        # `== True` (rather than plain truthiness) keeps e.g. NaN mapped to 'female'.
        gold_labels = ['male' if label == True else 'female' for label in df[label_name].tolist()]
    elif label_name == 'age':
        gold_labels = df[label_name].astype(int).tolist()
    elif label_name == 'age_interval':
        # Left-closed bins: [0, 20), [20, 30), [30, 40), [40, 100).
        # NOTE(review): the probability columns used in this notebook are named
        # 0_18 / 19_29 / 30_39 / 40_100, yet age 19 falls into class 0 here -
        # confirm which boundary is intended. Ages outside [0, 100) fall in no
        # bin and the int cast below is expected to reject the missing value.
        age_intervals = [0, 20, 30, 40, 100]
        age_labels = [0, 1, 2, 3]
        gold_labels = pd.cut(df['age'], bins=age_intervals, labels=age_labels, right=False).astype('int').tolist()
    else:
        raise ValueError(
            f"Unsupported label_name {label_name!r}; expected 'is_male', 'age' or 'age_interval'"
        )

    # Return an explicit placeholder instead of `_`, which is not defined in
    # this function and only resolved through the interactive session's global.
    return None, gold_labels

In [12]:
def _mean_class_distance(pred, gold_labels, class_means):
    """Mean absolute difference between the class means of predicted and gold classes."""
    total_error = 0
    for predicted, gold in zip(pred, gold_labels):
        total_error += abs(class_means[predicted] - class_means[gold])
    return total_error / len(gold_labels)


def _substitute_uniform_rows(probs, pred, majority_class, system_name):
    """Overwrite, in place, predictions whose probability row is uniform with the majority class."""
    # A row is "uniform" when every column equals 1 / n_columns. Use a
    # tolerance instead of `==` so that values such as 1/3 are still detected.
    uniform_rows = np.where(np.isclose(probs, 1 / probs.shape[1]).all(axis=1))[0]
    if uniform_rows.size > 0:
        print(f'{uniform_rows.size} {system_name} predictions substituted with majority_class')
        pred[uniform_rows] = majority_class


def _report_system(title, pred, gold_labels, class_means):
    """Print accuracy, macro-F1 and (when class means are given) MAE; return the macro-F1."""
    acc = accuracy_score(gold_labels, pred)
    f1_macro = f1_score(gold_labels, pred, average=None).mean()
    print(title)
    print('Ac:', acc*100)
    print('F1:', f1_macro*100)
    if class_means is not None and len(class_means) > 0:
        print('MAE:', _mean_class_distance(pred, gold_labels, class_means))
    print('----------')
    return f1_macro


def aggregate_performace(p1, p2, gold_labels, class_means=None):
    """Evaluate two classifiers and three ways of combining their scores.

    Parameters
    ----------
    p1, p2 : array-like, 2-D
        Per-class scores of system 1 and system 2. Rows are samples, columns
        are classes; the column index is the class id compared against
        ``gold_labels``. Both must describe the same samples in the same order.
    gold_labels : array-like
        Gold class id of every sample.
    class_means : indexable, optional
        Lookup ``class id -> representative value``. When given and non-empty,
        the mean absolute difference between the looked-up values of predicted
        and gold classes is printed as ``MAE``. ``None`` and ``[]`` both
        disable it.

    Returns
    -------
    tuple of numpy.ndarray
        Predicted class ids of the three ensembles, in this order:
        plain sum of the scores, scores weighted by each system's macro-F1,
        scores weighted by each system's per-class F1.

    Notes
    -----
    Prints the prediction counts, the number of substituted uniform rows and
    accuracy / macro-F1 (/ MAE) for both systems and the three ensembles.
    The F1 weights are computed on the same ``gold_labels`` that are then used
    to score the weighted ensembles, so those two scores are optimistic.
    """
    gold_labels = np.array(gold_labels)
    p1 = np.asarray(p1)
    p2 = np.asarray(p2)

    # compute predictions
    pred1 = p1.argmax(axis=1)
    pred2 = p2.argmax(axis=1)

    print(Counter(pred1))
    print(Counter(pred2))

    # argmax of a uniform row is always class 0; fall back to the most frequent
    # gold class for such rows instead.
    majority_class = Counter(gold_labels).most_common()[0][0]
    _substitute_uniform_rows(p1, pred1, majority_class, 'system1')
    _substitute_uniform_rows(p2, pred2, majority_class, 'system2')

    print('**********')

    f1_macro_1 = _report_system('System 1', pred1, gold_labels, class_means)
    f1_macro_2 = _report_system('System 2', pred2, gold_labels, class_means)

    # ensemble 1: unweighted sum of the scores
    pred_agg_1 = (p1 + p2).argmax(axis=1)
    _report_system('Avg prediction system ', pred_agg_1, gold_labels, class_means)

    # ensemble 2: each system weighted by its macro-F1
    pred_agg_2 = (f1_macro_1*p1 + f1_macro_2*p2).argmax(axis=1)
    _report_system('F1mean-weighted prediction system ', pred_agg_2, gold_labels, class_means)

    # ensemble 3: each class column weighted by that system's F1 on the class.
    # Passing `labels` pins one weight per probability column, even when a
    # class occurs neither in the gold labels nor in the predictions.
    class_ids = np.arange(p1.shape[1])
    f1_per_class_1 = f1_score(gold_labels, pred1, labels=class_ids, average=None)
    f1_per_class_2 = f1_score(gold_labels, pred2, labels=class_ids, average=None)
    pred_agg_3 = (f1_per_class_1*p1 + f1_per_class_2*p2).argmax(axis=1)
    _report_system('F1-weighted prediction system ', pred_agg_3, gold_labels, class_means)

    return pred_agg_1, pred_agg_2, pred_agg_3

# DE gender XLM+M3

In [20]:
# Gender scores of both systems; column 0 is "male", column 1 is "female".
p1 = xlm_de[['prob_male','prob_female']].to_numpy()
p2 = m3_de[['m3_prob_male','m3_prob_female']].to_numpy()

# Gold labels in the same index space as the columns: male -> 0, female -> 1.
gold_labels = 1 - test_set.is_male.astype(bool).astype(int)

gender_pred_agg_avg, gender_pred_agg_f1mean, _ = aggregate_performace(
    p1,
    p2,
    gold_labels,
)

Counter({0: 290, 1: 94})
Counter({0: 365, 1: 19})
**********
System 1
Ac: 81.77083333333334
F1: 74.2014742014742
----------
System 2
Ac: 79.94791666666666
F1: 56.10907417578339
----------
Avg prediction system 
Ac: 84.375
F1: 75.39302802460699
----------
F1mean-weighted prediction system 
Ac: 83.59375
F1: 74.7808229174268
----------
F1-weighted prediction system 
Ac: 85.15625
F1: 69.98642478094533
----------


# DE age XLM+M3

In [25]:
# Age-bucket scores of both systems, columns ordered from youngest to oldest bucket.
p1 = xlm_de[['prob_0_18', 'prob_19_29', 'prob_30_39', 'prob_40_100']].to_numpy()
p2 = m3_de[['m3_prob_0_18','m3_prob_19_29','m3_prob_30_39','m3_prob_40_100']].to_numpy()

# Gold age classes (0..3) derived from the `age` column of the test set.
_, gold_labels = twitter_features(test_set, label_name='age_interval')

age_pred_agg_avg, age_pred_agg_f1mean, _ = aggregate_performace(p1, p2, gold_labels)

Counter({3: 195, 1: 107, 0: 51, 2: 31})
Counter({0: 233, 1: 63, 3: 54, 2: 34})
**********
System 1
Ac: 56.25
F1: 43.65400144936413
----------
System 2
Ac: 34.11458333333333
F1: 27.36473214285714
----------
Avg prediction system 
Ac: 60.416666666666664
F1: 49.0437030226263
----------
F1mean-weighted prediction system 
Ac: 58.854166666666664
F1: 48.13972978652393
----------
F1-weighted prediction system 
Ac: 62.239583333333336
F1: 43.78739730830527
----------


In [27]:
# Inspect the age class indices predicted by the unweighted-sum ensemble.
age_pred_agg_avg

array([3, 0, 1, 3, 3, 3, 3, 2, 3, 0, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 0,
       1, 3, 1, 3, 3, 3, 1, 1, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 1, 2, 3, 1,
       3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 0, 3, 3, 3, 0, 3, 3,
       3, 3, 3, 3, 3, 2, 3, 3, 3, 0, 3, 3, 3, 3, 1, 1, 1, 2, 1, 1, 3, 3,
       3, 3, 3, 3, 0, 3, 2, 2, 1, 2, 2, 1, 3, 1, 2, 2, 3, 3, 1, 2, 0, 1,
       3, 2, 3, 3, 1, 3, 2, 1, 3, 3, 3, 3, 3, 3, 2, 1, 2, 1, 3, 1, 1, 3,
       1, 1, 3, 0, 1, 0, 0, 3, 3, 0, 3, 1, 3, 3, 0, 3, 3, 1, 1, 3, 3, 3,
       3, 1, 3, 1, 1, 3, 1, 1, 3, 1, 3, 3, 3, 3, 0, 3, 3, 3, 1, 0, 3, 3,
       0, 3, 3, 3, 3, 3, 0, 1, 0, 3, 0, 3, 3, 3, 0, 3, 0, 3, 3, 3, 1, 3,
       3, 3, 0, 3, 3, 1, 3, 2, 3, 3, 3, 3, 3, 0, 3, 1, 3, 3, 1, 3, 1, 3,
       1, 3, 1, 3, 1, 0, 3, 3, 1, 0, 0, 0, 1, 1, 1, 0, 1, 3, 0, 0, 0, 3,
       3, 0, 0, 0, 2, 0, 0, 3, 1, 0, 3, 0, 1, 3, 1, 3, 0, 0, 1, 1, 3, 3,
       0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 0, 0, 0, 3, 3, 1, 1, 2, 0, 0,
       1, 1, 1, 1, 0, 0, 1, 3, 3, 1, 1, 3, 1, 1, 1,

In [30]:
# Persist the ensemble predictions next to the ids of the evaluated users:
# one column per task (gender / age) and per combination rule
# (unweighted sum / macro-F1-weighted sum).
pd.DataFrame(
    dict(
        user_id=test_set.user_id,
        gender_avg_agg=gender_pred_agg_avg,
        gender_f1_agg=gender_pred_agg_f1mean,
        age_avg_agg=age_pred_agg_avg,
        age_f1_agg=age_pred_agg_f1mean,
    )
).to_pickle('/g100_work/IscrC_mental/data/user_classification/trained_models/de/xlm_m3_untrained_agg_test.pkl')