# Semantic Evaluation - Experiment 05
The objective of this trial is to expand the SCA_index (i.e., Semantic Content Analysis Index) to a full word embedding, setting a subjective or objective load for each word.

## Introduction

### Libraries

In [1]:
## Data analysis packages:
import pandas as pd
import numpy as np
from math import isnan  #Verifies if a given value is numerical.

In [2]:
## Visualization packages:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

### Definitions

In [3]:
## Forcing Pandas to display any number of elements
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_seq_items', None)
pd.set_option('display.width', 2000)
pd.set_option('display.max_colwidth', None)

In [4]:
## Based on: https://stackoverflow.com/questions/25351968/how-can-i-display-full-non-truncated-dataframe-information-in-html-when-conver
def print_full(x):
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000)
    pd.set_option('display.float_format', '{:20,.2f}'.format)
    pd.set_option('display.max_colwidth', None)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
    pd.reset_option('display.width')
    pd.reset_option('display.float_format')
    pd.reset_option('display.max_colwidth')

## Exploring the SpaCy Word Embeddings: 
Also using Spacy library: https://spacy.io/
> !pip install -U spacy  
> !python -m spacy download en_core_web_sm  
> !python -m spacy download en_core_web_lg

Some instructions on how to use it:  
https://spacy.io/usage/spacy-101

In [5]:
## Importing SpaCy library:
import spacy

# Load English tokenizer, tagger, parser and NER
nlp = spacy.load("en_core_web_lg")




In [6]:
## We check below that this model has 514.157 keys and vectors, respectively.
nlp.meta['vectors']

{'width': 300,
 'vectors': 514157,
 'keys': 514157,
 'name': 'en_vectors',
 'mode': 'default'}

In [7]:
## Again, checking the number of keys.
nlp.vocab.vectors.n_keys

514157

In [9]:
## Getting the word embedding: data (i.e., the matrix containing the vector values for each word)
word_embedding = nlp.vocab.vectors.data

## Verifying the shape of the word embedding matrix:
word_embedding.shape

(514157, 300)

--- 
### Finding the words associated with the embedding:

In [10]:
## Extracting the words associated with each index:
index = nlp.vocab.vectors.keys()
words_associated = [nlp.vocab[i].text for i in index]

In [11]:
## Checking the word in position 514156, wich is "Lahouaiej":
words_associated[514156]

'Lahouaiej'

In [12]:
## Finding the respective row (index) for a given word:
rows = nlp.vocab.vectors.find(keys=["cat", "dog", "Lahouaiej"])
rows

array([  3201,   1147, 514156], dtype=int32)

---
## SCA - Glasgow Norms
* Read the SCA from Glasgow Norms;  
* Import F_s and F_o from the previous study;  
* Train the MLP classifier.

In [15]:
df_factors = pd.read_csv('../data/df_factors.csv', sep=';')
df_factors.head()

Unnamed: 0,words,F_Objectivity,F_Subjectivity,F_Context
0,abattoir,0.512527,0.380603,0.960466
1,abbey,0.714765,0.240456,0.696198
2,abbreviate,0.286952,0.171052,0.767043
3,abdicate,0.144736,0.3843,0.863127
4,abdication,0.167654,0.334086,0.896733


In [22]:
SCA_words = [word for word in df_factors.words]

In [24]:
SCA_embedding_rows =  nlp.vocab.vectors.find(keys=SCA_words)

In [29]:
len(SCA_embedding_rows)

5553

> Separating the SCA-GlasgowNorms data into train and test:

In [94]:
from sklearn.model_selection import train_test_split

# Separar os dados em conjuntos de treino (70%) e teste (30%)
train_df, test_df = train_test_split(df_factors, test_size=0.3, random_state=42)

In [95]:
# Função para criar os conjuntos de treino e resposta
def create_data(dataframe):
    X = {}
    Y = {}
    
    for index, row in dataframe.iterrows():
        word = row['words']
        f_objectivity = row['F_Objectivity']
        f_subjectivity = row['F_Subjectivity']

        if word in nlp.vocab:
            indice = nlp.vocab.strings[word]
            vetor_embedding = word_embedding[index]
            X[word] = vetor_embedding
            Y[word] = {'F_Objectivity': f_objectivity, 'F_Subjectivity': f_subjectivity}

    return pd.DataFrame.from_dict(X, orient='index'), pd.DataFrame.from_dict(Y, orient='index')

In [100]:
# Creating train and test datasets:
X_train, Y_train = create_data(train_df)
X_test, Y_test = create_data(test_df)

# Exibir as dimensões dos conjuntos de treino e teste
print("Train data dimension:")
print("X_train:", X_train.shape)
print("Y_train:", Y_train.shape)

print("\nTest data dimension:")
print("X_test:", X_test.shape)
print("Y_test:", Y_test.shape)

Train data dimension:
X_train: (3288, 300)
Y_train: (3288, 2)

Test data dimension:
X_test: (1392, 300)
Y_test: (1392, 2)


In [101]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299
airplane,-3.900300,3.93430,0.65395,1.29990,2.83170,1.508200,6.75800,5.364900,-2.36660,-3.08060,7.3809,0.22831,-2.47630,0.017361,-0.68943,-2.390400,-0.060711,-3.533500,1.810800,-2.0253,-1.97780,-1.41780,-1.9544,0.10393,-2.83050,-3.57070,-2.80240,-0.88663,-3.289400,1.66370,5.91670,-0.091764,-2.84720,-5.14470,-2.39620,1.18960,-3.490000,1.823300,4.96860,2.07140,-2.244400,5.36050,-0.95761,4.29600,-0.21680,0.62048,1.73920,-5.99340,-2.89730,3.93680,-2.401700,1.49590,-0.054549,-6.26560,-4.5234,0.53169,1.63590,0.89828,1.03740,2.017600,-2.9216,1.64230,-3.3777,-0.49631,3.28690,2.7791,-2.59510,-3.8540,1.46620,2.8515,4.42560,3.98090,-0.73355,-0.119020,0.89358,0.70556,-2.02380,2.77660,0.26854,4.13280,-5.96310,-1.10990,1.34010,-1.51770,0.316740,-3.018500,-5.17970,-3.0217,-0.04403,0.049687,-1.47610,4.24500,3.33700,-1.33770,5.05770,1.07830,-0.016471,-2.56510,-1.393600,0.63940,1.57260,-0.011109,-1.69850,4.3501,-5.49180,5.04830,-2.20180,-0.41809,-0.78404,-0.66416,1.15280,1.23260,-1.30860,0.31578,-1.19400,3.92370,0.46197,1.06130,-0.319790,-2.38780,2.12210,-1.80540,-1.42700,0.53254,-3.0410,-2.34490,-1.62040,-2.96710,4.63050,0.16084,-4.8255,-0.29363,3.13940,-1.95800,-1.29290,4.14060,-0.038602,1.524500,5.9246,-2.75440,-2.19690,-0.31086,4.74790,-1.76610,-0.22267,-0.300000,-1.75270,-0.050056,-2.55650,1.28360,0.41696,2.88090,0.355010,-0.87239,-1.04280,0.307970,4.1562,-1.008100,-1.14420,-1.09840,-1.91330,-1.54030,-3.62340,0.42826,-5.00650,-0.49866,-4.22030,2.79070,-0.38192,3.9372,-1.97130,3.24010,-0.78560,-0.079338,2.72100,-2.73960,-4.29080,-1.70190,-2.51120,0.14885,-2.91820,-0.40600,6.545700,-3.012900,-0.27173,-0.104410,-5.43240,-1.326000,3.23230,3.2203,1.94580,1.15240,-1.384000,2.16370,-2.54230,2.771200,-0.78878,-3.597100,-0.001075,2.14810,-2.43480,-8.08430,-3.99480,-3.24140,0.19680,1.93320,-1.234800,3.02840,-3.30660,0.92006,0.59873,0.294280,-1.83340,1.32660,1.281800,4.80140,0.95566,-2.15410,-1.82020,-0.559350,-2.19780,-1.86960,-0.83113,-0.29340,-3.46630,-3.49460,-1.11120,1.393900,2.5032,1.39520,0.29229,-2.8486,0.038245,2.59330,0.27597,2.113800,1.97000,5.78300,-7.13120,-1.47090,-1.117400,-0.45468,-1.92740,-1.14920,1.184000,-3.34210,-2.28830,1.93360,3.3205,5.47850,1.66630,-2.19340,-7.69760,1.85860,-0.22897,-2.68230,1.09740,0.35450,0.83341,0.53664,-0.57928,5.3247,1.9213,1.56950,-0.74805,-1.08930,3.24310,4.85570,-7.01540,1.43420,5.40910,-4.71290,-0.548970,-0.41088,0.66925,-2.922000,0.54973,-0.66148,-1.50200,1.3905,2.269300,1.57630,-1.49930,1.29760,3.150300,-3.51840,0.673510,3.10910,-6.3776,2.24180,-0.36937,1.10960,4.207400,1.66340,0.52245,3.5314,0.79304,0.11406,-5.10790,1.122000
coarse,2.042700,-2.06910,-1.59100,0.42295,3.51720,-0.042939,1.57450,5.945900,-7.09320,-3.81480,11.7570,4.35480,-3.72050,4.313600,0.26884,-0.443220,4.132300,1.703300,-3.872900,1.4727,-1.43550,0.94668,-3.8571,1.27040,-4.54170,-2.14030,1.29390,-1.74510,-5.526400,3.00230,2.44820,2.639000,-1.52310,2.31340,1.35940,-0.54864,2.054000,-0.170160,3.40220,-0.78957,-3.454300,0.54014,-0.11914,0.67721,-1.32260,4.33330,6.49880,-1.92460,1.47590,-0.98074,2.269100,2.51920,-0.378430,-6.23930,-1.1834,3.27730,-4.31950,2.05230,0.73059,-4.089200,4.8110,3.82850,-3.8583,0.64311,3.03840,6.5163,-2.40190,-1.0276,0.66792,2.5126,-1.84300,1.45240,-2.50840,-1.587100,-2.47370,1.26340,-2.13820,2.62900,-2.79220,3.31980,-5.65540,-2.47220,2.41710,2.82290,0.674250,1.683500,0.62230,-3.0518,4.01130,3.282600,-4.33350,-0.99384,1.03250,-3.25910,2.63790,-2.88770,4.076400,-1.01440,-3.144600,-2.40730,2.25510,1.203100,1.55070,0.4223,2.40900,8.36280,-2.88400,-6.41140,2.49140,-0.91834,3.68570,-0.40304,0.30373,-1.01040,0.70464,1.45450,-2.50800,-1.52900,-3.525500,-2.80110,-2.42530,-1.66710,0.83456,0.26350,1.4257,-6.30400,2.44460,-5.85960,1.55090,-3.20320,-5.7372,2.77660,2.84770,1.31420,-2.46830,1.96310,-1.667000,-0.841440,1.9242,-3.32250,-2.62450,0.91188,-1.65780,-1.32340,2.59840,-2.458200,-0.38224,1.955800,3.28860,4.03430,-3.19270,3.37620,-1.001400,1.33840,-0.95402,0.051455,4.5192,3.340600,-0.18517,-2.15530,-0.11305,-2.00650,-0.32349,0.58643,-3.69460,-4.56340,-3.93610,0.52761,-1.69340,-4.4783,-4.98760,-5.44350,-0.18109,0.848120,3.82640,1.24350,1.86570,2.38550,-0.98788,-3.53360,-0.92577,-1.11740,-0.008842,-7.676500,-1.09620,0.306180,-2.43420,-2.690900,3.95540,-2.2016,-1.29390,-0.75266,-0.081766,-2.01910,2.97740,-1.234600,-6.57250,-0.105870,0.433400,2.35650,-2.98590,-1.05750,-6.06500,-0.93495,8.60080,0.30004,-3.853300,1.28420,3.86150,0.87080,4.07370,-2.319200,-1.82560,3.78550,0.340980,4.11250,-0.72653,-8.58240,0.36828,-0.344450,-2.75520,2.13960,1.99320,-1.99510,0.66178,-4.08820,2.41980,0.577990,3.0488,-0.76900,-1.28570,-3.7659,-2.280100,-0.36999,7.79510,4.220400,4.64980,3.48220,2.90390,2.24390,-1.837500,-5.10100,-0.34775,2.13040,0.525040,-2.94180,-3.59040,-0.61967,1.1622,0.51956,1.90330,-2.51030,-0.48914,-1.99650,1.75590,-0.97327,-0.97464,0.55097,1.86220,0.92131,3.10860,3.0548,1.6227,1.92500,5.10540,1.39620,0.15861,5.90950,-3.23000,-3.91970,-0.68823,-1.85420,1.997700,-1.28010,-2.55400,-0.792680,3.55030,0.47723,2.05990,2.0810,3.945100,1.27120,1.19940,0.56537,7.962800,0.51540,2.638100,1.52530,-1.4806,2.36910,0.48063,-3.19270,-1.133600,-1.53970,-0.97280,-0.2494,8.29990,-3.88090,-5.23350,3.850200
persecution,0.068316,4.34330,-5.55370,4.75970,2.12330,2.133900,0.55151,-0.290250,-4.60460,-0.51456,-1.3425,2.43150,-4.22010,-2.246000,-2.50050,-0.934730,-2.199000,-2.848000,-3.149100,-3.1937,0.42370,-1.09930,-2.1370,-0.28805,-3.81980,-0.40685,0.65904,-1.16870,-0.849820,-0.84344,5.87940,-4.655900,1.95800,-2.65690,-1.80940,-3.31670,0.962420,-1.334600,1.10600,-2.04600,-2.623400,-5.04220,-5.30670,5.77370,-5.86420,5.34060,1.16000,-7.87760,5.77020,4.13320,1.204400,-4.40440,4.653500,-7.24320,2.7926,4.95930,3.69890,-0.61183,-1.70640,3.727400,-2.3768,2.59290,-1.8523,2.23890,1.48830,-3.0840,-5.29610,1.5135,-2.18440,-1.6817,-1.58140,3.41980,-6.36440,3.120600,-2.55450,4.71060,0.44177,-2.04330,-3.85550,-3.92460,-1.88430,-4.03460,1.80540,0.14297,-3.485000,-0.166340,-1.05400,-3.0761,1.08100,2.862600,-5.40030,1.49940,1.97570,-2.19440,3.90870,-3.35410,1.350700,-7.48700,3.647600,4.34480,-3.60490,0.168820,-0.48981,-6.9565,-3.92430,-0.50639,-5.37260,1.76030,1.07730,-11.20400,0.64015,2.05970,1.30070,0.58045,4.67900,5.39410,-3.78570,-0.56545,0.307490,-2.82010,-6.45040,4.05570,-3.63020,-0.18437,3.5357,-4.07680,-4.38920,2.53090,5.57150,1.08600,-2.9182,-4.27100,0.24768,-3.19250,5.33060,3.33170,-5.574900,-6.309700,1.2430,-3.78240,-3.75980,-6.36870,-0.53144,-3.36840,-0.17049,-2.790100,-1.88460,-2.993200,2.90370,2.04970,-1.10960,-3.32500,-2.308300,-3.02970,5.01520,-2.914600,3.6464,-7.413700,0.99681,-0.11761,0.47407,2.19730,-1.58140,1.71370,-0.44272,-2.57820,-4.72310,1.00800,1.99990,3.8906,-3.29840,0.97767,0.15030,3.272700,-0.83230,4.64120,11.09500,2.71180,-1.70500,-2.94950,-9.65910,-1.84710,-0.169500,-4.341600,0.60770,1.699500,-0.91934,0.583280,-1.21130,4.8081,0.72203,2.58620,-1.344100,2.93530,-4.47850,-3.161000,2.21860,-1.001600,2.202600,7.71550,-7.57370,-4.67440,-0.17288,0.82796,-3.90840,-5.25760,-5.441300,2.54460,-1.38050,0.85349,5.67780,3.000000,6.73630,2.55540,1.909700,-0.37607,4.09190,-3.40200,-3.55480,-5.484700,2.26840,0.18169,-7.13010,-0.33965,-0.77849,0.25872,-0.85297,0.044621,-1.2597,2.85650,-0.92115,-1.3267,2.274300,-8.91740,2.32480,-4.756500,-0.15451,6.79650,1.67640,-1.32770,-5.398400,2.16090,2.09240,-6.46170,2.970600,-4.02220,3.16400,3.52370,1.8952,4.20700,-4.48830,-0.73258,-9.39030,2.13960,0.69047,-2.87110,0.48702,-1.60770,3.71610,3.44150,-1.35300,6.3550,-3.6564,-2.69790,1.71680,0.37678,1.80010,1.75520,-6.87900,-5.20130,1.84370,-1.49970,0.378760,-8.52430,-3.86360,5.055000,-11.43600,4.33510,2.22680,2.4529,-5.233000,-0.80374,-2.86740,-6.58180,0.022082,-0.13392,0.076618,-5.05190,-5.9940,1.19410,-5.59980,2.51550,1.125900,0.71647,-3.38270,-1.9534,1.18160,2.24510,-1.91380,-0.074928
moment,-7.688900,0.56799,-2.18350,1.66660,3.30210,4.386700,-1.50380,5.213800,-0.37570,-1.74370,10.0440,5.34510,-4.76740,1.262700,-3.43640,1.718900,-6.290400,-1.052600,0.017089,-4.5076,-6.07950,-0.40140,-3.2510,-1.12350,-0.60938,3.05790,4.26910,2.16110,-0.095159,5.94480,-0.15644,-4.450900,1.51900,-3.21020,0.57536,-0.63007,-0.539930,-3.611300,1.08430,5.00290,-0.922590,-1.74930,-1.78280,2.38270,-2.00990,4.25600,0.96042,2.10830,0.68934,3.80630,0.015108,0.68857,-0.059503,-1.35980,-4.8023,5.69050,-0.26204,2.19090,-4.95260,5.182500,1.4434,1.42350,-3.4782,-4.96180,4.54700,2.3776,-0.75467,-5.8222,2.41730,2.7640,0.95128,1.17240,-2.86990,-1.333300,-1.73670,2.04840,-3.41090,2.23210,0.13952,3.92540,-3.27930,-1.77760,1.81430,3.91730,1.009300,4.215100,-3.18600,-5.0629,0.21069,1.450700,-1.55130,2.18650,0.15176,-4.35780,0.38870,1.04260,-1.201900,-3.75440,-1.983300,4.67910,4.79400,-3.120700,3.17340,4.5507,-4.51150,3.74700,3.84090,0.06178,-1.98630,1.69060,3.45130,1.17290,0.31355,0.77013,-0.25839,1.62780,-2.34050,0.59517,-2.287300,-0.10548,-0.44599,-3.49730,-1.89050,-1.83430,-3.4326,-1.34580,3.00660,2.51650,0.86985,0.53212,-2.3949,-3.80500,5.79310,0.82710,-2.68900,4.96250,-0.829510,3.317300,-4.8832,-1.71710,1.20790,5.58890,5.92850,0.85683,-2.41980,0.036704,-1.18760,1.106500,2.89530,4.41970,0.55039,4.41070,-0.722820,3.24640,2.12640,5.697500,3.6839,0.086178,-4.64510,-5.53280,1.59800,-0.32722,-0.99592,3.54220,-2.15680,-1.35580,0.33573,-1.81460,1.97760,-2.6072,-0.49377,1.00410,1.41860,-1.689600,2.33870,1.64520,-4.03310,2.86770,2.13270,-0.59078,0.11446,1.37830,-1.147000,0.998700,-3.86880,-3.285600,0.99650,3.026800,2.80580,8.1786,1.51190,0.76748,-0.306180,-0.20447,3.19520,0.050764,-3.83280,-4.562900,1.405100,1.20990,-0.46244,0.49520,-2.24960,-2.21270,-2.04830,-0.29449,-2.389000,-0.83991,5.98280,5.04540,0.19258,-0.093833,-5.75530,-3.19570,0.081784,-1.24730,0.89743,-0.77232,-3.86890,2.352800,-1.65810,3.58610,1.35350,-4.08790,-3.17090,-1.04000,-2.95890,4.319500,3.9136,0.66900,0.80280,-2.5065,3.333600,1.61520,-0.39701,2.602600,-0.31418,4.65460,-6.09340,1.37670,3.186300,-4.23590,-4.46750,5.67720,4.627900,0.75198,-2.02250,4.80130,7.6043,-1.32370,4.34110,-0.29942,-1.99690,1.59420,2.89660,-5.32890,-1.05400,1.28050,3.00150,-1.81870,-0.86083,1.6326,1.3456,-1.65490,1.18600,-0.93019,3.51560,-0.65960,-1.04720,-0.15340,4.06750,1.14690,0.058387,-0.20873,-2.66590,-1.395600,2.50070,3.17950,-2.57530,3.2376,4.661600,0.62102,2.00200,1.12280,-0.578350,-3.03170,3.652800,4.18340,-1.7926,-0.54218,-3.34500,1.48080,2.330200,-5.26970,-0.67720,-1.1638,-2.93030,-0.90046,-3.00530,1.682800
responsible,-0.638270,-0.20965,2.43960,-2.95510,8.56760,4.540600,1.55490,0.021014,-4.06440,1.83910,8.6355,4.31000,-2.86120,3.884300,1.95520,0.636720,3.363700,0.241690,-2.137700,-1.2866,-0.29364,1.46590,-6.3379,0.84835,-1.86210,-0.88167,2.04370,-4.66290,-4.722900,0.69482,0.13129,1.476900,-1.69590,-0.66062,-0.18362,3.07470,-3.895000,1.507200,1.97030,2.84680,0.751650,-2.57500,-0.08866,4.10960,-0.57931,0.28668,5.55460,-0.38686,0.33243,-0.70699,-3.365500,5.43010,4.317500,-2.33060,-1.1958,0.72377,-2.97470,4.50990,0.39007,-2.297800,1.7473,0.38184,-1.7837,-1.38820,-1.88730,4.5330,-2.91680,-2.8414,0.28346,6.0842,-1.33780,4.12870,-2.12630,0.218620,-0.47545,-0.28176,0.64357,0.70815,-6.30050,6.88170,-0.81048,-4.49050,-0.05582,5.45360,2.049800,-1.334300,0.39381,1.2033,4.47960,-2.556800,-0.30427,-0.43990,3.43110,-1.51180,-3.44850,-0.14396,0.211030,0.73644,-3.574100,1.00070,0.34926,-0.038084,-0.12147,-1.3263,3.09370,2.05860,-1.07850,-1.44510,1.01360,-2.00260,3.88520,-1.28300,-0.95133,-1.47540,-4.08750,-0.95433,-3.46470,-0.22175,-1.657000,-1.10830,-3.33330,-4.48360,1.83610,3.18210,-4.0156,2.08560,-1.28940,-4.70140,2.06260,0.45006,-2.8907,1.13180,-0.93994,0.73698,1.48790,-2.62120,0.481220,-2.485900,3.8359,-0.87219,-4.23500,0.91940,3.65640,0.15135,-3.91090,-2.474400,1.42920,3.874400,-0.98322,-0.29984,1.44990,2.01320,-0.509520,-3.43370,-0.70837,1.536600,2.8661,4.895800,1.86250,-0.24276,-4.91790,-1.77090,-2.59000,1.83910,-5.19630,-1.25460,-0.20290,4.42930,2.22940,-3.6492,-1.22840,1.29550,-6.56860,4.045000,3.89240,1.56860,1.96810,1.18830,-2.31960,-2.17700,0.13151,1.17640,2.504200,-2.667700,0.87327,-2.128100,-0.63756,-2.291100,-0.18833,2.2716,0.84158,-5.43240,-1.053100,-4.32280,4.74300,-0.157190,-4.26570,-0.702300,3.844900,1.31280,-2.24870,-5.79350,-2.89630,-0.61781,4.56840,-0.97401,-1.033500,0.92388,3.28770,-2.13090,-4.37290,0.651320,-0.22954,6.09210,3.593800,3.10510,2.59140,-5.04660,1.25310,0.035322,-2.16530,3.26240,-1.23440,-0.94035,-0.30661,-3.95170,3.09280,5.530200,2.3109,-1.47430,-1.96780,-3.5667,-3.994300,3.50250,1.05360,0.966360,2.24550,5.06660,-0.11901,4.78090,-3.363300,-3.40330,-2.98510,-0.45114,-0.232280,3.25600,-2.77960,-2.44590,-1.3946,-0.48330,1.62750,-5.84550,-5.06910,-1.66860,0.95694,-0.27934,-1.47030,0.55012,2.37760,0.87968,1.16200,6.4236,-2.1488,0.36583,2.09410,-1.70460,-0.90162,0.94648,-1.56380,-4.08680,0.49151,-1.37700,-1.525300,-1.87430,1.52430,-0.014179,-2.90690,3.91830,-1.21080,4.1508,4.915100,0.21788,-0.49317,-0.51851,3.933100,1.44520,3.884100,4.73100,-3.1003,1.15710,-1.75900,0.41543,0.014483,1.33840,0.57833,2.7432,-0.39032,-2.44960,1.18580,4.603100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
prawn,-0.555210,-1.20940,-1.59380,0.85357,0.11995,0.819640,1.38210,2.358900,-0.45013,0.79720,4.4177,1.49190,-1.38680,1.735400,1.59320,-0.007249,1.506900,-0.086719,-1.357800,-1.3076,1.96040,2.54860,-1.3290,-2.80820,0.19496,-3.13150,-2.24660,-2.50650,-2.861500,0.44785,0.80668,-1.417200,-0.85002,1.04690,-1.22520,-0.21808,-1.013900,0.041965,2.60150,1.55600,-0.070996,-0.57363,-0.28696,-0.12214,0.67195,0.60949,0.32463,-2.32220,-2.49200,1.98890,0.433630,1.35570,0.439420,-4.62640,-2.3616,-0.68795,0.96643,0.50996,1.49600,-0.439710,1.6498,1.11770,-1.8202,-0.37879,0.99955,-1.0961,-3.30240,-2.8575,0.28605,2.1055,0.57166,0.73573,-0.52267,1.183200,0.63967,0.15548,-3.99830,1.29710,-1.14900,-0.65285,-3.27830,0.43801,2.63580,0.41438,3.420500,-1.178200,-1.57040,-3.1601,2.18660,-0.830020,-0.25336,1.41320,2.89940,-2.84560,0.48514,-1.58330,2.266300,-2.54060,0.894110,-0.31395,1.43480,1.501900,1.38260,1.8610,-0.50059,4.21610,-0.22724,-0.32698,-0.61409,-1.12580,0.59901,-1.25530,1.20990,-0.30591,0.97642,-0.47297,-1.06940,-2.06600,1.399000,-3.58810,-1.68080,-1.63680,-2.66290,1.94300,-1.5802,-1.59290,1.22260,-2.00820,0.48073,-0.88187,-1.6929,0.62347,1.05970,-0.60525,0.36383,1.00030,-0.355750,-2.145400,3.5754,-2.42000,-1.10320,-0.56871,1.33020,0.38080,-0.77858,-2.356900,-1.01010,-1.456400,-1.41680,-1.41010,-0.91237,1.19580,-1.330800,1.71620,-2.02410,1.289600,4.7248,-0.483540,-2.54350,-0.54130,0.32934,-1.32690,-0.54886,0.42794,-2.72180,-1.29370,-1.82900,0.41575,1.42500,1.6748,2.44250,-1.09810,1.25560,0.503130,2.19230,-1.28090,-1.39880,2.02120,-1.88280,-0.39164,-1.58550,-0.36367,3.765500,0.293570,0.30051,0.080223,-0.98590,-1.920200,2.13860,2.6969,-1.33410,0.95185,-1.263900,0.21003,0.19744,-1.192300,-2.83750,0.101560,0.944520,2.57890,-1.19540,0.80411,-2.12390,0.44031,1.97040,1.08730,-0.117200,-0.14338,-1.22470,-1.87710,1.11770,-0.570940,0.56189,1.11130,-2.205400,-0.06675,1.80260,-1.01180,0.10635,0.579770,-2.48590,-0.53602,-1.30350,2.12540,0.23927,-1.13900,1.39190,1.890500,2.2045,2.44450,-1.82030,-2.8197,-0.630170,-0.11690,-0.36900,0.087937,-1.55890,2.96980,1.29060,0.56415,-3.074000,0.87650,2.27160,0.78658,-1.452400,-1.18910,-0.14552,1.11020,2.8045,2.52440,-0.41811,-0.26705,-5.74630,-0.43291,0.92065,-4.38000,0.34797,0.52697,-0.48560,0.70011,1.14140,3.1286,2.7785,3.20760,0.58293,-0.57667,1.24440,2.48680,-3.89850,1.04800,1.24410,-0.29225,-1.092200,1.07380,-0.55544,-1.909400,1.65140,-1.42600,-1.16200,2.1906,-0.144720,-0.73245,2.31740,-0.25365,5.161100,-1.22130,2.924400,0.74770,-2.9230,1.30990,1.62700,-1.79590,2.008100,-1.23140,-3.08860,-1.1762,3.87150,0.28911,-2.57810,3.099400
tweezers,-1.998400,1.01710,1.24380,-0.67552,5.93120,2.095500,9.03150,-3.656900,-1.58780,2.31650,-7.8383,-3.04760,0.53385,-4.658400,-5.50930,-0.144140,-2.957600,3.884200,-0.261430,7.2422,0.80466,6.26090,3.8815,-4.15570,8.75300,-0.36023,3.60130,-1.01360,3.390400,-3.53180,-7.93520,5.759200,-1.64000,6.84850,-1.06040,-0.34016,-1.574700,-1.868700,-2.42510,-5.18620,-1.089400,3.21340,3.45950,-3.49460,1.72360,-3.97270,0.99131,0.69573,-5.59780,3.36330,-5.347700,0.78380,2.169700,0.79255,-3.4373,-7.35500,3.03790,-1.63840,-5.24980,-2.428800,3.4233,5.22310,-4.1671,1.75210,3.43010,-1.3719,3.17720,3.3355,-1.02590,-1.0918,5.74780,-6.17680,-0.90611,-0.340530,-3.73790,2.86700,3.94590,1.07560,1.00450,-1.72910,3.21970,2.09330,0.44786,0.38864,0.092844,-7.526000,-5.14850,-2.6222,-0.66842,-0.401070,-0.63273,0.44273,-3.57870,1.88100,-3.06600,0.96110,-1.738500,2.21080,6.470400,4.43040,0.52488,0.868510,1.09790,3.8364,7.12610,-1.28140,3.61210,2.34360,4.08700,2.74710,-1.38700,3.22070,1.17870,-1.34500,-6.25450,-6.11970,1.24090,0.74903,-7.940300,1.32520,0.12838,-0.97745,5.31560,-2.22900,-1.8447,2.27250,-1.63490,1.89690,-0.38576,-2.04050,-1.6867,2.99530,2.30300,2.58140,-1.50950,-0.96319,2.648400,1.731300,2.2987,-2.23230,-0.28140,-2.49560,-2.53810,-3.74270,-1.87680,-7.723100,-0.64728,9.056800,0.53917,-5.36820,3.61550,-3.54470,-2.368200,3.67690,-0.21525,0.233910,-3.2361,-3.658500,2.16660,0.87207,-2.27130,-1.76050,-0.56142,-3.73920,-3.43280,-1.33920,3.33600,-2.57590,-4.89360,-4.2442,2.35060,-1.33260,-3.76300,-1.551900,2.77840,2.04460,1.96560,-5.09600,1.46480,1.77810,-3.98320,1.86650,-0.296200,-1.429700,4.70510,-3.904300,4.65730,0.013339,4.52800,6.1138,3.76860,0.74584,3.403800,2.45690,6.35240,3.060000,1.42010,0.398030,0.506680,2.47210,10.89100,4.09930,4.29780,-1.71060,-3.51410,-2.06170,2.905400,-3.08590,0.24649,-3.55320,-6.03000,-1.232300,1.48810,6.99350,-1.177600,0.24186,3.12120,3.61290,-2.12400,-4.258800,-4.43620,0.28104,0.56529,3.60660,7.74220,6.49820,2.31770,1.393400,-7.0152,-0.26532,2.96030,-3.0547,-1.392500,-6.36720,-0.10276,-4.742700,0.23440,3.07760,2.63210,-6.00090,-0.035202,0.12049,3.32280,-2.53680,4.885500,-1.19500,4.03170,2.91550,-2.4755,0.94430,-2.74750,0.10769,7.21170,4.77290,-1.17920,5.26350,1.46960,-3.13040,-4.69320,2.81080,-2.16570,-7.2465,-0.4450,-1.33080,0.74278,-0.82304,-2.24190,4.98540,9.51510,-0.96686,-3.95520,-3.31550,-0.251370,7.14600,6.76020,3.751200,3.24980,3.52460,-1.91650,-3.3861,-1.981000,-1.44270,-1.31170,0.33794,0.846070,-0.96828,-1.879400,3.66160,-7.6509,-0.47105,0.36527,0.97179,0.539580,0.42964,-1.24240,-4.2396,3.22420,0.67627,-0.17765,-2.991900
university,-1.068600,-10.11400,4.03640,-3.13620,6.66330,-1.798900,3.68110,7.327800,-0.94667,-2.11760,3.7071,2.96010,3.89420,-6.757900,-4.48670,2.691700,-4.460400,5.510500,-4.175700,4.4499,-1.04270,-7.46150,-6.4483,-3.93410,-10.33600,3.84610,2.51430,4.78470,-4.129000,6.99120,3.12810,0.614680,7.07690,-2.96670,-2.34340,-9.32910,-1.687600,-1.877200,0.84683,-5.67300,-12.981000,-0.92561,-5.91940,-3.14500,-2.23150,8.87240,6.78720,0.14341,2.92010,4.38660,-3.426200,-5.55780,4.288900,-0.24222,2.9603,-1.88000,-1.71170,-2.73970,2.40000,-4.745400,-7.3338,-1.04920,-7.6544,-11.14300,0.12249,2.8896,2.46180,5.1039,-6.56890,-4.7415,1.11470,-3.74690,2.34610,7.248700,3.33770,7.89670,-0.81971,-5.22830,-1.16860,-0.22470,3.76020,-1.65840,4.54910,1.30720,3.787100,1.252800,-4.13410,3.5882,-4.40810,10.561000,-9.90360,-0.17611,-6.95630,12.21300,-2.07930,10.17100,-0.209550,-7.49150,8.270100,3.48150,9.87260,-5.419800,5.96950,9.9306,10.21500,3.43180,-0.20011,3.76160,-1.01920,-5.30060,-5.32780,-0.96439,0.70093,-2.62770,-10.60600,12.25000,-4.56330,-3.30350,-0.074273,1.79680,6.02050,-5.56530,4.50280,-4.05520,-7.7767,-12.24100,-4.70380,-4.23770,-8.99950,4.39090,4.2466,-4.41830,9.48320,-0.55428,-14.00000,4.89870,0.292040,1.583300,-8.6796,1.77160,5.95060,5.11210,0.83254,3.17340,-0.81617,-7.215200,-4.75200,-2.376700,3.69630,-1.01110,-1.18700,0.39079,-11.364000,0.44649,9.56470,7.098000,1.5688,-2.253900,7.33040,-5.75790,2.47420,8.93480,3.93900,10.37700,-7.73290,7.63050,-2.52470,-2.68520,-3.93670,2.8864,-10.93700,-5.13140,6.06970,-0.217600,6.16230,12.69900,0.69175,6.98680,-6.88360,-2.50140,-4.01530,0.59352,-4.653300,-1.912300,6.85350,3.342700,-0.54042,-1.732900,-1.89180,-6.7632,3.49320,2.39900,-0.591660,-4.17540,-3.53590,5.579200,-3.76200,-7.006300,-8.743500,-0.56694,5.89390,-6.70050,3.39130,-2.17210,6.24640,-1.04130,-6.061900,3.77820,2.46180,0.69879,-3.42900,6.555000,1.16150,-3.84820,-8.639900,6.75660,3.84780,-10.10000,-5.54070,4.452200,-6.06380,-2.28500,0.86318,-0.99399,-3.92160,1.73300,-0.92894,-8.097500,4.3275,2.63300,-7.45580,-3.7734,0.470520,-1.30890,7.80510,3.152300,-3.88340,10.38400,-6.41090,10.06900,-7.518700,-0.93429,-7.32330,1.37650,12.077000,-2.60510,4.08290,7.64380,1.5573,-1.38580,3.00970,2.88590,-6.33230,1.85610,-4.39880,-4.46170,0.73151,-2.63660,9.65110,-1.93300,-5.81710,1.7027,-5.7822,-5.78010,0.99957,-0.93970,-6.58670,4.77580,1.03520,-9.00900,-0.57722,-1.68390,-1.886500,1.09890,-14.13500,-5.768600,2.61770,-9.45430,-0.27378,2.7515,5.076000,3.32590,6.23440,2.19080,-3.097600,-12.63200,8.246700,-0.63342,-14.1170,-9.10750,-1.00250,0.41538,0.093109,-1.35630,-5.99590,2.3063,-3.14430,6.11140,-5.91240,10.933000
wasteful,-5.898500,-2.82470,0.85307,-1.26600,0.80816,0.217950,5.35130,1.112400,-3.11060,-0.13387,-4.2348,-2.81970,1.05510,-4.293300,-3.84430,-1.019800,-3.657400,0.611180,-1.142200,3.7208,0.53791,-0.23637,5.7244,0.36354,2.95040,2.40210,-0.95108,1.97110,3.564500,-0.24429,-2.21570,3.356400,1.30990,6.35790,-5.97120,-5.17460,0.099442,-2.119100,0.38604,-1.42960,-0.711400,2.46850,5.97340,1.15390,1.39120,-1.00750,-4.77150,1.52450,0.35708,3.62370,-0.152060,-2.22140,0.550530,4.08640,1.8258,-5.79210,1.33070,-1.24280,-4.41500,-2.199900,2.9477,2.73850,3.7307,-0.32139,2.04040,1.9807,0.18090,1.1595,-2.38990,-3.2002,2.90930,-3.01970,-0.32339,0.006255,3.46460,0.45295,1.78530,-2.56340,-1.50400,-4.26990,-0.35531,-0.33360,-1.02870,4.97770,-0.981450,-1.983500,-1.99230,-1.2690,0.21810,-0.945760,-3.61430,-2.41690,-1.14280,0.90399,2.88950,3.15230,-2.135800,1.17070,5.211300,1.02070,2.31400,-2.728000,-1.57210,-1.4642,2.80760,-5.52380,3.75670,-1.01060,3.31980,-0.90565,-3.27050,1.77210,3.12100,0.79535,-2.54770,-5.78040,-0.66670,1.20070,-4.064200,2.23100,-4.14660,-1.45330,1.05910,0.18255,-1.5340,0.48083,0.93602,2.37270,3.23040,-1.36840,-0.6788,0.97002,-0.18370,0.68279,-0.13605,-1.88550,2.468200,-0.010213,2.5421,1.39040,-3.47800,-0.30130,-0.50026,-2.01260,2.67220,-2.829400,-0.76953,0.840990,-4.90710,-3.02280,4.37140,-1.09750,-7.085900,1.63890,-0.96322,-1.326800,-3.6022,0.608130,-0.22319,2.39830,-1.97400,-1.61550,-2.34880,-3.12220,-1.20300,1.61480,-1.86640,-1.29030,-4.41030,-5.3345,1.12620,-2.73770,-3.77910,1.977100,-0.23047,0.82517,0.10275,-0.58393,-0.30369,-0.39137,-1.68450,-2.28840,-2.109200,0.060564,3.07070,-0.128720,5.69100,0.749880,4.24340,1.4342,0.25702,4.37910,4.458300,4.80330,2.09530,-2.330100,4.88700,-0.024999,4.268500,0.65543,2.44770,1.08920,0.14664,-4.61360,0.56488,4.03910,1.034500,-3.25500,2.82740,-3.75810,-1.42250,0.791350,-1.07010,2.13090,-2.069600,-2.34830,0.33561,2.74800,3.54050,-0.421300,-4.19460,0.60109,0.90805,-0.58592,1.81030,3.47700,2.42890,-1.888000,-1.4580,-0.94369,-0.44531,-1.1296,-3.829700,-3.61310,1.15840,-4.380100,-3.03080,1.47220,5.73580,-1.94060,2.842700,-1.19690,4.29830,1.02980,4.770900,-1.95050,5.40530,-5.34150,-6.2761,3.03760,1.78590,2.04100,3.17640,2.22880,0.03576,4.11180,5.24720,-2.28430,1.41040,1.78310,0.30675,-4.1661,3.5212,-2.73180,-5.18410,1.46250,-0.94632,4.03570,0.63492,-0.59014,1.07350,-2.68880,-1.346300,2.78750,2.46090,0.250130,1.58420,-3.46610,1.61160,-3.5738,-2.515900,-2.46640,-3.03200,-1.81210,4.040300,-0.91036,-0.295610,0.46698,-5.7423,-3.85580,-1.89300,-0.19739,-2.204700,1.87600,-3.85170,-2.8258,-1.64500,-0.22011,3.19510,0.082503


In [102]:
Y_train

Unnamed: 0,F_Objectivity,F_Subjectivity
airplane,0.960395,0.463949
coarse,0.401438,0.265841
persecution,0.319377,0.678163
moment,0.139091,0.413798
responsible,0.289618,0.654414
...,...,...
prawn,0.952891,0.110865
tweezers,0.953419,0.193659
university,0.835052,0.695757
wasteful,0.200742,0.492842


#### Binarizing Y_train and Y_test
Once we run the first MLP model, the performance wasn't over 54%. 
In this Section, we will binarize the semantic factor values following the median values.

In [99]:
# Binarizing through list comprehension
Y_train['F_Objectivity'] = ['high' if f_objectivity >= 0.565 else 'low' for f_objectivity in Y_train['F_Objectivity']]
Y_train['F_Subjectivity'] = ['high' if f_subjectivity >= 0.392 else 'low' for f_subjectivity in Y_train['F_Subjectivity']]

Y_test['F_Objectivity'] = ['high' if f_objectivity >= 0.565 else 'low' for f_objectivity in Y_test['F_Objectivity']]
Y_test['F_Subjectivity'] = ['high' if f_subjectivity >= 0.392 else 'low' for f_subjectivity in Y_test['F_Subjectivity']]

In [103]:
# Binarizing through list comprehension
Y_train['F_Objectivity'] = [1 if f_objectivity >= 0.565 else 0 for f_objectivity in Y_train['F_Objectivity']]
Y_train['F_Subjectivity'] = [1 if f_subjectivity >= 0.392 else 0 for f_subjectivity in Y_train['F_Subjectivity']]

Y_test['F_Objectivity'] = [1 if f_objectivity >= 0.565 else 0 for f_objectivity in Y_test['F_Objectivity']]
Y_test['F_Subjectivity'] = [1 if f_subjectivity >= 0.392 else 0 for f_subjectivity in Y_test['F_Subjectivity']]

In [104]:
Y_train

Unnamed: 0,F_Objectivity,F_Subjectivity
airplane,1,1
coarse,0,0
persecution,0,1
moment,0,1
responsible,0,1
...,...,...
prawn,1,0
tweezers,1,0
university,1,1
wasteful,0,1


---
### Training a MLP Classifier for word semantic content

In [75]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer

In [113]:
# Define a new MLP architecture
model = Sequential([
    Dense(256, activation='relu', input_shape=(300,)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax')  # 2 neurons for binary classification with softmax activation
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # Use categorical_crossentropy for categorical data
              metrics=['accuracy'])

In [114]:
# Print model summary
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_39 (Dense)            (None, 256)               77056     
                                                                 
 dense_40 (Dense)            (None, 128)               32896     
                                                                 
 dense_41 (Dense)            (None, 64)                8256      
                                                                 
 dense_42 (Dense)            (None, 32)                2080      
                                                                 
 dense_43 (Dense)            (None, 2)                 66        
                                                                 
Total params: 120354 (470.13 KB)
Trainable params: 120354 (470.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


> Como converti a saída em dados categóricos, é preciso antes utilizar OneHotEncoder:

In [124]:
## Como já foram transformados anteriormente em [0,1], preciso apenas torná-los como lista:
Y_train_array = Y_train.to_numpy()
Y_test_array = Y_test.to_numpy()

X_train_array = X_train.to_numpy()
X_test_array = X_test.to_numpy()

# Print the first few elements to verify
print(Y_train_array[:5])  # Print the first 5 elements


[[1 1]
 [0 0]
 [0 1]
 [0 1]
 [0 1]]


In [79]:
# Convert multilabel categorical labels to binary vectors
multi_label_binarizer = MultiLabelBinarizer()
Y_train_encoded = multi_label_binarizer.fit_transform(Y_train)
Y_test_encoded = multi_label_binarizer.transform(Y_test)

In [85]:
len(Y_train_encoded)

2

In [86]:
Y_train_encoded

array([[1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1],
       [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [108]:
Y_train

Unnamed: 0,F_Objectivity,F_Subjectivity
airplane,1,1
coarse,0,0
persecution,0,1
moment,0,1
responsible,0,1
...,...,...
prawn,1,0
tweezers,1,0
university,1,1
wasteful,0,1


In [125]:
X_train_array

array([[ -3.9003  ,   3.9343  ,   0.65395 , ...,   0.11406 ,  -5.1079  ,
          1.122   ],
       [  2.0427  ,  -2.0691  ,  -1.591   , ...,  -3.8809  ,  -5.2335  ,
          3.8502  ],
       [  0.068316,   4.3433  ,  -5.5537  , ...,   2.2451  ,  -1.9138  ,
         -0.074928],
       ...,
       [ -1.0686  , -10.114   ,   4.0364  , ...,   6.1114  ,  -5.9124  ,
         10.933   ],
       [ -5.8985  ,  -2.8247  ,   0.85307 , ...,  -0.22011 ,   3.1951  ,
          0.082503],
       [  2.5277  ,   2.4585  ,   1.4097  , ...,  -3.3495  ,  -4.2642  ,
          0.89491 ]], dtype=float32)

In [128]:
# Train the model
history = model.fit(X_train_array, Y_train_array, epochs=50, batch_size=16, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [129]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test_array, Y_test_array)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 45.04%


In [65]:
# Treina o modelo
history = model.fit(X_train, Y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.1)  # Usamos parte dos dados de treino como validação

# Avalia o modelo com os dados de teste
loss, accuracy = model.evaluate(X_test.values, Y_test.values)
print(f"Acurácia do modelo nos dados de teste: {accuracy * 100:.2f}%")

Epoch 1/50


UnimplementedError: Graph execution error:

Detected at node categorical_crossentropy/Cast defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\Lib\asyncio\base_events.py", line 607, in run_forever

  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\Lib\asyncio\base_events.py", line 1922, in _run_once

  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\Lib\asyncio\events.py", line 80, in _run

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel\kernelbase.py", line 542, in dispatch_queue

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel\kernelbase.py", line 531, in process_one

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel\ipkernel.py", line 359, in execute_request

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel\kernelbase.py", line 775, in execute_request

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel\ipkernel.py", line 446, in do_execute

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\IPython\core\interactiveshell.py", line 3051, in run_cell

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\IPython\core\interactiveshell.py", line 3106, in _run_cell

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\IPython\core\interactiveshell.py", line 3311, in run_cell_async

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\IPython\core\interactiveshell.py", line 3493, in run_ast_nodes

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\IPython\core\interactiveshell.py", line 3553, in run_code

  File "C:\Users\tiago\AppData\Local\Temp\ipykernel_30152\650847344.py", line 2, in <module>

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\engine\training.py", line 1807, in fit

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\engine\training.py", line 1151, in train_step

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\engine\training.py", line 1209, in compute_loss

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\engine\compile_utils.py", line 277, in __call__

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\losses.py", line 143, in __call__

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\losses.py", line 270, in call

  File "c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\keras\src\losses.py", line 2198, in categorical_crossentropy

Cast string to float is not supported
	 [[{{node categorical_crossentropy/Cast}}]] [Op:__inference_train_function_62846]

### Utilizando XGBoosting for multilabel:

### Utilizando PyCaret:

In [131]:
from pycaret.classification import *

ImportError: cannot import name '_PredictScorer' from 'sklearn.metrics._scorer' (c:\Users\tiago\OneDrive - UNIVALI\PhD\atividades de pesquisa\semantic_similarity\.venv\Lib\site-packages\sklearn\metrics\_scorer.py)