# Let's try approach semantic analysis with fuzzy logic + ml

In [1]:
# imports section
import numpy as np
import math
from numpy import dot
from numpy.linalg import norm
from gensim import corpora, models, similarities
import string

import gensim.downloader as api
from gensim.models import Word2Vec

In [None]:
expected = {}
expected['lonely'] = [0.1, 0.5, 1, 0.6, 0.1, 0.0]

In [None]:
# Define the lexicon with mood vectors
moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}
moods_indeces = {y.lower(): x for x, y in moods.items()}

lexicon = {
    # "frustrated": [0.8, 0.4, 0.3, 0, 0, 0], 
    "anxious": [0.3, 0.9, 0.4, 0, 0, 0.1],
    # "disappointed": [0.5, 0.3, 0.8, 0.1, 0, 0], 
    "furious": [0.9, 0.3, 0.2, 0, 0, 0],
    "peaceful": [0, 0, 0, 1, 0.4, 0.2], 
    "hate": [0.9, 0.3, 0.3, 0, 0, 0],
    "joyful": [0, 0, 0, 0.2, 0.9, 0.7], 
    "unacceptable": [0.9, 0.6, 0.6, 0, 0, 0],
    "thrilled": [0, 0, 0, 0, 0.6, 1], 
    "infuriating": [0.9, 0.8, 0.6, 0, 0, 0],
    "irate": [0.9, 0.2, 0.1, 0, 0, 0], 
    "terrible": [0.9, 0.7, 0.7, 0, 0, 0],
    "nervous": [0.2, 0.8, 0.3, 0.1, 0, 0.2],
    "melancholy": [0.1, 0.4, 0.9, 0.2, 0, 0], 
    "depressed": [0.1, 0.1, 0.9, 0.2, 0, 0], 
    "gloomy": [0.1, 0.1, 0.8, 0.3, 0, 0], 
    "serene": [0, 0, 0, 0.9, 0.8, 0.9], 
    "elated": [0, 0, 0, 0.1, 0.8, 0.9],
    "ecstatic": [0, 0, 0, 0, 0.7, 1],
    "overjoyed": [0.1, 0.1, 0.1, 0.1, 1, 0.9],
    "gleeful": [0.1, 0.1, 0.1, 0.1, 0.9, 0.8],
    "cheerful": [0.1, 0.1, 0.1, 0.1, 0.8, 0.5],
    "optimistic": [0.1, 0.2, 0.1, 0.7, 0.8, 0.6],
    "buoyant": [0.1, 0.2, 0.1, 0.8, 0.7, 0.7],
    "enthusiastic": [0.2, 0.3, 0.1, 0.6, 0.8, 0.9],
    "upbeat": [0.1, 0.2, 0.1, 0.7, 0.8, 0.7],
    "festive": [0.1, 0.1, 0.1, 0.7, 0.8, 0.7],
    "playful": [0.1, 0.2, 0.1, 0.6, 0.7, 0.8],
    "vivacious": [0.1, 0.2, 0.1, 0.6, 0.8, 0.7],
    "jovial": [0.1, 0.2, 0.1, 0.7, 0.8, 0.7],
    "amused": [0.1, 0.2, 0.1, 0.6, 0.7, 0.7],
    "blissful": [0.1, 0.1, 0.1, 0.8, 0.9, 0.7],
    "grateful": [0.1, 0.1, 0.1, 0.8, 0.9, 0.7],
    "tranquil": [0.1, 0.1, 0.1, 0.9, 0.8, 0.6],
    "relaxed": [0.1, 0.1, 0.1, 0.9, 0.8, 0.6],
    "comfortable": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6],
    "cozy": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6],
    "warm": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6],
    "inviting": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6], 
    "satisfied": [0.1, 0.1, 0.2, 0.6, 0.5, 0.3],
    "pleasant": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6], 
    "pleased" : [0.1, 0.1, 0.1, 0.9, 0.8, 0.3], 
    "mellow": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6] , 
    "happy": [0.1, 0.1, 0.1, 0.7, 1, 0.6]
    }

In [None]:
corpus = api.load('glove-wiki-gigaword-100')

vector = corpus['computer']
print(vector)

In [24]:
data = ''

with open('../sets/adjectives.txt', 'r') as file:
    data = file.read().replace('\n', '')

adjectives = data.split(',')
print(adjectives[:20])

['adorable', 'adventurous', 'aggressive', 'agreeable', 'alert', 'alive', 'amused', 'angry', 'annoyed', 'annoying', 'anxious', 'arrogant', 'ashamed', 'attractive', 'average', 'awful', 'bad', 'beautiful', 'better', 'bewildered']


In [25]:
adjectives_vectors = {}

for adjective in adjectives:
    adjectives_vectors[adjective] = corpus[adjective]

print(adjectives_vectors)

{'adorable': array([ 0.21424 ,  0.28564 ,  0.78691 , -0.64855 , -0.68554 ,  0.61508 ,
       -0.013512,  0.40364 ,  0.44196 , -0.01193 ,  0.3294  ,  0.049576,
       -0.05206 , -0.10255 ,  0.85953 ,  0.51917 ,  0.10029 , -0.090505,
        0.40973 ,  0.79259 ,  0.078494,  0.452   ,  0.3083  , -0.26834 ,
        0.77316 ,  0.24646 , -0.67276 , -0.097082, -0.75478 ,  0.07057 ,
        0.11026 ,  0.054871,  0.51715 , -0.35623 ,  0.2862  , -0.14182 ,
       -0.063742,  0.13531 ,  0.27839 , -0.42505 ,  0.61555 ,  0.60602 ,
       -0.34267 , -0.33532 , -0.54029 ,  0.3547  , -0.01229 ,  0.60753 ,
       -0.16026 ,  0.22141 , -0.29073 , -0.58638 ,  0.64802 ,  0.12343 ,
       -0.37332 , -0.077694,  0.39012 ,  0.1339  , -0.85815 , -0.64989 ,
        0.27449 ,  0.96495 , -0.19607 ,  0.21205 , -0.025666, -0.30038 ,
        0.27718 ,  0.05365 , -0.099969, -0.15381 ,  0.58549 , -0.27354 ,
       -0.20762 , -0.16941 ,  0.42055 ,  0.76788 , -0.63877 , -0.13858 ,
        0.14152 , -0.68321 , -0.19758 

Using ML we can extend defined vectors to the whole space of words of interest.
ML - classification and regression;
Here we are going to apply some regression:
- Polynomial regression
- Support vector regression 
- Decision tree regression
- Random forest regression 
- 

Also we ca apply:
- Dimensionality reduction 
- Kohonen map
- 


In [8]:
print(lexicon["frustrated"])

[0.8, 0.4, 0.3, 0, 0, 0]


In [None]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split


# Define the degree of the polynomial features
degree = 3  # You can experiment with different degrees

# Create a pipeline that includes PolynomialFeatures and LinearRegression
model = Pipeline([
    ('poly', PolynomialFeatures(degree=degree)),
    ('linear', LinearRegression())
])

x_train, y_train = [], []
for key in lexicon.keys():
    x_train.append(adjectives_vectors[key])
    y_train.append(lexicon[key])

x_train_np = np.array(x_train)
y_train_np = np.array(y_train)

# Train the model
model.fit(x_train, y_train)

lexicon_trained = {}
for key in adjectives_vectors:
    result = model.predict([adjectives_vectors[key]])
    lexicon_trained[key] = result

print(lexicon_trained)

In [27]:
# moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}


print(lexicon_trained['obnoxious'])
print(lexicon_trained['lonely'])
print(lexicon_trained['hilarious'])

[[0.39223996 0.28470221 0.211897   0.36321238 0.48638391 0.4653131 ]]
[[0.2673967  0.26368639 0.37658692 0.36251179 0.46619624 0.32825188]]
[[0.3443198  0.35014295 0.33013242 0.35069219 0.45397739 0.47249751]]


In [29]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Create a pipeline with StandardScaler and SVR, wrapped in MultiOutputRegressor
model = MultiOutputRegressor(make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1.0)))

x_train, y_train = [], []
for key in lexicon.keys():
    x_train.append(adjectives_vectors[key])
    y_train.append(lexicon[key])

x_train_np = np.array(x_train)
y_train_np = np.array(y_train)

# Train the model
model.fit(x_train, y_train)

lexicon_trained = {}
for key in adjectives_vectors:
    result = model.predict([adjectives_vectors[key]])
    lexicon_trained[key] = result

print(lexicon_trained)

{'adorable': array([[0.28049398, 0.26668014, 0.27283056, 0.41543006, 0.58737366,
        0.48421475]]), 'adventurous': array([[0.28793654, 0.28654378, 0.25081813, 0.48273499, 0.57142011,
        0.52699778]]), 'aggressive': array([[0.41644054, 0.3300874 , 0.30368115, 0.47109372, 0.4097769 ,
        0.34862298]]), 'agreeable': array([[0.2672951 , 0.24882561, 0.21853712, 0.5875867 , 0.63913959,
        0.51363493]]), 'alert': array([[0.39721765, 0.31329024, 0.32772245, 0.47397221, 0.38963861,
        0.3178555 ]]), 'alive': array([[0.32547728, 0.2792234 , 0.31470446, 0.49983784, 0.51750436,
        0.38615087]]), 'amused': array([[0.19980667, 0.2741304 , 0.20036462, 0.50010927, 0.59973031,
        0.59996328]]), 'angry': array([[0.62976698, 0.33673664, 0.29142939, 0.2236466 , 0.18171673,
        0.17217712]]), 'annoyed': array([[0.50176156, 0.3642995 , 0.26407179, 0.26120554, 0.31403227,
        0.32672351]]), 'annoying': array([[0.49923268, 0.39671415, 0.33202583, 0.30579427, 0.36066129

In [None]:
# moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}

print(lexicon_trained['obnoxious'])
print(lexicon_trained['lonely'])
print(lexicon_trained['hilarious'])

[[0.438714   0.31102847 0.29717447 0.35512593 0.43116512 0.38796361]]
[[0.32015741 0.28867479 0.40039786 0.41139449 0.43092495 0.30939985]]
[[0.38989201 0.32554885 0.35671486 0.41503415 0.45020433 0.41709219]]


In [33]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Create a pipeline with StandardScaler and SVR, wrapped in MultiOutputRegressor
model = MultiOutputRegressor(make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1.0)))

x_train, y_train = [], []
for key in lexicon.keys():
    x_train.append(adjectives_vectors[key])
    y_train.append([lexicon[key][0], 0])

x_train_np = np.array(x_train)
y_train_np = np.array(y_train)

# Train the model
model.fit(x_train, y_train)

lexicon_trained = {}
for key in adjectives_vectors:
    result = model.predict([adjectives_vectors[key]])
    lexicon_trained[key] = result[0][0]

print(lexicon_trained)

{'adorable': 0.2804939785809792, 'adventurous': 0.28793654132037694, 'aggressive': 0.41644053732776987, 'agreeable': 0.26729510054987327, 'alert': 0.3972176481465778, 'alive': 0.3254772795442996, 'amused': 0.19980666636031158, 'angry': 0.6297669799732233, 'annoyed': 0.5017615622276038, 'annoying': 0.4992326775953021, 'anxious': 0.31907248501166396, 'arrogant': 0.4272231849682559, 'ashamed': 0.3612848477074781, 'attractive': 0.2916291191303876, 'average': 0.3632305542523673, 'awful': 0.5451233173967135, 'bad': 0.5160426592353777, 'beautiful': 0.26111623287549807, 'better': 0.3248738246059163, 'bewildered': 0.3394165733018932, 'black': 0.37617888701389585, 'bloody': 0.44734537918176126, 'blue': 0.34645348270685, 'blue-eyed': 0.33976745920043827, 'blushing': 0.3433574537390627, 'bored': 0.3241696119807106, 'brainy': 0.33939405991087845, 'brave': 0.3577619213628473, 'breakable': 0.34553176841887306, 'bright': 0.24043904947973926, 'busy': 0.35303516209001423, 'calm': 0.27684044858336687, 'c

In [34]:
# moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}

print(lexicon_trained['obnoxious'])
print(lexicon_trained['lonely'])
print(lexicon_trained['hilarious'])

0.43871399620322826
0.32015740730301884
0.3898920053982028


In [38]:
lexicon_trained = {}
for key in adjectives_vectors:
    lexicon_trained[key] = [0, 0, 0, 0, 0, 0]

from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

for i in range(6):
    # Create a pipeline with StandardScaler and SVR, wrapped in MultiOutputRegressor
    model = MultiOutputRegressor(make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1.0)))

    x_train, y_train = [], []
    for key in lexicon.keys():
        x_train.append(adjectives_vectors[key])
        y_train.append([lexicon[key][i], 0])

    x_train_np = np.array(x_train)
    y_train_np = np.array(y_train)

    # Train the model
    model.fit(x_train, y_train)

    for key in adjectives_vectors:
        result = model.predict([adjectives_vectors[key]])
        lexicon_trained[key][i] = result[0][0]

print(lexicon_trained)


{'adorable': [0.2804939785809792, 0.26668014253660194, 0.2728305555258477, 0.4154300568807142, 0.587373663684237, 0.4842147506961325], 'adventurous': [0.28793654132037694, 0.2865437779274545, 0.25081812588681457, 0.4827349869612562, 0.5714201072555158, 0.526997777242216], 'aggressive': [0.41644053732776987, 0.3300874015856271, 0.30368115205714125, 0.4710937178020681, 0.40977689954749535, 0.348622980928065], 'agreeable': [0.26729510054987327, 0.2488256073541267, 0.21853712372034123, 0.5875866978143545, 0.6391395852792562, 0.5136349256669569], 'alert': [0.3972176481465778, 0.3132902362349471, 0.3277224468756906, 0.4739722092647388, 0.3896386128625068, 0.3178555025677162], 'alive': [0.3254772795442996, 0.2792233977159211, 0.31470445740861314, 0.49983783629420353, 0.5175043583662877, 0.38615087392276115], 'amused': [0.19980666636031158, 0.2741304002070189, 0.20036461983147436, 0.5001092740911381, 0.5997303061285256, 0.5999632765343432], 'angry': [0.6297669799732233, 0.33673664207185217, 0.

In [39]:
# moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}

print(lexicon_trained['obnoxious'])
print(lexicon_trained['lonely'])
print(lexicon_trained['hilarious'])

[0.43871399620322826, 0.3110284720210269, 0.2971744716197592, 0.3551259287830437, 0.4311651196038902, 0.38796361043208955]
[0.32015740730301884, 0.2886747887332214, 0.4003978575048952, 0.4113944941311295, 0.43092495284093657, 0.3093998501118935]
[0.3898920053982028, 0.32554884539953455, 0.35671485930020275, 0.41503415189479337, 0.4502043289405145, 0.4170921875029631]


TODO:
