# Let's try approach semantic analysis with fuzzy logic + ml

In [1]:
# imports section
import numpy as np
import math
from numpy import dot
from numpy.linalg import norm
from gensim import corpora, models, similarities
import string

import gensim.downloader as api
from gensim.models import Word2Vec

In [2]:
expected = {}
expected['lonely'] = [0.1, 0.5, 1, 0.6, 0.1, 0.0]

In [8]:
# Define the lexicon with mood vectors
moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}
moods_indeces = {y.lower(): x for x, y in moods.items()}

lexicon = {
    # "frustrated": [0.8, 0.4, 0.3, 0, 0, 0], 
    "anxious": [0.3, 0.9, 0.4, 0, 0, 0.1],
    # "disappointed": [0.5, 0.3, 0.8, 0.1, 0, 0], 
    "furious": [0.9, 0.3, 0.2, 0, 0, 0],
    "peaceful": [0, 0, 0, 1, 0.4, 0.2], 
    "hate": [0.9, 0.3, 0.3, 0, 0, 0],
    "joyful": [0, 0, 0, 0.2, 0.9, 0.7], 
    "unacceptable": [0.9, 0.6, 0.6, 0, 0, 0],
    "thrilled": [0, 0, 0, 0, 0.6, 1], 
    "infuriating": [0.9, 0.8, 0.6, 0, 0, 0],
    "irate": [0.9, 0.2, 0.1, 0, 0, 0], 
    "terrible": [0.9, 0.7, 0.7, 0, 0, 0],
    "nervous": [0.2, 0.8, 0.3, 0.1, 0, 0.2],
    "melancholy": [0.1, 0.4, 0.9, 0.2, 0, 0], 
    "depressed": [0.1, 0.1, 0.9, 0.2, 0, 0], 
    "gloomy": [0.1, 0.1, 0.8, 0.3, 0, 0], 
    "serene": [0, 0, 0, 0.9, 0.8, 0.9], 
    "elated": [0, 0, 0, 0.1, 0.8, 0.9],
    "ecstatic": [0, 0, 0, 0, 0.7, 1],
    "overjoyed": [0.1, 0.1, 0.1, 0.1, 1, 0.9],
    "gleeful": [0.1, 0.1, 0.1, 0.1, 0.9, 0.8],
    "cheerful": [0.1, 0.1, 0.1, 0.1, 0.8, 0.5],
    "optimistic": [0.1, 0.2, 0.1, 0.7, 0.8, 0.6],
    "buoyant": [0.1, 0.2, 0.1, 0.8, 0.7, 0.7],
    "enthusiastic": [0.2, 0.3, 0.1, 0.6, 0.8, 0.9],
    "upbeat": [0.1, 0.2, 0.1, 0.7, 0.8, 0.7],
    "festive": [0.1, 0.1, 0.1, 0.7, 0.8, 0.7],
    "playful": [0.1, 0.2, 0.1, 0.6, 0.7, 0.8],
    "vivacious": [0.1, 0.2, 0.1, 0.6, 0.8, 0.7],
    "jovial": [0.1, 0.2, 0.1, 0.7, 0.8, 0.7],
    "amused": [0.1, 0.2, 0.1, 0.6, 0.7, 0.7],
    "blissful": [0.1, 0.1, 0.1, 0.8, 0.9, 0.7],
    "grateful": [0.1, 0.1, 0.1, 0.8, 0.9, 0.7],
    "tranquil": [0.1, 0.1, 0.1, 0.9, 0.8, 0.6],
    "relaxed": [0.1, 0.1, 0.1, 0.9, 0.8, 0.6],
    "comfortable": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6],
    "cozy": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6],
    "warm": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6],
    "inviting": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6], 
    "satisfied": [0.1, 0.1, 0.2, 0.6, 0.5, 0.3],
    "pleasant": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6], 
    "pleased" : [0.1, 0.1, 0.1, 0.9, 0.8, 0.3], 
    "mellow": [0.1, 0.1, 0.1, 0.8, 0.8, 0.6] , 
    "happy": [0.1, 0.1, 0.1, 0.7, 1, 0.6]
    }

In [4]:
corpus = api.load('glove-wiki-gigaword-100')

vector = corpus['computer']
print(vector)

[-1.6298e-01  3.0141e-01  5.7978e-01  6.6548e-02  4.5835e-01 -1.5329e-01
  4.3258e-01 -8.9215e-01  5.7747e-01  3.6375e-01  5.6524e-01 -5.6281e-01
  3.5659e-01 -3.6096e-01 -9.9662e-02  5.2753e-01  3.8839e-01  9.6185e-01
  1.8841e-01  3.0741e-01 -8.7842e-01 -3.2442e-01  1.1202e+00  7.5126e-02
  4.2661e-01 -6.0651e-01 -1.3893e-01  4.7862e-02 -4.5158e-01  9.3723e-02
  1.7463e-01  1.0962e+00 -1.0044e+00  6.3889e-02  3.8002e-01  2.1109e-01
 -6.6247e-01 -4.0736e-01  8.9442e-01 -6.0974e-01 -1.8577e-01 -1.9913e-01
 -6.9226e-01 -3.1806e-01 -7.8565e-01  2.3831e-01  1.2992e-01  8.7721e-02
  4.3205e-01 -2.2662e-01  3.1549e-01 -3.1748e-01 -2.4632e-03  1.6615e-01
  4.2358e-01 -1.8087e+00 -3.6699e-01  2.3949e-01  2.5458e+00  3.6111e-01
  3.9486e-02  4.8607e-01 -3.6974e-01  5.7282e-02 -4.9317e-01  2.2765e-01
  7.9966e-01  2.1428e-01  6.9811e-01  1.1262e+00 -1.3526e-01  7.1972e-01
 -9.9605e-04 -2.6842e-01 -8.3038e-01  2.1780e-01  3.4355e-01  3.7731e-01
 -4.0251e-01  3.3124e-01  1.2576e+00 -2.7196e-01 -8

In [None]:
data = ''

with open('../sets/adjectives.txt', 'r') as file:
    data = file.read().replace('\n', '')

adjectives = data.split(',')
print(adjectives[:20])

['adorable', 'adventurous', 'aggressive', 'agreeable', 'alert', 'alive', 'amused', 'angry', 'annoyed', 'annoying', 'anxious', 'arrogant', 'ashamed', 'attractive', 'average', 'awful', 'bad', 'beautiful', 'better', 'bewildered']


In [None]:
adjectives_vectors = {}

for adjective in adjectives:
    adjectives_vectors[adjective] = corpus[adjective]

print(adjectives_vectors)

Using ML we can extend defined vectors to the whole space of words of interest.
ML - classification and regression;
Here we are going to apply some regression:
- Polynomial regression
- Support vector regression 
- Decision tree regression
- Random forest regression 
- 

Also we ca apply:
- Dimensionality reduction 
- Kohonen map
- 


In [9]:
print(lexicon["frustrated"])

KeyError: 'frustrated'

Let's try applying polynomial regression to mapping adjactives to our vector space

In [10]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split


# Define the degree of the polynomial features
degree = 3  # You can experiment with different degrees

# Create a pipeline that includes PolynomialFeatures and LinearRegression
model = Pipeline([
    ('poly', PolynomialFeatures(degree=degree)),
    ('linear', LinearRegression())
])

x_train, y_train = [], []
for key in lexicon.keys():
    x_train.append(adjectives_vectors[key])
    y_train.append(lexicon[key])

x_train_np = np.array(x_train)
y_train_np = np.array(y_train)

# Train the model
model.fit(x_train, y_train)

lexicon_trained = {}
for key in adjectives_vectors:
    result = model.predict([adjectives_vectors[key]])
    lexicon_trained[key] = result

print(lexicon_trained)

{'adorable': array([[0.22084917, 0.24326469, 0.18947029, 0.44065253, 0.66891085,
        0.58757899]]), 'adventurous': array([[0.22041089, 0.25171863, 0.17750381, 0.51526528, 0.62645102,
        0.62195696]]), 'aggressive': array([[0.43600485, 0.33425093, 0.20485618, 0.52502456, 0.50719599,
        0.38384225]]), 'agreeable': array([[0.23894795, 0.21754434, 0.16499963, 0.54328598, 0.66442768,
        0.56916681]]), 'alert': array([[0.39413882, 0.35440117, 0.26230091, 0.37730707, 0.40750383,
        0.31588436]]), 'alive': array([[0.30048715, 0.26523571, 0.24083722, 0.49406776, 0.61244389,
        0.47378415]]), 'amused': array([[0.1, 0.2, 0.1, 0.6, 0.7, 0.7]]), 'angry': array([[ 1.2236592 ,  0.40960584,  0.2197396 , -0.28953317, -0.4646127 ,
        -0.39062102]]), 'annoyed': array([[0.50506968, 0.34268976, 0.2272871 , 0.26303219, 0.27871341,
        0.30082348]]), 'annoying': array([[0.53265417, 0.43728858, 0.3213496 , 0.23742341, 0.29901906,
        0.28366854]]), 'anxious': array([[

In [11]:
# moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}


print(lexicon_trained['obnoxious'])
print(lexicon_trained['lonely'])
print(lexicon_trained['hilarious'])

[[0.39223996 0.28470221 0.211897   0.36321238 0.48638391 0.4653131 ]]
[[0.2673967  0.26368639 0.37658692 0.36251179 0.46619624 0.32825188]]
[[0.3443198  0.35014295 0.33013242 0.35069219 0.45397739 0.47249751]]


Support vector regression:

In [None]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Create a pipeline with StandardScaler and SVR, wrapped in MultiOutputRegressor
model = MultiOutputRegressor(make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1.0)))

x_train, y_train = [], []
for key in lexicon.keys():
    x_train.append(adjectives_vectors[key])
    y_train.append(lexicon[key])

x_train_np = np.array(x_train)
y_train_np = np.array(y_train)

# Train the model
model.fit(x_train, y_train)

lexicon_trained = {}
for key in adjectives_vectors:
    result = model.predict([adjectives_vectors[key]])
    lexicon_trained[key] = result

print(lexicon_trained)

In [None]:
# moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}

print(lexicon_trained['obnoxious'])
print(lexicon_trained['lonely'])
print(lexicon_trained['hilarious'])

SVR for one parameter:

In [None]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Create a pipeline with StandardScaler and SVR, wrapped in MultiOutputRegressor
model = MultiOutputRegressor(make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1.0)))

x_train, y_train = [], []
for key in lexicon.keys():
    x_train.append(adjectives_vectors[key])
    y_train.append([lexicon[key][0], 0])

x_train_np = np.array(x_train)
y_train_np = np.array(y_train)

# Train the model
model.fit(x_train, y_train)

lexicon_trained = {}
for key in adjectives_vectors:
    result = model.predict([adjectives_vectors[key]])
    lexicon_trained[key] = result[0][0]

print(lexicon_trained)

In [None]:
# moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}

print(lexicon_trained['obnoxious'])
print(lexicon_trained['lonely'])
print(lexicon_trained['hilarious'])

SVR for each parameter separately:

In [12]:
lexicon_trained = {}
for key in adjectives_vectors:
    lexicon_trained[key] = [0, 0, 0, 0, 0, 0]

from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

for i in range(6):
    # Create a pipeline with StandardScaler and SVR, wrapped in MultiOutputRegressor
    model = MultiOutputRegressor(make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1.0)))

    x_train, y_train = [], []
    for key in lexicon.keys():
        x_train.append(adjectives_vectors[key])
        y_train.append([lexicon[key][i], 0])

    x_train_np = np.array(x_train)
    y_train_np = np.array(y_train)

    # Train the model
    model.fit(x_train, y_train)

    for key in adjectives_vectors:
        result = model.predict([adjectives_vectors[key]])
        lexicon_trained[key][i] = result[0][0]

print(lexicon_trained)


{'adorable': [0.2804939785809792, 0.26668014253660194, 0.2728305555258477, 0.4154300568807142, 0.587373663684237, 0.4842147506961325], 'adventurous': [0.28793654132037694, 0.2865437779274545, 0.25081812588681457, 0.4827349869612562, 0.5714201072555158, 0.526997777242216], 'aggressive': [0.41644053732776987, 0.3300874015856271, 0.30368115205714125, 0.4710937178020681, 0.40977689954749535, 0.348622980928065], 'agreeable': [0.26729510054987327, 0.2488256073541267, 0.21853712372034123, 0.5875866978143545, 0.6391395852792562, 0.5136349256669569], 'alert': [0.3972176481465778, 0.3132902362349471, 0.3277224468756906, 0.4739722092647388, 0.3896386128625068, 0.3178555025677162], 'alive': [0.3254772795442996, 0.2792233977159211, 0.31470445740861314, 0.49983783629420353, 0.5175043583662877, 0.38615087392276115], 'amused': [0.19980666636031158, 0.2741304002070189, 0.20036461983147436, 0.5001092740911381, 0.5997303061285256, 0.5999632765343432], 'angry': [0.6297669799732233, 0.33673664207185217, 0.

In [13]:
# moods = {0: "Angry", 1: "Worried", 2: "Sad", 3: "Calm", 4: "Happy", 5: "Excited"}

print(lexicon_trained['obnoxious'])
print(lexicon_trained['lonely'])
print(lexicon_trained['hilarious'])

[0.43871399620322826, 0.3110284720210269, 0.2971744716197592, 0.3551259287830437, 0.4311651196038902, 0.38796361043208955]
[0.32015740730301884, 0.2886747887332214, 0.4003978575048952, 0.4113944941311295, 0.43092495284093657, 0.3093998501118935]
[0.3898920053982028, 0.32554884539953455, 0.35671485930020275, 0.41503415189479337, 0.4502043289405145, 0.4170921875029631]


TODO:
- try dimensionality reduction / kohonen maps
- fuzzy logic automated rule generation
- fl application
- validation
- text processing


In [None]:
validation_data = {'lonely' : [0, 0, 0.8, 0.5, 0, 0]}