In [1]:
from typing import List

from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
import torch

In [2]:
import langchain as lc

import dotenv
import os

dotenv.load_dotenv()

cohere_api_key = os.getenv("COHERE_API_KEY")

In [3]:
# Creating a list of all 50 sentences as specified
sentences = [
    "Am the life of the party.",
    "Feel little concern for others.",
    "Am always prepared.",
    "Get stressed out easily.",
    "Have a rich vocabulary.",
    "Don't talk a lot.",
    "Am interested in people.",
    "Leave my belongings around.",
    "Am relaxed most of the time.",
    "Have difficulty understanding abstract ideas.",
    "Feel comfortable around people.",
    "Insult people.",
    "Pay attention to details.",
    "Worry about things.",
    "Have a vivid imagination.",
    "Keep in the background.",
    "Sympathize with others' feelings.",
    "Make a mess of things.",
    "Seldom feel blue.",
    "Am not interested in abstract ideas.",
    "Start conversations.",
    "Am not interested in other people's problems.",
    "Get chores done right away.",
    "Am easily disturbed.",
    "Have excellent ideas.",
    "Have little to say.",
    "Have a soft heart.",
    "Often forget to put things back in their proper place.",
    "Get upset easily.",
    "Do not have a good imagination.",
    "Talk to a lot of different people at parties.",
    "Am not really interested in others.",
    "Like order.",
    "Change my mood a lot.",
    "Am quick to understand things.",
    "Don't like to draw attention to myself.",
    "Take time out for others.",
    "Shirk my duties.",
    "Have frequent mood swings.",
    "Use difficult words.",
    "Don't mind being the center of attention.",
    "Feel others' emotions.",
    "Follow a schedule.",
    "Get irritated easily.",
    "Spend time reflecting on things.",
    "Am quiet around strangers.",
    "Make people feel at ease.",
    "Am exacting in my work.",
    "Often feel blue.",
    "Am full of ideas."
]

len(sentences)


50

In [29]:
import torch

# Initialize the tensor with zeros
tensor = torch.zeros((5, 50))
labels=["Extraversion", "Agreeableness",  "Conscientiousness", "Emotional Stability" , "Intellect/Imagination"]

# Populate the tensor according to the instructions
# The mapping is as follows:
# 1+: Extraversion, 2-: Agreeableness (inversed), 3+: Conscientiousness, 4-: Emotional Stability (inversed), 5+: Intellect/Imagination
# We invert Agreeableness and Emotional Stability signs because a negative sign (-) in the input implies a positive trait for these dimensions

# Mapping input to the tensor
input_mapping = {
    1: [(1, +1), (6, -1), (11, +1), (16, -1), (21, +1), (26, -1), (31, +1), (36, -1), (41, +1), (46, -1)],
    2: [(2, -1), (7, +1), (12, -1), (17, +1), (22, -1), (27, +1), (32, -1), (37, +1), (42, +1), (47, +1)],
    3: [(3, +1), (8, -1), (13, +1), (18, -1), (23, +1), (28, -1), (33, +1), (38, -1), (43, +1), (48, +1)],
    4: [(4, -1), (9, +1), (14, -1), (19, +1), (24, -1), (29, -1), (34, -1), (39, -1), (44, -1), (49, -1)],
    5: [(5, +1), (10, -1), (15, +1), (20, -1), (25, +1), (30, -1), (35, +1), (40, +1), (45, +1), (50, +1)],
}

for label, updates in input_mapping.items():
    for index, value in updates:
        if value<0:
            tensor[label-1, index-1] = -value
        else:
            tensor[label-1, index-1] = value

tensor


tensor([[1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,
       

In [5]:
for key,value in input_mapping.items():
    for x,y in value:
        sentences[x-1] = (sentences[x-1],y)

sentences


[('Am the life of the party.', 1),
 ('Feel little concern for others.', -1),
 ('Am always prepared.', 1),
 ('Get stressed out easily.', -1),
 ('Have a rich vocabulary.', 1),
 ("Don't talk a lot.", -1),
 ('Am interested in people.', 1),
 ('Leave my belongings around.', -1),
 ('Am relaxed most of the time.', 1),
 ('Have difficulty understanding abstract ideas.', -1),
 ('Feel comfortable around people.', 1),
 ('Insult people.', -1),
 ('Pay attention to details.', 1),
 ('Worry about things.', -1),
 ('Have a vivid imagination.', 1),
 ('Keep in the background.', -1),
 ("Sympathize with others' feelings.", 1),
 ('Make a mess of things.', -1),
 ('Seldom feel blue.', 1),
 ('Am not interested in abstract ideas.', -1),
 ('Start conversations.', 1),
 ("Am not interested in other people's problems.", -1),
 ('Get chores done right away.', 1),
 ('Am easily disturbed.', -1),
 ('Have excellent ideas.', 1),
 ('Have little to say.', -1),
 ('Have a soft heart.', 1),
 ('Often forget to put things back in t

In [32]:
vector=5*torch.ones(50)
print(tensor@(vector))

tensor([50., 50., 50., 50., 50.])


In [33]:
from langchain_core.pydantic_v1 import BaseModel, Field




class correlationScore(BaseModel):
    correlation_score: float = Field(description="a correlation score between sentence1 and sentence2 following the instructions in the prompt")
    sentence1: str = Field(description="sentence1")
    sentence2: str = Field(description="sentence2")

   

In [34]:
cc=correlationScore(correlation_score=1,sentence1='yes',sentence2='no')
cc

correlationScore(correlation_score=1.0, sentence1='yes', sentence2='no')

In [35]:
from langchain_community.chat_models import ChatCohere

model = ChatCohere(cohere_api_key=cohere_api_key,temperature=0)

In [36]:
# And a query intented to prompt a language model to populate the data structure.
#sentence1 = "I hate rainy days."
#sentence2 = "nothing makes me happier than rainy weather."
def get_correlation_score(sentence1: str, sentence2: str) -> correlationScore:
    # Set up a parser + inject instructions into the prompt template.
    parser = JsonOutputParser(pydantic_object=correlationScore)

    prompt = PromptTemplate(
        template="""
    Calculate the correlation score between two sentences, where -1 indicates maximum negative correlation (opposite meanings), and 1 indicates maximum positive correlation (similar or related meanings). Provide the correlation score based on the context and meaning of the sentences.
    "{format_instructions}"

    Sentence 1: "{sentence1}"
    Sentence 2: "{sentence2}"

    Provide the correlation score.
    """,
        input_variables=["sentence1", "sentence2"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    chain = prompt | model | parser

    ch=chain.invoke({"sentence1": sentence1, "sentence2": sentence2})
    return ch['correlation_score']

In [37]:
def convert_score_based_on_key(old_score: float, key: int) -> int:
    """
    Converts a score from a -1 to 1 scale to either a 1 to 5 scale or an inverted 5 to 1 scale
    based on the specified key.

    Parameters:
    - old_score: A float representing the score to convert, expected to be between -1 and 1.
    - key: An integer, where 1 indicates a direct conversion to a 1 to 5 scale, and -1 indicates
           an inverted conversion to a 5 to 1 scale.

    Returns:
    - An integer score on the specified scale.
    """
    # Define the old range
    old_min, old_max = -1, 1
    
    if key == 1:
        # Direct conversion to a 1 to 5 scale
        new_min, new_max = 1, 5
    elif key == -1:
        # Inverted conversion to a 5 to 1 scale
        new_min, new_max = 5, 1
    else:
        raise ValueError("Key must be 1 or -1")

    # Apply the linear transformation formula
    new_score = (old_score - old_min) * (new_max - new_min) / (old_max - old_min) + new_min

    # Round the result to the nearest whole number
    # Ensure the result is within the bounds of the target scale
    new_score = round(new_score)
    new_score = max(min(new_score, max(new_min, new_max)), min(new_min, new_max))

    return new_score


In [38]:
print(convert_score_based_on_key(get_correlation_score('Am the life of the party.', 'Feel little concern for others.'),-1))

5


In [39]:
def apply_correlation(input:str,sentences:List[str]):
    correlation_scores = []
    for sentence in sentences:
        correlation_score = convert_score_based_on_key(get_correlation_score(input, sentence[0]),sentence[1])
        correlation_scores.append(correlation_score)
    return torch.tensor(correlation_scores)

In [40]:
import time
import torch
from typing import List, Tuple
from tqdm import tqdm 

def apply_correlation(input: str, sentences: List[Tuple[str, int]]):
    correlation_scores = []
    call_interval = 60.0 / 19  # Calculate the interval in seconds to stay within the limit

    for sentence in tqdm(sentences, desc="Calculating correlation scores"):  # Add tqdm loading bar
        start_time = time.time()  # Record the start time of the API call

        correlation_score = convert_score_based_on_key(get_correlation_score(input, sentence[0]),sentence[1])
        correlation_scores.append(correlation_score)

        end_time = time.time()  # Record the end time of the API call
        elapsed_time = end_time - start_time  # Calculate how long the API call took

        if elapsed_time < call_interval:
            time.sleep(call_interval - elapsed_time)  # Sleep to maintain the API call rate limit

    return torch.tensor(correlation_scores)


In [None]:
sentence_scores= apply_correlation("I hate rainy days.",sentences)

In [41]:

personality_scores = tensor @ sentence_scores

personality_scores

Calculating correlation scores: 100%|██████████| 50/50 [02:47<00:00,  3.35s/it]


RuntimeError: expected scalar type Float but found Long

In [42]:
tensor=tensor.type(torch.LongTensor)

In [43]:
personality_scores = tensor @ sentence_scores

personality_scores

tensor([26, 22, 24, 35, 18])

In [44]:
def get_personnality_scores(chunks:List[str])->List[torch.Tensor]:
    personality_scores = []
    for chunk in chunks:
        sentence_scores= apply_correlation(chunk,sentences)
        personality_scores.append(tensor @ sentence_scores)
    return personality_scores


In [45]:
labels

['Extraversion',
 'Agreeableness',
 'Conscientiousness',
 'Emotional Stability',
 'Intellect/Imagination']

In [47]:
traits_scores=list(zip (labels, personality_scores.tolist()))

In [68]:
class Percentage:
    def __init__(self, value: float):
        if not (0 <= value <= 100):
            raise ValueError("Percentage value must be between 0 and 100")
        self.value = value

    def __str__(self):
        return f"{self.value}%"
    def __repr__(self):
        return f"{self.value}%"
    def __add__(self, other):
        if isinstance(other, Percentage):
            return Percentage(self.value + other.value)
        elif isinstance(other, (int, float)):
            return Percentage(self.value + other)
        else:
            raise TypeError("Unsupported operand type(s) for +: 'Percentage' and '{}'".format(type(other).__name__))
    
    def __radd__(self, other):
        return self.__add__(other)
    # lets code the mean function
    def mean(self, percentages: List["Percentage"]):
        if isinstance(percentages, list):
            values = [p.value for p in percentages]
            values.append(self.value)
            x = sum(values) / len(values)
            return Percentage(x)
        else:
            raise TypeError("Unsupported operand type(s) for +: 'Percentage' and '{}'".format(type(values).__name__))
# Example usage
p1 = Percentage(25)
p2 = Percentage(50)
p3 = Percentage(75)
p4 = Percentage(100)
p5 = Percentage(0)
p6 = Percentage(10)

p1.mean([p2, p3, p4, p5, p6])

43.333333333333336%

In [61]:
traits_percentages = [(trait, Percentage(score / 50 * 100)) for trait, score in traits_scores]
traits_percentages

[('Extraversion', 52.0%),
 ('Agreeableness', 44.0%),
 ('Conscientiousness', 48.0%),
 ('Emotional Stability', 70.0%),