In [53]:
import uuid
from math import isnan
import re
import numpy as np

In [65]:


class Graph:
    def __init__(self, sentence: str, debug: bool = False):
        self.words = sentence.split(" ")
        self.count = len([word for word in self.words if len(word) > 0])
        self.gLen = self.count
        self.nodes = [None] * (self.count * 2)  # Armazenamento para conceitos
        self.edges = [None] * (self.count * 2)  # Armazenamento para arestas
        self.conceptCount = 0  # Contador para conceitos
        self.edgeCount = 0  # Contador para arestas
        self.repeated = 0
        self.autoInc = 0  # Variável para auto-incremento
        self.debug = debug  # Variável para debug

        if debug:
            print(sentence) 
             
    def getCode(self):
        # Implementação do método getCode
        code = f"ma{self.autoInc}"
        self.autoInc += 1
        return code

    def addConcept(self, name: str, code: str, pos: str):
        for i in range(self.conceptCount):
            if self.nodes[i]['code'] == code:
                nCode = self.getCode()
                self.addConcept(name, nCode, pos)
                rName = self.getCode()
                self.addRelationIn("equality", rName, code)
                self.addRelationEx("equality", rName, nCode)
                return
        self.nodes[self.conceptCount] = {'lemma': name, 'code': code, 'pos': pos}
        self.conceptCount += 1

    def addRelationIn(self, name: str, code: str, in_relation: str):
        found = False
        for i in range(self.edgeCount):
            if self.edges[i]['code'] == code:
                found = True
                break
        if found:
            self.edges[i]['in'] = in_relation
        else:
            self.edges[self.edgeCount] = {'code': code, 'name': name, 'in': in_relation}
            self.edgeCount += 1

    def addRelationEx(self, name: str, code: str, ex: str):
        found = False
        for i in range(self.edgeCount):
            if self.edges[i]['code'] == code:
                found = True
                break
        if found:
            self.edges[i]['ex'] = ex
        else:
            self.edges[self.edgeCount] = {'code': code, 'name': name, 'ex': ex}
            self.edgeCount += 1

    def addIsRelation(self, name: str, code: str):
        found = False
        for i in range(self.edgeCount):
            if self.edges[i]['code'] == code:
                self.edges[i]['name'] = name
                return
        self.edges[self.edgeCount] = {'code': code, 'name': name}
        self.edgeCount += 1
    
    def getWeight(self, relation: str) -> int:
        if relation == "equality":
            return 1
        elif relation == "is":
            return 4
        elif relation == "rep":
            return 1
        elif relation == "for":
            return 1
        elif relation == "on":
            return 1
        elif relation == "of":
            return 1
        elif relation == "in":
            return 3
        elif relation == "experiencer":
            return 6
        elif relation == "agent":
            return 8
        elif relation == "theme":
            return 8
        else:
            return 1

    def printNodes(self):
        for i in range(self.conceptCount):
            node = self.nodes[i]
            print(f"{node['code']}   {node['lemma']}   {node['pos']}")

    def printEdges(self):
        for i in range(self.edgeCount):
            edge = self.edges[i]
            print(f"{edge['name']}\t{edge.get('in', '')}\t{edge.get('ex', '')}")

    def getNode(self, code: str):
        for i in range(self.conceptCount):
            if self.nodes[i]['code'] == code:
                return self.nodes[i]
        for i in range(self.edgeCount):
            if self.edges[i]['name'] == "equality" and self.edges[i]['ex'] == code:
                return self.getNode(self.edges[i]['in'])
        return {'lemma': code}

    def finalize(self):
        for i in range(self.edgeCount):
            if self.edges[i]['name'] == "is":
                for j in range(self.edgeCount):
                    if self.edges[i]['code'] == self.edges[j]['code']:
                        self.edges[i]['ex'] = self.edges[j]['ex']
                        self.edges[i]['in'] = self.edges[j]['in']
                        print("found")
                        break

    def checkGraph(self) -> bool:
            if self.debug:
                for i in range(self.conceptCount):
                    node = self.nodes[i]
                    if node:
                        print(f"{node['code']}   {node['lemma']}")

            for i in range(self.edgeCount):
                edge = self.edges[i]
                if edge:
                    in_node = self.getNode(edge['in'])['lemma'] if edge.get('in') else None
                    ex_node = self.getNode(edge['ex'])['lemma'] if edge.get('ex') else None

                    print(f"{in_node}  {edge['name']}   {ex_node}")

                    if edge['name'] == "equality" and in_node == ex_node:
                        self.repeated += 1

                    for j in range(self.edgeCount):
                        other_edge = self.edges[j]
                        if other_edge and edge['name'] == other_edge['name']:
                            in_node_other = self.getNode(other_edge['in'])['lemma'] if other_edge.get('in') else None
                            ex_node_other = self.getNode(other_edge['ex'])['lemma'] if other_edge.get('ex') else None

                            if in_node == in_node_other and ex_node == ex_node_other:
                                other_edge['name'] = "rep"
                                self.repeated += 1

            return True if self.repeated > 0 else False
    
    # Atualizando a função matchRelationNameSTS para usar o método getWeight da classe Graph
    def matchRelationNameSTS(r1: str, r2: str) -> float:
        if r1 == "is" and r2 == "is":
            return 0.7
        if r1 == "equality" and r2 == "equality":
            return 0.7
        if r1 == "rep":
            return 0.0
        if r1 == r2:
            return 1.0
        else:
            return 0.79 - 0.01 * (Graph.getWeight(r1) + Graph.getWeight(r2))

    # Convertendo a função matchRelationName para Python
    def matchRelationName(r1: str, r2: str) -> float:
        if r1 == "is" and r2 == "is":
            return 0.7
        if r1 == "equality" and r2 == "equality":
            return 0.7
        if r1 == "rep":
            return 0.0
        if r1 == r2:
            return 1.0
        else:
            return 0.73
        
    # Corrigindo a função stringSim uma última vez
    def stringSim(s1: str, s2: str) -> float:
        m, n = len(s1), len(s2)

        if m == 0 and n == 0: return 1.0
        if m == 0 or n == 0: return 0.0

        costs = [0] * (n + 1)

        # Initialize the array
        for k in range(n + 1):
            costs[k] = k

        for i, c1 in enumerate(s1):
            costs[0] = i + 1
            corner = i

            for j, c2 in enumerate(s2):
                upper = costs[j + 1]
                if c1 == c2:
                    costs[j + 1] = corner
                else:
                    t = min(upper, corner)
                    costs[j + 1] = min(costs[j], t) + 1
                corner = upper

        result = costs[n]
        return 1 - float(result) / max(m, n)

    def placeholder_compute_similarity(word1: str, word2: str) -> float:
        # Placeholder for the real similarity function
        return 0.5

    def is_alpha(s: str) -> bool:
        return bool(re.match('^[a-zA-Z]+$', s))

def printWordMatrix(g1, g2):
    for node in g2.nodes:
        print(f"{node['lemma']}   &   ", end='')
    print()
    
    for node1 in g1.nodes:
        print(f"{node1['lemma']}   &   ", end='')
        for node2 in g2.nodes:
            similarity = matchConcept(node1['lemma'], node2['lemma'])  # Assuming matchConcept is defined
            print(f"{node1['lemma']}  {node2['lemma']}  {similarity}  &  ", end='')
        print()


In [55]:
class GraphSTS(Graph):
    def __init__(self, sentence: str, debug: bool = False):
        super().__init__(sentence, debug)

    def matchGraph(self, g) -> float:
        matrix = np.zeros((self.edgeCount, g.edgeCount))
        
        if self.debug:
            print("     ", end="")
            for j in range(g.edgeCount):
                print(f"{g.edges[j]['name']:>12}", end="     ")
            print()

        for i in range(self.edgeCount):
            if self.debug:
                print(f"{self.edges[i]['name']:>12}", end="     ")
            
            for j in range(g.edgeCount):
                matrix[i, j] = self.matchRelation(self, self.edges[i], g, g.edges[j])
                if self.debug:
                    print(f"{matrix[i, j]:>12}", end="     ")
            if self.debug:
                print()

        sum_val = 0
        max_val = 0
        nan_count = 0
        rel = ""
        weight_sum = 0
        w = 0

        for i in range(self.edgeCount):
            max_val = 0
            for j in range(g.edgeCount):
                if matrix[i, j] > max_val:
                    max_val = matrix[i, j]
                    rel = g.edges[j]['name']
            
            if max_val == 0:
                nan_count += 1

            w = self.getWeight(rel)
            sum_val += max_val * w
            weight_sum += w

        if g.edgeCount - nan_count == 0:
            d1 = 0
        else:
            d1 = sum_val / weight_sum

        sum_val = 0
        weight_sum = 0
        nan_count = 0

        for j in range(g.edgeCount):
            max_val = 0
            for i in range(self.edgeCount):
                if matrix[i, j] > max_val:
                    max_val = matrix[i, j]
                    rel = self.edges[i]['name']
            
            if max_val == 0:
                nan_count += 1

            w = self.getWeight(rel)
            sum_val += max_val * w
            weight_sum += w

        if g.edgeCount - nan_count == 0:
            return 0

        d2 = sum_val / weight_sum

        if self.debug:
            print(f"\ndddd {d1}   {d2}   {(d1 + d2) / 2}")

        return (d1 + d2) / 2

# Testing the function with placeholder Graph objects
g1 = GraphSTS("This is a test sentence")
g2 = GraphSTS("This is other test sentence")

print(g1.matchGraph(g2))  # Should return a value (placeholder functions used)

0


In [68]:
import math

class Util:
    def __init__(self):
        self.l = ''

    def get_line(self, f):
        # Replace this with your implementation
        pass

    def get_string_line(self, f):
        # Replace this with your implementation
        pass

    def split(self, str_, del_, max_):
        # Replace this with your implementation
        pass

    def write_line(self, f, line):
        # Replace this with your implementation
        pass

    def write_to_file(self, f, line):
        # Replace this with your implementation
        pass

    @staticmethod
    def calc_pearson(x, y, size):
        sumXY, sumX2, sumY2, sumX, sumY = 0, 0, 0, 0, 0
        for i in range(size):
            sumXY += x[i] * y[i]
            sumX2 += x[i] * x[i]
            sumY2 += y[i] * y[i]
            sumX += x[i]
            sumY += y[i]
        
        p = (size * sumXY - sumX * sumY) / (math.sqrt(size * sumX2 - sumX ** 2) * math.sqrt(size * sumY2 - sumY ** 2))
        return p

    def string_sim(self, s1, s2):
        # Replace this with your implementation
        pass


In [77]:
import re

def graph_STS():
    tt = 0
    print("sts Graph")

    with open("/home/ritaalamino/workspace/Testes/Mestrado/test.txt", "r") as fres, \
            open("/home/ritaalamino/workspace/Testes/Mestrado/output.txt", "w") as fresult, \
            open("/home/ritaalamino/workspace/Testes/Mestrado/test2.txt", "r") as fout:
        
        resLine = ""
        # Regular expressions
        conceptReg = re.compile(r"c\d+:(.+?):.+instance (.+?) ")
        argReg = re.compile(r"c\d+:(.+?):.+arg (.+?) ")
        inRelReg = re.compile(r"(.*?):(.*?):.* int (.*?) ")
        exRelReg = re.compile(r"(.*?):(.*?):.* ext (.*?) ")
        inEqRelReg = re.compile(r"(.*?):equality int (.*?) ")
        exEqRelReg = re.compile(r"(.*?):equality ext (.*?) ")
        isReg = re.compile(r".* (.*?):equality \d+ . is ")
        refReg = re.compile(r".+referent (.+?) .* (.+?) ")

        # Placeholder arrays for prop and origin
        prop = [0.0] * 2000
        origin = [0.0] * 2000

        odds = 0  # Initialize odds
        
        for i in range(1500 - odds):
            line = fout.readline().strip()
            print(line)
            
            # Initialize your GraphSTS object here (Placeholder)
            g1 = GraphSTS(line)
            
            while line and line[0] != '!':
                line = fout.readline().strip()
                print(line)
                
                if line and line[0] == '%':
                    continue
                else:
                    match = conceptReg.search(line)
                    if match:
                        g1.addConcept(match.group(1), match.group(2), "")
                        pass
                    elif (match := argReg.search(line)):
                        g1.addConcept(match.group(1), match.group(2), "jj")
                        pass
                    elif (match := inRelReg.search(line)):
                        g1.addRelationIn(match.group(2), match.group(1), match.group(3))
                        pass
                    elif (match := exRelReg.search(line)):
                        g1.addRelationEx(match.group(2), match.group(1), match.group(3))
                        pass
                    elif (match := inEqRelReg.search(line)):
                        g1.addRelationIn("equality", match.group(1), match.group(2))
                        pass
                    elif (match := exEqRelReg.search(line)):
                        g1.addRelationEx("equality", match.group(1), match.group(2))
                        pass
                    elif (match := isReg.search(line)):
                        g1.addIsRelation("is", match.group(1))
                        pass
                    elif (match := refReg.search(line)):
                        temp = match.group(2)
                        if len(temp) > 3:
                            g1.addConcept(match.group(2), match.group(1), "ref")
                            pass
            # Assuming the regular expressions and file reading setup are the same as in the previous snippet

            # Placeholder for checkGraph, replace with your actual checkGraph method
            g1.checkGraph()

            # Reading next line from the file (assuming 'fout' is the file object)
            line = fout.readline().strip()
            print(line)

            # Initialize your second GraphSTS object here (placeholder)
            g2 = GraphSTS(line)

            # Continue to read lines and process them
            while line and line[0] != '!':
                line = fout.readline().strip()
                print(line)

                if line and line[0] == '%':
                    continue
                else:
                    match = conceptReg.search(line)
                    if match:
                        g2.addConcept(match.group(1), match.group(2), "")
                        pass
                    elif (match := argReg.search(line)):
                        g2.addConcept(match.group(1), match.group(2), "jj")
                        pass
                    elif (match := inRelReg.search(line)):
                        g2.addRelationIn(match.group(2), match.group(1), match.group(3))
                        pass
                    elif (match := exRelReg.search(line)):
                        g2.addRelationEx(match.group(2), match.group(1), match.group(3))
                        pass
                    elif (match := inEqRelReg.search(line)):
                        g2.addRelationIn("equality", match.group(1), match.group(2))
                        pass
                    elif (match := exEqRelReg.search(line)):
                        g2.addRelationEx("equality", match.group(1), match.group(2))
                        pass
                    elif (match := refReg.search(line)):
                        temp = match.group(2)
                        if len(temp) > 3:
                            g2.addConcept(match.group(2), match.group(1), "ref")
                            pass
                    elif (match := isReg.search(line)):
                        g2.addIsRelation("is", match.group(1))
                        pass
            g2.checkGraph()

        # Assuming you have methods matchGraph and calcPearson implemented, 
        # and file objects fres and fresult for reading and writing respectively
        for i in range(1500):
            print("**************************************")
            
            # Placeholder for matchGraph, replace it with your actual method
            # prop[i] = g1.matchGraph(g2) * 5
            prop[i] = 0.0  # Placeholder
            
            print(f"+++++  {prop[i]}")
            
            te = f"{prop[i]:.6f}"
            fresult.write(te + "\n")  # Assuming fresult is a file object open for writing
            
            # Read next line from the file (assuming fres is the file object for reading)
            line = fres.readline().strip()
            origin[i] = float(line)
            
            print(origin[i])
            
            # Placeholder for conceptCount, replace them with your actual attributes or methods
            if g1.conceptCount <= 1 or g2.conceptCount <= 1:
                odds += 1
                i -= 1  # This doesn't work the same way in Python for loops as it does in C++
            
            print(f"odds  : {odds}   i {i}")

        # End of the loop
        print(f"GO  {odds}")

        # Placeholder for calcPearson, replace it with your actual method
        # result = utility.calcPearson(prop, origin, 1430)
        result = 0.0  # Placeholder

        print(f"result is  {result}")
        
    # # Don't forget to close the files when you're done
    # fres.close()
    # fresult.close()
    # # fOldres.close()
    # fout.close()


In [78]:
graph_STS()

sts Graph
Climate change is a pressing global issue, with consequences ranging from more frequent natural disasters to shifting ecosystems. Immediate action is needed to curb greenhouse gas emissions and reduce humanity's impact on the environment.
Artificial intelligence has revolutionized various industries, from healthcare to finance. As AI technology continues to advance, it has the potential to significantly impact our everyday lives, making tasks more efficient and accurate.
The pandemic has brought about significant changes to the way we work, with remote work becoming a new norm. As a result, companies have had to adapt and invest in digital tools and infrastructure to support this new way of working.
Quantum computing has the potential to revolutionize the world of technology, allowing for more complex calculations and faster processing times. This could lead to major breakthroughs in fields such as cryptography, drug discovery, and materials science.
The rise in popularity of

ValueError: could not convert string to float: "Climate change is a pressing global issue, with consequences ranging from more frequent natural disasters to shifting ecosystems. Immediate action is needed to curb greenhouse gas emissions and reduce humanity's impact on the environment."

In [58]:

# Criando um objeto da classe Graph e realizando testes
g = Graph("This is a complex sentence", debug=True)

# Adicionando conceitos e relações
g.addConcept("dog", "001", "NN")
g.addConcept("cat", "002", "NN")
g.addConcept("animal", "003", "NN")
g.addRelationIn("is-a", "R01", "001")
g.addRelationEx("has-a", "R02", "002")
g.addIsRelation("is-a", "R01")
g.addRelationIn("is-a", "R03", "001")  # Relação repetida para teste

# Testando o método checkGraph
print("\nChecking the graph:")
result = g.checkGraph()
print(f"\nGraph has repeated relations: {result}")

This is a complex sentence

Checking the graph:
001   dog
002   cat
003   animal
dog  is-a   None
None  has-a   cat
dog  is-a   None

Graph has repeated relations: True


In [66]:
# Criando um objeto da classe Graph para testar os novos métodos
g = Graph("This is a complex sentence", debug=True)

# Adicionando conceitos e relações
g.addConcept("dog", "001", "NN")
g.addConcept("cat", "002", "NN")
g.addRelationIn("is-a", "R01", "001")
g.addRelationEx("has-a", "R02", "002")
g.addIsRelation("is-a", "R01")

# Testando os métodos
print("Nodes:")
g.printNodes()
print("Edges:")
g.printEdges()

# Testando getNode
node = g.getNode("001")
print("Node with code 001:", node)

# Testando finalize
g.finalize()


This is a complex sentence
Nodes:
001   dog   NN
002   cat   NN
Edges:
is-a	001	
has-a		002
Node with code 001: {'lemma': 'dog', 'code': '001', 'pos': 'NN'}
dog   &   cat   &   

TypeError: 'NoneType' object is not subscriptable

In [7]:

# Testing the Python conversion with debug mode on
graph_debug = Graph("This is a test sentence.", debug=True)

# Output the current state of the graph object
graph_debug.__dict__

This is a test sentence.


{'words': ['This', 'is', 'a', 'test', 'sentence.'],
 'count': 5,
 'gLen': 5,
 'nodes': [None, None, None, None, None, None, None, None, None, None],
 'edges': [None, None, None, None, None, None, None, None, None, None],
 'conceptCount': 0,
 'edgeCount': 0,
 'autoInc': 0}

In [8]:
g = Graph("This is another test sentence")
codes = [g.getCode() for _ in range(5)]  # Gera 5 códigos
codes

['ma0', 'ma1', 'ma2', 'ma3', 'ma4']

In [12]:
# Criando um objeto da classe Graph e realizando testes
g = Graph("This is a test sentence", debug=True)

# Adicionando conceitos
g.addConcept("dog", "001", "NN")
g.addConcept("cat", "002", "NN")
g.addConcept("animal", "003", "NN")

# Adicionando relações
g.addRelationIn("is-a", "R01", "001")
g.addRelationEx("has-a", "R02", "002")

# Adicionando uma relação "is"
g.addIsRelation("is-a", "R01")

# Exibindo os nós e as arestas
g.nodes[:g.conceptCount], g.edges[:g.edgeCount]

# Testando diferentes tipos de relações
test_relations = ["equality", "is", "rep", "for", "on", "of", "in", "experiencer", "agent", "theme", "unknown"]
weights = {relation: g.getWeight(relation) for relation in test_relations}
weights

This is a test sentence


{'equality': 1,
 'is': 4,
 'rep': 1,
 'for': 1,
 'on': 1,
 'of': 1,
 'in': 3,
 'experiencer': 6,
 'agent': 8,
 'theme': 8,
 'unknown': 1}