In [97]:
%run init_notebook.py
from settings import DATA_DIR

In [98]:
import numpy as np
import spacy
import os

In [99]:
from src.transitivity import *

In [100]:
article = open(os.path.join(DATA_DIR, '20230825_economist_china2.txt'), 'r+')

In [101]:
article = """
Only eight months ago China’s economy was expected to roar back to life. Zero-covid had been abandoned; the country’s shoppers and tourists allowed to roam free. Yet the rebound has fizzled out, with weak growth and deflation the result. This will not only affect its people. What happens in the world’s second-largest economy matters beyond its borders, too.

Because China is so big, its changing economic fortunes can drive overall global growth figures. But a slowing China also directly affects other countries’ prospects. Its households and companies will buy fewer goods and services than they would have otherwise, with consequences for both the producers of these goods and other consumers of them. In some places, China’s difficulties will be a source of pain. In others, though, they will bring relief.

Commodity exporters are especially exposed to China’s slowdown. The country guzzles almost a fifth of the world’s oil, half of its refined copper, nickel and zinc, and more than three-fifths of its iron ore. China’s property woes will mean it requires less of such supplies. That will be a knock for countries such as Zambia, where exports of copper and other metals to China amount to 20% of gdp, and Australia, a big supplier of coal and iron. On August 22nd the boss of bhp, the world’s biggest miner, reported the lowest annual profit for the Australian firm in three years, and warned that China’s stimulus efforts were not producing changes on the ground.

Weak spots in the West include Germany. Faltering demand from China is one reason why the country’s economy has either contracted or stagnated over the past three quarters. And some Western companies are exposed through their reliance on China for revenues. In 2021 the 200 biggest multinationals in America, Europe and Japan made 13% of their sales in the country, earning $700bn. Tesla is more exposed still, making around a fifth of its sales in China; Qualcomm, a chipmaker, makes a staggering two-thirds.

Provided the slowdown does not escalate into a full-blown crisis, the pain will remain relatively concentrated. Sales to China account for only 4-8% of business for all listed companies in America, Europe and Japan. Exports from America, Britain, France and Spain come to 1-2% of their respective outputs. Even in Germany, with an export share of nearly 4%, China would have to collapse in order to generate a sizeable hit to its economy.

Moreover, China’s struggles come at a time when the rest of the world is doing better than expected. In July the imf revised up its forecast for global growth, compared with projections in April. Most notable has been the rude health of the world’s biggest importer and China’s geopolitical rival, America, which some surveys suggest is growing at the red-hot pace of nearly 6%

When set against this backdrop, China’s slowing growth should even provide a measure of relief for the world’s consumers, since it will mean less demand for commodities, bringing down prices and import costs. That in turn will ease the task faced by the Federal Reserve and other central banks. Many have already raised rates to their highest level in decades, and would not relish having to go further still.

But what if things go badly wrong in China? Under a worst-case scenario, a property meltdown could reverberate through the world’s financial markets. A study published by the Bank of England in 2018 found that a “hard landing” in China, where economic growth fell from 7% to -1%, would cause global asset prices to fall and rich-world currencies to rise as investors rushed in the direction of safer assets. Overall, British gdp would drop by 1.2%. Although most Western financial institutions have relatively little exposure to China, there are exceptions, such as hsbc and Standard Chartered, two British banks.

A longer slowdown could lead China to turn inward, reducing its overseas investments and loans. Having become the world’s biggest bilateral creditor in 2017, it has already cut back as projects turn sour. Officials may become fussier still if they are firefighting at home. Observers will watch forthcoming celebrations of a decade of the “Belt and Road Initiative”, the label under which China splurged on bridges in Mozambique and ports in Pakistan, for signals of official intent.

Real difficulties at home would also change how the world perceives China. Rapid growth, along with generous overseas lending, boosted the country’s reputation. According to a recent survey of two dozen countries by Pew, a pollster, people in rich locations had a generally unfavourable view of China. The picture was different in much of the emerging world: Mexicans, Kenyans, Nigerians and South Africans all saw China in a more favourable light, and welcomed Chinese investment. The question is whether that will still be true in a year’s time. 
"""

In [102]:
NLP = spacy.load('en_core_web_lg')

In [103]:
doc = NLP(article)

In [104]:
all_ents = list(doc.ents)
candidates = get_candidates(all_ents)

In [105]:
all_ents

[Only eight months ago,
 China,
 Zero,
 second,
 China,
 China,
 China,
 China,
 almost a fifth,
 half,
 more than three-fifths,
 China,
 Zambia,
 China,
 20%,
 Australia,
 August 22nd,
 bhp,
 annual,
 Australian,
 three years,
 China,
 West,
 Germany,
 China,
 one,
 the past three quarters,
 Western,
 China,
 2021,
 200,
 America,
 Europe,
 Japan,
 13%,
 700bn,
 Tesla,
 around a fifth,
 China,
 Qualcomm,
 two-thirds,
 China,
 America,
 Europe,
 Japan,
 America,
 Britain,
 France,
 Spain,
 1-2%,
 Germany,
 nearly 4%,
 China,
 China,
 July,
 April,
 China,
 America,
 nearly 6%,
 China,
 the Federal Reserve,
 decades,
 China,
 the Bank of England,
 2018,
 China,
 7% to -1%,
 British,
 1.2%,
 Western,
 China,
 hsbc,
 Standard Chartered,
 two,
 British,
 China,
 2017,
 a decade,
 China,
 Mozambique,
 Pakistan,
 China,
 two dozen,
 Pew,
 China,
 Mexicans,
 Kenyans,
 Nigerians,
 South Africans,
 China,
 Chinese,
 year]

In [106]:
candidates

array([['7% to -1%', '1.2%', True],
       ['7% to -1%', '13%', True],
       ['7% to -1%', 'nearly 6%', True],
       ['7% to -1%', 'nearly 4%', True],
       ['7% to -1%', '20%', True],
       ['around a fifth', 'a decade', True],
       ['around a fifth', 'almost a fifth', True],
       ['1.2%', '13%', True],
       ['1.2%', 'nearly 6%', True],
       ['1.2%', 'nearly 4%', True],
       ['1.2%', '20%', True],
       ['two dozen', 'two', True],
       ['13%', 'nearly 6%', True],
       ['13%', 'nearly 4%', True],
       ['13%', '20%', True],
       ['13%', '1-2%', True],
       ['2017', '2018', True],
       ['2017', '2021', True],
       ['Australia', 'australian', True],
       ['July', 'August 22nd', True],
       ['July', 'April', True],
       ['nearly 6%', 'nearly 4%', True],
       ['nearly 6%', '20%', True],
       ['nearly 6%', '1-2%', True],
       ['2018', '2021', True],
       ['a decade', 'almost a fifth', True],
       ['nearly 4%', '20%', True],
       ['nearly 4%', '1

In [107]:
from src.transitivity import _transitvity_check_first_level, _transitivity_check_second_level

In [108]:
def is_matrix_transitive(M):
    return np.sum(np.sum((((M @ M) > 0) * 1) - M)) == 0

In [109]:
def _recursion_transitive_clusters(candidates: np.array, max_recursion: int = 100) -> list:
    """
    This function recursively builds a set of pairs that are linked from a list of candidates.
     For each such set of linked pairs, this function then checks transitivty
    :param candidates: np.array of tuple pairs
    :param max_recursion:
    :return:
    """
    out = [] # output
    lst_has_been_checked = [] # list containing all expression that have been checked

    # print(f'started recursion for {candidates.shape[0]}') # candidates: contains pairs of candidates which stand in relation A
    for idx in range(0, candidates.shape[0]):

        counter = 0 # recursion counter
        #word #1 A word#2 -> 1st_tb_checked for other pairs containig either #1 xor #2

        lst_tb_checked = list(candidates[idx, :2])

        # if canidates have been checked already skip
        # candidates might be related to a previously explored set, thus avoid unecessary runs

        if (lst_tb_checked[0] in lst_has_been_checked) and (lst_tb_checked [1] in lst_has_been_checked):
            continue
        # set of all pairs with relation A, which are linked to either #1 or #2

        OVERALL_SET = set()
        OVERALL_SET.add(tuple (lst_tb_checked))

        while len(lst_tb_checked) > 0:
            # 1st level: finds all pairs that contain (xor) words in lst_tb_checked 
            set_topics = _transitvity_check_first_level(candidates, lst_tb_checked) 
            # 2nd level: checks whether adding new pairs has added new words #N 
            lst_tb_checked = _transitivity_check_second_level(set_topics, OVERALL_SET) 
            OVERALL_SET.update(set_topics)
            
            ## loop
            # for new words #N, pairs containing #N words are added
            # adding new pairs might add new words 
            # the recursion stops when no new words, requiring adding new pairs occur
            
            # recursion max to avoid infinite loop, adjust as needed
            if counter > max_recursion:
                # logger.warning(f'maximal recursion depth (max_recursion} has been reached for: {candidates}')
                print(f'maximal recursion depth {max_recursion} has been reached for: {candidates}')
                break

            # tracking recursion depth
            counter += 1

            # each pair yields a set of related pairs, these can be represented in a matrix
            matrix = pd.DataFrame(
                [
                    *chain(
                        *[
                            [
                                (i[0], i[1], True), (i[1], i[0], True)
                            ] for i in OVERALL_SET
                        ]
                    )
                ],
                columns = ['level1', 'level2', 'is_relation']
            )
            matrix = matrix.pivot_table(index='level1', columns='level2', values='is_relation').fillna(0)
            np.fill_diagonal(matrix.values, True)  # fill diagonal to be True, (true by definition as word#1 == word#1)

            # check tranistivity
            is_transitive = is_matrix_transitive(matrix)
            # append output
            lst_overall_set = list(OVERALL_SET)
            lst_overall_set.sort()
            out.append((tuple(lst_overall_set), is_transitive))

            # append items to has been_checked
            lst_has_been_checked.extend(list({*chain(*[list(item) for item in lst_overall_set])}))

    return out

In [110]:
tpl_cluster = _recursion_transitive_clusters(candidates)

In [111]:
[i[0] for i in tpl_cluster if i[-1]]

[(('a decade', 'almost a fifth'),
  ('around a fifth', 'a decade'),
  ('around a fifth', 'almost a fifth')),
 (('a decade', 'almost a fifth'),
  ('around a fifth', 'a decade'),
  ('around a fifth', 'almost a fifth')),
 (('two dozen', 'two'),),
 (('2017', '2018'), ('2017', '2021'), ('2018', '2021')),
 (('2017', '2018'), ('2017', '2021'), ('2018', '2021')),
 (('Australia', 'australian'),),
 (('August 22nd', 'April'), ('July', 'April'), ('July', 'August 22nd')),
 (('August 22nd', 'April'), ('July', 'April'), ('July', 'August 22nd'))]

In [122]:
cluster = [list(z) for z in set([frozenset([*chain(*[*chain([list(i) for i in item[0]])])]) for item in tpl_cluster if item[-1]])]

In [127]:
cluster

[['July', 'April', 'August 22nd'],
 ['a decade', 'around a fifth', 'almost a fifth'],
 ['two', 'two dozen'],
 ['Australia', 'australian'],
 ['2017', '2018', '2021']]