In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import collections
import pickle
import gzip

from tf.app import use


In [3]:
A = use("q-ran/quran:clone", checkout="clone", hoist=globals())

Using TF-app in /Users/dirk/github/annotation/app-quran/code:
	repo clone offline under ~/github (local github)
Using data in /Users/dirk/github/q-ran/quran/tf/0.4:
	repo clone offline under ~/github (local github)


# Parallels

We make edges between similar ayas.

When are ayas similar?

If a certain similarity measure is above a certain threshold.

We choose this metric:

* we reduce a line to the set of lemmas in it.
* the similarity between two lines is the length of the intersection divided by the length of the union of their sets times 100.

# Preparation

We pre-compute all sets for all ayas.

In [4]:
A.silentOff()

In [5]:
def makeSet(ln):
    """Collect the distinct, non-empty lemmas of the words in a line.

    Parameters
    ----------
    ln:
        The node whose `word` descendants are inspected
        (in this notebook: an aya node).

    Returns
    -------
    set
        The truthy values of the `lemma` feature over the nodes
        yielded by `L.d(ln, otype="word")`.
    """
    lemmas = (F.lemma.v(word) for word in L.d(ln, otype="word"))
    # words without a (truthy) lemma value do not contribute to the set
    return {lemma for lemma in lemmas if lemma}

In [6]:
# Map every aya node to its lemma set; ayas with an empty set are left out.
candidates = ((aya, makeSet(aya)) for aya in F.otype.s("aya"))
lines = {aya: lemmas for (aya, lemmas) in candidates if lemmas}

nLines = len(lines)
print(f"{nLines} ayas")

6216 ayas


# Measure

In [7]:
def sim(lSet, mSet):
    """Similarity of two sets as a whole percentage.

    The similarity is the size of the intersection divided by the size of
    the union (the Jaccard index), times 100, rounded to an integer.

    Parameters
    ----------
    lSet, mSet: set
        The sets to compare.

    Returns
    -------
    int
        A value between 0 and 100. Two empty sets have no material to
        compare and yield 0 instead of raising `ZeroDivisionError`.
    """
    union = lSet | mSet
    if not union:
        # both sets are empty: nothing in common, and nothing to divide by
        return 0
    return int(round(100 * len(lSet & mSet) / len(union)))

# Compute all similarities

We are going to perform almost twenty million comparisons, each of which is more than an elementary operation.

Let's measure time.

In [8]:
THRESHOLD = 60  # minimum similarity (in percent) for a pair to be recorded


def computeSim(limit=None):
    """Compute the similarity of every unordered pair of lines in `lines`.

    All pairs of nodes in the module-level dict `lines` are compared with
    `sim()`; pairs scoring at least `THRESHOLD` are recorded.
    Progress is reported through `A.info` after every 1% of the comparisons.

    Parameters
    ----------
    limit: int, optional
        If given, stop as soon as this many progress reports (percent-sized
        chunks of comparisons) have been issued. `None` means: do everything.

    Returns
    -------
    dict
        Keyed by `(nodeI, nodeJ)` with `nodeI < nodeJ`, valued by the
        similarity percentage of the two lines.
    """
    similarity = {}

    lineNodes = sorted(lines.keys())
    nLines = len(lineNodes)

    nComparisons = nLines * (nLines - 1) // 2

    print(f"{nComparisons} comparisons to make")
    # NB: with fewer than 100 comparisons chunkSize is 0, so no intermediate
    # progress is reported and `limit` never takes effect
    chunkSize = nComparisons // 100

    co = 0  # comparisons made so far
    b = 0  # comparisons made since the last progress report
    si = 0  # similar pairs found so far
    p = 0  # progress reports issued so far, i.e. percent done

    A.indent(reset=True)

    stop = False
    for i in range(nLines):
        nodeI = lineNodes[i]
        lineI = lines[nodeI]
        for j in range(i + 1, nLines):
            nodeJ = lineNodes[j]
            lineJ = lines[nodeJ]
            s = sim(lineI, lineJ)
            co += 1
            b += 1

            # store the outcome before a possible early stop, so that the
            # comparison that completes a chunk is not silently dropped;
            # reuse `s` instead of computing the similarity a second time
            if s >= THRESHOLD:
                similarity[(nodeI, nodeJ)] = s
                si += 1

            if b == chunkSize:
                p += 1
                A.info(f"{p:>3}% - {co:>12} comparisons and {si:>10} similarities")
                b = 0
                if limit is not None and p >= limit:
                    stop = True
                    break
        if stop:
            break

    A.info(f"{p:>3}% - {co:>12} comparisons and {si:>10} similarities")
    return similarity

We first run it for only a few percent of the comparisons, and then do some checks.

In [9]:
similarity = computeSim(limit=4)

19316220 comparisons to make
  0.33s   1% -       193162 comparisons and          7 similarities
  0.67s   2% -       386324 comparisons and         19 similarities
  0.99s   3% -       579486 comparisons and         26 similarities
  1.33s   4% -       772648 comparisons and         29 similarities
  1.33s   4% -       772648 comparisons and         29 similarities


We check the sanity of the results.

In [10]:
print(min(similarity.values()))
print(max(similarity.values()))

60
100


In [11]:
# Pairs that are fully similar, and pairs that are only just similar enough.
eq = [(pair, score) for (pair, score) in similarity.items() if score >= 100]
neq = [(pair, score) for (pair, score) in similarity.items() if score <= 70]

In [12]:
print(len(eq))
print(len(neq))

5
12


In [13]:
print(eq[0])
print(neq[0])

((128221, 132189), 100)
((128221, 132728), 67)


In [14]:
A.plain(eq[0][0][0])
A.plain(eq[0][0][1])

In [15]:
A.plain(eq[0][0][0], fmt="text-trans-full")
A.plain(eq[0][0][1], fmt="text-trans-full")

In [16]:
for fmt in T.formats:
    print(fmt)
    A.plain(neq[0][0][0], fmt=fmt)
    A.plain(neq[0][0][1], fmt=fmt)

lex-trans-full


root-trans-full


text-orig-full


text-trans-full


Looks good.

Now the whole computation.

But if we have done this before, and nothing has changed, we load previous results from disk.

If we do not find previous results, we compute them and save the results to disk.

In [17]:
# Directory, below the app's temp directory, used as the location
# for the cached similarity results.
PARA_DIR = f"{A.tempDir}/parallels"


def writeResults(data, location, name):
    """Pickle `data` and write it gzip-compressed to `location/name`.

    Parameters
    ----------
    data:
        Any picklable object.
    location: str
        Directory to write to; it is created (with parents) if needed.
    name: str
        File name within `location`.

    Returns
    -------
    None
        The path written to is reported on standard output.
    """
    # exist_ok=True makes this a no-op for an existing directory, so a
    # separate (race-prone) existence check beforehand is not needed
    os.makedirs(location, exist_ok=True)
    path = f"{location}/{name}"
    with gzip.open(path, "wb") as f:
        pickle.dump(data, f)
    print(f"Data written to {path}")


def readResults(location, name):
    """Read back a gzip-compressed pickle from `location/name`.

    Parameters
    ----------
    location: str
        Directory that holds the file.
    name: str
        File name within `location`.

    Returns
    -------
    object or None
        The unpickled data, or `None` (after printing a message)
        when the file does not exist.
    """
    path = f"{location}/{name}"
    if os.path.exists(path):
        # NOTE: unpickling can execute arbitrary code; only read files
        # that you have written yourself (e.g. with writeResults)
        with gzip.open(path, "rb") as f:
            data = pickle.load(f)
        print(f"Data read from {path}")
        return data
    print(f"File not found: {path}")
    return None

In [18]:
# Load previously computed similarities if they are on disk;
# otherwise compute them now and save them for the next run.
similarity = readResults(PARA_DIR, f"sim-{A.version}.zip")
# readResults returns None when there is no file; a cached result that is
# valid but empty must not trigger a recomputation, so test for None explicitly
if similarity is None:
    similarity = computeSim()
    writeResults(similarity, PARA_DIR, f"sim-{A.version}.zip")

File not found: /Users/dirk/github/q-ran/quran/_temp/parallels/sim-0.4.zip
19316220 comparisons to make
  0.34s   1% -       193162 comparisons and          7 similarities
  0.68s   2% -       386324 comparisons and         19 similarities
  1.03s   3% -       579486 comparisons and         26 similarities
  1.38s   4% -       772648 comparisons and         29 similarities
  1.72s   5% -       965810 comparisons and         32 similarities
  2.06s   6% -      1158972 comparisons and         34 similarities
  2.43s   7% -      1352134 comparisons and         34 similarities
  2.83s   8% -      1545296 comparisons and         34 similarities
  3.21s   9% -      1738458 comparisons and         34 similarities
  3.58s  10% -      1931620 comparisons and         38 similarities
  3.91s  11% -      2124782 comparisons and         40 similarities
  4.27s  12% -      2317944 comparisons and         42 similarities
  4.61s  13% -      2511106 comparisons and         45 similarities
  4.95s  14%

In [19]:
len(similarity)

1234

So, just over a thousand pairs of similar lines.

Let's find out which lines have the most correspondences.

In [20]:
# For every aya: the set of ayas it is similar to.
# Each pair is stored once in `similarity`, so we register it in both directions.
parallels = {}

for pair in similarity:
    for (node, other) in (pair, pair[::-1]):
        parallels.setdefault(node, set()).add(other)

print(f"{len(parallels)} out of {nLines} ayas have at least one similar aya")

812 out of 6216 ayas have at least one similar aya


In [21]:
# Most parallels first; ayas with equally many parallels are ordered by node.
rankedParallels = sorted(
    parallels.items(), key=lambda entry: (-len(entry[1]), entry[0])
)

In [22]:
# The ten ayas with the most parallels: node, text and transliteration.
for (ln, paras) in rankedParallels[:10]:
    arabic = T.text(ln)
    translit = T.text(ln, fmt="text-trans-full", descend=True)
    print(f"{len(paras):>4} siblings of {ln} = {arabic} = {translit}")

  31 siblings of 133058 = فَبِأَىِّ ءَالَآءِ رَبِّكَ تَتَمَارَىٰ = fabi>aY~i 'aAlaA^'i rab~ika tatamaAraY`
  31 siblings of 133133 = فَبِأَىِّ ءَالَآءِ رَبِّكُمَا تُكَذِّبَانِ = fabi>aY~i 'aAlaA^'i rab~ikumaA tuka*~ibaAni
  31 siblings of 133136 = فَبِأَىِّ ءَالَآءِ رَبِّكُمَا تُكَذِّبَانِ = fabi>aY~i 'aAlaA^'i rab~ikumaA tuka*~ibaAni
  31 siblings of 133138 = فَبِأَىِّ ءَالَآءِ رَبِّكُمَا تُكَذِّبَانِ = fabi>aY~i 'aAlaA^'i rab~ikumaA tuka*~ibaAni
  31 siblings of 133141 = فَبِأَىِّ ءَالَآءِ رَبِّكُمَا تُكَذِّبَانِ = fabi>aY~i 'aAlaA^'i rab~ikumaA tuka*~ibaAni
  31 siblings of 133143 = فَبِأَىِّ ءَالَآءِ رَبِّكُمَا تُكَذِّبَانِ = fabi>aY~i 'aAlaA^'i rab~ikumaA tuka*~ibaAni
  31 siblings of 133145 = فَبِأَىِّ ءَالَآءِ رَبِّكُمَا تُكَذِّبَانِ = fabi>aY~i 'aAlaA^'i rab~ikumaA tuka*~ibaAni
  31 siblings of 133148 = فَبِأَىِّ ءَالَآءِ رَبِّكُمَا تُكَذِّبَانِ = fabi>aY~i 'aAlaA^'i rab~ikumaA tuka*~ibaAni
  31 siblings of 133150 = فَبِأَىِّ ءَالَآءِ رَبِّكُمَا تُكَذِّبَانِ = fabi>aY~i 'aAlaA^

In [23]:
# A sample from further down the ranking: text and transliteration.
for (ln, paras) in rankedParallels[100:110]:
    arabic = T.text(ln)
    translit = T.text(ln, fmt="text-trans-full", descend=True)
    print(f"{len(paras):>4} siblings of {arabic} = {translit}")

   4 siblings of إِنِّى لَكُمْ رَسُولٌ أَمِينٌ = <in~iY lakumo rasuwlN >amiynN
   4 siblings of إِنِّى لَكُمْ رَسُولٌ أَمِينٌ = <in~iY lakumo rasuwlN >amiynN
   4 siblings of إِنِّى لَكُمْ رَسُولٌ أَمِينٌ = <in~iY lakumo rasuwlN >amiynN
   4 siblings of فِى جَنَّٰتٍ وَعُيُونٍ = fiY jan~a`tK waEuyuwnK
   4 siblings of إِنِّى لَكُمْ رَسُولٌ أَمِينٌ = <in~iY lakumo rasuwlN >amiynN
   4 siblings of إِنِّى لَكُمْ رَسُولٌ أَمِينٌ = <in~iY lakumo rasuwlN >amiynN
   4 siblings of طسٓ تِلْكَ ءَايَٰتُ ٱلْقُرْءَانِ وَكِتَابٍ مُّبِينٍ = Ts^ tiloka 'aAya`tu {loquro'aAni wakitaAbK m~ubiynK
   4 siblings of تِلْكَ ءَايَٰتُ ٱلْكِتَٰبِ ٱلْحَكِيمِ = tiloka 'aAya`tu {lokita`bi {loHakiymi
   4 siblings of إِنَّا كَذَٰلِكَ نَجْزِى ٱلْمُحْسِنِينَ = <in~aA ka*a`lika najoziY {lomuHosiniyna
   4 siblings of كَذَٰلِكَ نَجْزِى ٱلْمُحْسِنِينَ = ka*a`lika najoziY {lomuHosiniyna


In [24]:
# A sample from the lower part of the ranking: text and transliteration.
for (ln, paras) in rankedParallels[500:510]:
    arabic = T.text(ln)
    translit = T.text(ln, fmt="text-trans-full", descend=True)
    print(f"{len(paras):>4} siblings of {arabic} = {translit}")

   1 siblings of يَأْتُوكَ بِكُلِّ سَحَّارٍ عَلِيمٍ = ya>otuwka bikul~i saH~aArK EaliymK
   1 siblings of فَلَمَّا جَآءَ ٱلسَّحَرَةُ قَالُوا۟ لِفِرْعَوْنَ أَئِنَّ لَنَا لَأَجْرًا إِن كُنَّا نَحْنُ ٱلْغَٰلِبِينَ = falam~aA jaA^'a {ls~aHarapu qaAluwA@ lifiroEawona >a}in~a lanaA la>ajorFA <in kun~aA naHonu {loga`libiyna
   1 siblings of قَالَ نَعَمْ وَإِنَّكُمْ إِذًا لَّمِنَ ٱلْمُقَرَّبِينَ = qaAla naEamo wa<in~akumo <i*FA l~amina {lomuqar~abiyna
   1 siblings of فَأُلْقِىَ ٱلسَّحَرَةُ سَٰجِدِينَ = fa>uloqiYa {ls~aHarapu sa`jidiyna
   1 siblings of قَالُوٓا۟ ءَامَنَّا بِرَبِّ ٱلْعَٰلَمِينَ = qaAluw^A@ 'aAman~aA birab~i {loEa`lamiyna
   1 siblings of رَبِّ مُوسَىٰ وَهَٰرُونَ = rab~i muwsaY` waha`ruwna
   1 siblings of قَالَ ءَامَنتُمْ لَهُۥ قَبْلَ أَنْ ءَاذَنَ لَكُمْ إِنَّهُۥ لَكَبِيرُكُمُ ٱلَّذِى عَلَّمَكُمُ ٱلسِّحْرَ فَلَسَوْفَ تَعْلَمُونَ لَأُقَطِّعَنَّ أَيْدِيَكُمْ وَأَرْجُلَكُم مِّنْ خِلَٰفٍ وَلَأُصَلِّبَنَّكُمْ أَجْمَعِينَ = qaAla 'aAmantumo lahu, qabola >ano 'aA*ana lakumo <in~ahu, 

And how many lines have just one correspondence?

We look at the tail of `rankedParallels`.

In [25]:
# One entry per aya that has exactly one parallel: (aya, its only parallel).
pairs = [
    (node, next(iter(siblings)))
    for (node, siblings) in rankedParallels
    if len(siblings) == 1
]
print(f"There are {len(pairs)} exclusively parallel pairs of lines")

There are 536 exclusively parallel pairs of lines


Why not make an overview of exactly how wide-spread parallel lines are?

We count how many lines have how many parallels.

In [26]:
# Count how many lines fall into each size class.
# The quantity that is bucketed is the cluster size: the line plus its parallels.
parallelCount = collections.Counter()

buckets = (2, 10, 20, 50, 100)
# separate class for everything above the largest bucket, so that it is not
# lumped together with the lines that fit in the largest bucket
overflowBucket = float("inf")

bucketRep = {}
prevBucket = None
for bucket in buckets:
    if prevBucket is None:
        bucketRep[bucket] = f"       n <= {bucket:>3}"
    else:
        bucketRep[bucket] = f"{prevBucket:>3} <  n <= {bucket:>3}"
    prevBucket = bucket
bucketRep[overflowBucket] = f"       n >  {buckets[-1]:>3}"

for (ln, paras) in rankedParallels:
    clusterSize = len(paras) + 1
    # the first (smallest) bucket that can hold this cluster, else overflow
    theBucket = next(
        (bucket for bucket in buckets if clusterSize <= bucket),
        overflowBucket,
    )
    parallelCount[theBucket] += 1

# largest size class first
for (bucket, amount) in sorted(
    parallelCount.items(),
    key=lambda x: (-x[0], x[1]),
):
    print(f"{amount:>4} lines have n sisters where {bucketRep[bucket]}")

  32 lines have n sisters where  20 <  n <=  50
  12 lines have n sisters where  10 <  n <=  20
 232 lines have n sisters where   2 <  n <=  10
 536 lines have n sisters where        n <=   2


# Add parallels to the TF dataset

We can add this information to the Quran dataset as an *edge feature*.

An edge feature links two nodes and may annotate that link with a value.

For parallels, we link each line to each of its parallel lines and we annotate that link with the similarity between
the two ayas. The similarity is a percentage, and we round it to integer values.

If `n1` is similar to `n2`, then `n2` is similar to `n1`.
In order to save space, we only add such links once.

We can then use
[`E.sim.b(node)`](https://annotation.github.io/text-fabric/Api/Features/#edge-features)
to find all nodes that are parallel to node.


In [27]:
# Metadata written into the exported feature file.
# The "" entry holds the generic metadata of the dataset,
# the "sim" entry holds the metadata specific to the new edge feature.
# The former name/editor/institute/converters entries described a different
# corpus (AbB Old Babylonian Cuneiform) and have been replaced or dropped;
# the conversion credits are given by convertedBy and createdBy.
metaData = {
    "": {
        "name": "Quran",
        "acronym": "quran",
        "convertedBy": "Dirk Roorda and Cornelis van Lit",
        "createdBy": "Kais Dukes",
        "createdDate": "2011",
        "description": "Quran: plain text plus morphological annotations at the word level",
        "license1": "Open Source, unspecified, see http://corpus.quran.com/releasenotes.jsp",
        "license2": "Creative Commons BY-ND 3.0 Unported",
        "source1": "Morphology: Quranic Arabic Corpus 0.4 (2011) by Kais Dukes",
        "source1Url": "http://corpus.quran.com",
        "source2": "Text: Tanzil Quran Text (Uthmani, version 1.0.2)",
        "source2Url": "http://tanzil.net/docs/home",
    },
    "sim": {
        "valueType": "int",
        # the edges carry a value: the similarity percentage
        "edgeValues": True,
        "description": "similarity between ayas, as a percentage of the common material wrt the combined material",
    },
}

In [28]:
# Reshape {(from, to): value} into the nested form {from: {to: value}}.
simData = {}
for (pair, score) in similarity.items():
    (source, target) = pair
    simData.setdefault(source, {})[target] = score

In [29]:
# Base directory: ~/github, expanded to an absolute path.
ghBase = os.path.expanduser("~/github")
subdir = "parallels"
# Location of the new feature relative to the base: <org>/<repo>/parallels/tf
path = f"{A.context.org}/{A.context.repo}/{subdir}/tf"
location = f"{ghBase}/{path}"
# The feature is saved under the version reported by the loaded app.
module = A.version

In [30]:
# Export `simData` as the edge feature `sim`, together with the metadata,
# to the directory given by `location` and `module`.
TF.save(
    edgeFeatures=dict(sim=simData), metaData=metaData, location=location, module=module
)

  0.00s Exporting 0 node and 1 edge and 0 config features to /Users/dirk/github/q-ran/quran/parallels/tf/0.4:
   |     0.00s T sim                  to /Users/dirk/github/q-ran/quran/parallels/tf/0.4
  0.01s Exported 0 node features and 1 edge features and 0 config features to /Users/dirk/github/q-ran/quran/parallels/tf/0.4


True

# Turn the parallels feature into a module

Here we show how to turn the new feature `sim` into a module, so that users can easily load it in a Jupyter notebook or in the TF browser.

In [31]:
%%bash
text-fabric-zip 'q-ran/quran/parallels/tf'

True
Create release data for q-ran/quran/parallels/tf
Found 2 versions
zip files end up in /Users/dirk/Downloads/q-ran-release/quran
zipping q-ran/quran                0.3 with   1 features ==> parallels-tf-0.3.zip
zipping q-ran/quran                0.4 with   1 features ==> parallels-tf-0.4.zip


I have added this file to a new release of the Quran GitHub repo.

# Use the parallels module

We load the Quran corpus again, but now with the parallels module.

In [63]:
A = use("q-ran/quran", hoist=globals(), check=True, mod="q-ran/quran/parallels/tf")

TF app is up-to-date.
Using annotation/app-quran commit f6543c213dad36050de3e90373af237e9a4f0bc1 (=latest)
  in /Users/dirk/text-fabric-data/__apps__/quran.
No new data release available online.
Using q-ran/quran/tf - 0.3 rv0.4 (=latest) in /Users/dirk/text-fabric-data.
	downloading q-ran/quran - 0.3 rv0.4
	from https://github.com/q-ran/quran/releases/download/v0.4/parallels-tf-0.3.zip ...
	unzipping ...
	saving q-ran/quran - 0.3 rv0.4
	saved q-ran/quran - 0.3 rv0.4
Using q-ran/quran/parallels/tf - 0.3 rv0.4 (=latest) in /Users/dirk/text-fabric-data


Lo and behold: you see the parallels module listed with one feature: `sim`. It is in *italics*, which indicates
it is an edge feature.

We just do a quick check here and in another notebook we study parallels a bit more, using the feature `sim`.

We count how many similar pairs there are, and how many 100% similar pairs there are.

In [64]:
query = """
aya
-sim> aya
"""
results = A.search(query)

  0.02s 1234 results


In [65]:
query = """
aya
-sim=100> aya
"""
results = A.search(query)

  0.02s 805 results


Remarkably, more than half of the pairs are 100 percent similar. Let's show just a few:

In [66]:
A.table(results, start=1, end=10, withNodes=True)

n,p,aya,aya.1
1,37:182,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ 128221,وَٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ 132189
2,4:68,ٱهْدِنَا ٱلصِّرَٰطَ ٱلْمُسْتَقِيمَ 128225,وَلَهَدَيْنَٰهُمْ صِرَٰطًا مُّسْتَقِيمًا 128780
3,37:118,ٱهْدِنَا ٱلصِّرَٰطَ ٱلْمُسْتَقِيمَ 128225,وَهَدَيْنَٰهُمَا ٱلصِّرَٰطَ ٱلْمُسْتَقِيمَ 132125
4,31:5,أُو۟لَٰٓئِكَ عَلَىٰ هُدًى مِّن رَّبِّهِمْ وَأُو۟لَٰٓئِكَ هُمُ ٱلْمُفْلِحُونَ 128231,أُو۟لَٰٓئِكَ عَلَىٰ هُدًى مِّن رَّبِّهِمْ وَأُو۟لَٰٓئِكَ هُمُ ٱلْمُفْلِحُونَ 131693
5,2:122,يَٰبَنِىٓ إِسْرَٰٓءِيلَ ٱذْكُرُوا۟ نِعْمَتِىَ ٱلَّتِىٓ أَنْعَمْتُ عَلَيْكُمْ وَأَنِّى فَضَّلْتُكُمْ عَلَى ٱلْعَٰلَمِينَ 128273,يَٰبَنِىٓ إِسْرَٰٓءِيلَ ٱذْكُرُوا۟ نِعْمَتِىَ ٱلَّتِىٓ أَنْعَمْتُ عَلَيْكُمْ وَأَنِّى فَضَّلْتُكُمْ عَلَى ٱلْعَٰلَمِينَ 128348
6,2:141,تِلْكَ أُمَّةٌ قَدْ خَلَتْ لَهَا مَا كَسَبَتْ وَلَكُم مَّا كَسَبْتُمْ وَلَا تُسْـَٔلُونَ عَمَّا كَانُوا۟ يَعْمَلُونَ 128360,تِلْكَ أُمَّةٌ قَدْ خَلَتْ لَهَا مَا كَسَبَتْ وَلَكُم مَّا كَسَبْتُمْ وَلَا تُسْـَٔلُونَ عَمَّا كَانُوا۟ يَعْمَلُونَ 128367
7,3:60,ٱلْحَقُّ مِن رَّبِّكَ فَلَا تَكُونَنَّ مِنَ ٱلْمُمْتَرِينَ 128373,ٱلْحَقُّ مِن رَّبِّكَ فَلَا تَكُن مِّنَ ٱلْمُمْتَرِينَ 128572
8,3:88,خَٰلِدِينَ فِيهَا لَا يُخَفَّفُ عَنْهُمُ ٱلْعَذَابُ وَلَا هُمْ يُنظَرُونَ 128388,خَٰلِدِينَ فِيهَا لَا يُخَفَّفُ عَنْهُمُ ٱلْعَذَابُ وَلَا هُمْ يُنظَرُونَ 128600
9,19:36,إِنَّ ٱللَّهَ رَبِّى وَرَبُّكُمْ فَٱعْبُدُوهُ هَٰذَا صِرَٰطٌ مُّسْتَقِيمٌ 128563,وَإِنَّ ٱللَّهَ رَبِّى وَرَبُّكُمْ فَٱعْبُدُوهُ هَٰذَا صِرَٰطٌ مُّسْتَقِيمٌ 130505
10,43:64,إِنَّ ٱللَّهَ رَبِّى وَرَبُّكُمْ فَٱعْبُدُوهُ هَٰذَا صِرَٰطٌ مُّسْتَقِيمٌ 128563,إِنَّ ٱللَّهَ هُوَ رَبِّى وَرَبُّكُمْ فَٱعْبُدُوهُ هَٰذَا صِرَٰطٌ مُّسْتَقِيمٌ 132608


There is also a lower level way to work with edge features.

We can list all edges going out from a reference node.
What we see is a tuple of pairs: the target node and the similarity between the reference node and that target node.

In [70]:
refNode = 133152

E.sim.f(refNode)

((133154, 100),
 (133156, 100),
 (133158, 100),
 (133160, 100),
 (133162, 100),
 (133165, 100),
 (133167, 100),
 (133169, 100),
 (133171, 100),
 (133173, 100),
 (133175, 100),
 (133177, 100),
 (133179, 100),
 (133181, 100),
 (133183, 100),
 (133185, 100),
 (133187, 100),
 (133189, 100),
 (133191, 100),
 (133193, 100),
 (133195, 100),
 (133197, 100))

Likewise, we can observe the nodes that target the reference node:

In [71]:
E.sim.t(refNode)

((133058, 60),
 (133133, 100),
 (133136, 100),
 (133138, 100),
 (133141, 100),
 (133143, 100),
 (133145, 100),
 (133148, 100),
 (133150, 100))

Both sets of nodes are similar to the reference node and it is inconvenient to use both `.f()` and `.t()` to get the similar lines.

But there is another way:

In [72]:
E.sim.b(refNode)

((133058, 60),
 (133133, 100),
 (133136, 100),
 (133138, 100),
 (133141, 100),
 (133143, 100),
 (133145, 100),
 (133148, 100),
 (133150, 100),
 (133154, 100),
 (133156, 100),
 (133158, 100),
 (133160, 100),
 (133162, 100),
 (133165, 100),
 (133167, 100),
 (133169, 100),
 (133171, 100),
 (133173, 100),
 (133175, 100),
 (133177, 100),
 (133179, 100),
 (133181, 100),
 (133183, 100),
 (133185, 100),
 (133187, 100),
 (133189, 100),
 (133191, 100),
 (133193, 100),
 (133195, 100),
 (133197, 100))

Let's make sure that `.b()` gives the combination of `.f()` and `.t()`.

In [73]:
# The nodes reached from refNode (f), reaching refNode (t), and both ways (b);
# the similarity values attached to the edges are dropped.
f, t, b = (
    {edge[0] for edge in edgesOf(refNode)}
    for edgesOf in (E.sim.f, E.sim.t, E.sim.b)
)

# are f and t disjoint ?

print(f"the intersection of f and t is {f.intersection(t)}")

# is b the union of f and t ?

print(f"t | f = b ? {f.union(t) == b}")

the intersection of f and t is set()
t | f = b ? True
