In [1]:
import pandas as pd

# 1 Intro #

In this practical, we will see the basic pipeline to process a formal context with
FCA, and code in Python two basic FCA algorithm.
We will be using 2 toy datasets from the theoretical class for this practical:

•a 4 by 4 boolean context: shapes.csv;
•a 9 by 26 multi-valued context: banking.csv.

As those files are CSV files, it is recommended to use the pandas library to
load them, using pandas.read csv("shapes.csv", index col=0).

In [105]:
shapes = pd.read_csv("shapes.csv",index_col=0)
banking = pd.read_csv("banking.csv",index_col=0)

In [12]:
shapes

Unnamed: 0_level_0,Has 3 Vertices,Has 4 vertices,Has a direct angle,Equilateral
Shape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Equilateral triangle,True,False,False,True
Rectangle triangle,True,False,True,False
Rectangle,False,True,True,False
Square,False,True,True,True


In [26]:
banking

Unnamed: 0_level_0,Country,BankType,Owner,Code,CorpC,FinLib,RegInt,BList,Wolfs
Case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ABNAmro,NLD,Universal,Public,No,0.74,0.98,144,Yes,No
Barclays,GBR,Commercial,Public,Explicit,0.14,1.0,277,Yes,Yes
BNP Paribas,FRA,Universal,Public,Implicit,0.82,1.0,75,No,No
Carnegie,SWE,Investment,Public,No,0.71,0.95,83,No,No
Citigroup,USA,Commercial,Public,Explicit,0.0,1.0,426,Yes,Yes
Coutts \& Co,GBR,Private,Public,Implicit,0.14,1.0,277,Yes,No
CS,CHE,Universal,Public,Explicit,0.44,0.95,83,Yes,Yes
Deutsche Bank,GER,Universal,Public,Explicit,0.95,0.9,45,No,Yes
Goldman Sachs,USA,Investment,Public,Explicit,0.0,1.0,426,Yes,Yes
HSBC,GBR,Commercial,Public,Implicit,0.14,1.0,277,Yes,Yes


# 2 Nominal, Ordinal, and Interordinal Scaling #


The first thing to do to apply the basic FCA algorithms on data is to transform
the multi-valued data into a boolean context. While shapes.csv is already a
Boolean context, banking.csv is not.
We will see 3 scaling methods in this practical, for some attribute g:


•nominal scaling K = (N, N, =):


1. create a Boolean attribute gi for each value i of the attribute g;

2. for each object, set the value of the attribute gi to T rue if g = i;


•ordinal scaling K = (N, N, ≤):


1. create a Boolean attribute g≤i for each value i of the attribute g;

2. for each object, set the value of the attribute g≤i to T rue if g ≤i;


•interordinal scaling K = (N, N, ≤∪≥):


1. create 2 Boolean attributes g≤i and g≥i for each value i of the at-
tribute g;

2. for each object, set the value of the attribute g≤i to T rue if g ≤ i
and similarly for g≥i if g ≥i

In [106]:
def nominal_operator(attribute, value):
    """
    """
    if attribute == value:
        return True
    else:
        return False

In [107]:
def ordinal_scaling(attribute_value, value):
    """
    """
    if value <= attribute_value:
        return True
    else:
        return False
    
    

In [41]:
def interordinal_scaling(first_att, second_att, value):
    
    if (value <= first_att) and (value >= second_att):
        return True
    else:
        return False
    
    

In [109]:
# getting unique values from each column and storing in a dictionary

from collections import defaultdict
columns_attribute = defaultdict()
for column in banking:
    print(column)
    unique = banking[column].unique()
    columns_attribute[column] = unique
    print("Unique value for column:", column, " are:")
    print(unique)
    print("---------------------------------------")

Country
Unique value for column: Country  are:
['NLD' 'GBR' 'FRA' 'SWE' 'USA' 'CHE' 'GER' 'CAN' 'ESP']
---------------------------------------
BankType
Unique value for column: BankType  are:
['Universal' 'Commercial' 'Investment' 'Private']
---------------------------------------
Owner
Unique value for column: Owner  are:
['Public' 'Private']
---------------------------------------
Code
Unique value for column: Code  are:
['No' 'Explicit' 'Implicit']
---------------------------------------
CorpC
Unique value for column: CorpC  are:
[0.74 0.14 0.82 0.71 0.   0.44 0.95 0.23 0.77]
---------------------------------------
FinLib
Unique value for column: FinLib  are:
[0.98 1.   0.95 0.9 ]
---------------------------------------
RegInt
Unique value for column: RegInt  are:
[144 277  75  83 426  45 149  53]
---------------------------------------
BList
Unique value for column: BList  are:
['Yes' 'No']
---------------------------------------
Wolfs
Unique value for column: Wolfs  are:
['No' 'Ye

In [110]:
#scaling the columns
nominal_required_columns = ["Country", "BankType", "Owner", "Code"]
ordinal_required_columns = [ "FinLib","RegInt", "CorpC"]
rows = []
new_df = pd.DataFrame([])

for column, unique in columns_attribute.items():
    
    if column in nominal_required_columns:
        for each in unique:
            key_name = column+ "_" + str(each)
            new_df[key_name] = banking.apply(lambda x: nominal_operator(each, x[column]), axis=1)
            
for column, unique in columns_attribute.items():
    if column in ordinal_required_columns:
        for each in unique:
            key_name = column+ "_<=_" + str(each)
            new_df[key_name] = banking.apply(lambda x: ordinal_scaling(each, x[column]), axis=1)
            
        new_df["BList"] = banking["BList"].apply(lambda x: True if x=="Yes" else "False")
new_df["Wolfs"] = banking["Wolfs"].apply(lambda x: True if x=="Yes" else "False")

In [111]:
new_df 

Unnamed: 0_level_0,Country_NLD,Country_GBR,Country_FRA,Country_SWE,Country_USA,Country_CHE,Country_GER,Country_CAN,Country_ESP,BankType_Universal,...,FinLib_<=_0.9,RegInt_<=_144,RegInt_<=_277,RegInt_<=_75,RegInt_<=_83,RegInt_<=_426,RegInt_<=_45,RegInt_<=_149,RegInt_<=_53,Wolfs
Case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ABNAmro,True,False,False,False,False,False,False,False,False,True,...,False,True,True,False,False,True,False,True,False,False
Barclays,False,True,False,False,False,False,False,False,False,False,...,False,False,True,False,False,True,False,False,False,True
BNP Paribas,False,False,True,False,False,False,False,False,False,True,...,False,True,True,True,True,True,False,True,False,False
Carnegie,False,False,False,True,False,False,False,False,False,False,...,False,True,True,False,True,True,False,True,False,False
Citigroup,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,True
Coutts \& Co,False,True,False,False,False,False,False,False,False,False,...,False,False,True,False,False,True,False,False,False,False
CS,False,False,False,False,False,True,False,False,False,True,...,False,True,True,False,True,True,False,True,False,True
Deutsche Bank,False,False,False,False,False,False,True,False,False,True,...,True,True,True,True,True,True,True,True,True,True
Goldman Sachs,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,True
HSBC,False,True,False,False,False,False,False,False,False,False,...,False,False,True,False,False,True,False,False,False,True


# 3 Implement the derivation and the closure operator #

## 3.1 Derivation operator (optional) ##


Implement two functions corresponding to the derivation operator from respec-
tively the set of objects (up) and the set of attributes (down). The function
should take a Boolean context and a set of objects as input and output a set
of attributes, for the first function, and take a Boolean context and a set of
attributes as input and output a set of objects for the second function.
Reminder of the derivation operator, with gIm if there is a cross1 in the
context for attribute g of object m:


A′ def
= {m ∈M |gIm for all g ∈A},

B′ def
= {g ∈G |gIm for all m ∈B}.

In [144]:
def derivational_operator(list_obj, data, dtype="Object"):
    
    if dtype != "Object":
        result = []
        for each in list_obj:
            result.append(data.index[data[each] == True].tolist())
            
        #get index
    else:
        result = []
        for each in list_obj:
            row = data.loc[each].to_dict()
            result.append([k for k,v in row.items() if v == True])
    if len(result) == 0:
        return {}
    return set.intersection(*map(set,result))
             

In [145]:
derivational_operator(["RBC","Rothschild"], new_df)

{'CorpC_<=_0.23',
 'CorpC_<=_0.44',
 'CorpC_<=_0.71',
 'CorpC_<=_0.74',
 'CorpC_<=_0.77',
 'CorpC_<=_0.82',
 'CorpC_<=_0.95',
 'FinLib_<=_1.0',
 'RegInt_<=_277',
 'RegInt_<=_426'}

## 3.2 Closure operator ##
Implement the closure operator on the attributes of a context, by writing a
function taking a context and a set of attributes as input and returning a set of
attributes.

Reminder of the closure operator: A′′ = (A′)′.

Check that your code works correctly by computing (A′′)′′ for some set of
attribute A, which should be equal to A′′ (and to ((A′′)′′)′′ ...). Do so for
shapes.csv and banking.csv. 

In [146]:
def closure(attributes, data):
    
    objects = derivational_operator(attributes, data, dtype="Objectf")
    data = data.loc[objects]
    assert len(data) == len(objects)
    attributes = derivational_operator(list(objects), data, dtype="Object")
    return attributes

In [147]:
closure(['CorpC_<=_0.23','FinLib_<=_1.0'], new_df[['CorpC_<=_0.23','FinLib_<=_1.0']])

{'CorpC_<=_0.23', 'FinLib_<=_1.0'}

## 4 Implement AllClosure ##



In [148]:
attribute_indices = {k:i for i,k in enumerate(new_df.columns)}
indices_attribute = {i:k for i,k in enumerate(new_df.columns)}
def NextIntent(B, data, n):
    for i in range(n-1, 0, -1):
        res = set(range(i)).intersection(B)
        B_n = {indices_attribute[x] for x in res}
        B_n.add(indices_attribute[i])
        Bh = derivational_operator(derivational_operator(list(B_n), data, dtype="attribute"), data, dtype="Object" )
        Bh_n= [ attribute_indices[x] for x in Bh]
        if LexicoLessThan(B, Bh_n, i): return Bh

def LexicoLessThan(B, Bh, i):
    if i not in [e for e in Bh if e not in B]: return False
    h = set(range(i))
    if h.intersection(B) == h.intersection(Bh): return True
    else:
        return False

In [149]:
attributes = ['CorpC_<=_0.23','FinLib_<=_1.0']
B = [attribute_indices[x] for x in attributes]
NextIntent(B, new_df, len(new_df.columns))

{25, 29}
{'FinLib_<=_1.0', 'CorpC_<=_0.14', 'Wolfs', 'BList', 'CorpC_<=_0.74', 'RegInt_<=_426', 'CorpC_<=_0.23', 'CorpC_<=_0.44', 'Owner_Public', 'CorpC_<=_0.71', 'CorpC_<=_0.77', 'CorpC_<=_0.82', 'CorpC_<=_0.95'}
[29, 19, 40, 27, 18, 36, 25, 23, 13, 21, 26, 20, 24]
{25, 29}
{}
[]
{25, 29}
{'RegInt_<=_277', 'Country_CAN', 'FinLib_<=_1.0', 'CorpC_<=_0.74', 'RegInt_<=_149', 'RegInt_<=_426', 'CorpC_<=_0.23', 'BankType_Universal', 'Code_Explicit', 'CorpC_<=_0.71', 'Owner_Public', 'CorpC_<=_0.44', 'CorpC_<=_0.77', 'CorpC_<=_0.82', 'CorpC_<=_0.95'}
[33, 7, 29, 18, 38, 36, 25, 9, 16, 21, 13, 23, 26, 20, 24]
{25, 29}
{}
[]
{25, 29}
{'FinLib_<=_1.0', 'CorpC_<=_0.74', 'RegInt_<=_426', 'CorpC_<=_0.23', 'CorpC_<=_0.44', 'CorpC_<=_0.71', 'CorpC_<=_0.77', 'CorpC_<=_0.82', 'CorpC_<=_0.95'}
[29, 18, 36, 25, 23, 21, 26, 20, 24]
{25, 29}
{}
[]
{25, 29}
{}
[]
{25, 29}
{'RegInt_<=_277', 'FinLib_<=_1.0', 'CorpC_<=_0.74', 'RegInt_<=_426', 'CorpC_<=_0.23', 'CorpC_<=_0.44', 'CorpC_<=_0.71', 'CorpC_<=_0.77', '

{'CorpC_<=_0.95', 'FinLib_<=_1.0', 'RegInt_<=_426'}

## 4.1 Implement NextClosure operator ##

Implement NextClosure using the algorithm from Figure 1. Note that Up is
the derivation operator from object to attributes, and Down from attributes to
objects. This version of the algorithm is computed on the attribute indices in the
lexical order and not the attribute names, so depending on your implementation,
take care of the modifications.

## 4.2 Implement AllClosure using NextClosure operator ##
Implement AllClosure (see slide 22). It will take as an input a Boolean formal
context. It should output a list of concepts (extent-intent pairs),
Hint: you will need to store the concepts in a list instead of outputing them
each time like in 2.1. of slide 22.