In [1]:
import pandas as pd
from utils import discover_fds_with_chase, group_fds, compute_all_closures, compute_candidate_keys, find_prime_attributes, minimal_cover

## Rider

In [2]:
# Load the rider table from riders.csv and preview its first rows.
rider = pd.read_csv('riders.csv')
rider.head()

Unnamed: 0,bib,rider_name,dob,team,rider_country_code,rider_country_name,team_country_code,team_country_name
0,1,Tadej Pogacar,1998-09-21,UAE TEAM EMIRATES XRG,SLO,Slovenia,UAE,United Arab Emirates
1,2,Joao Almeida,1998-08-05,UAE TEAM EMIRATES XRG,POR,Portugal,UAE,United Arab Emirates
2,3,Jhonatan Narvaez,1997-03-04,UAE TEAM EMIRATES XRG,ECU,Ecuador,UAE,United Arab Emirates
3,4,Nils Politt,1994-03-06,UAE TEAM EMIRATES XRG,GER,Germany,UAE,United Arab Emirates
4,5,Pavel Sivakov,1997-07-11,UAE TEAM EMIRATES XRG,FRA,France,UAE,United Arab Emirates


In [3]:
# Discover the functional dependencies that hold in the rider table and
# group them in one step; each grouped entry unpacks to (lhs, rhs_set).
fd_rider = group_fds(discover_fds_with_chase(rider))

print("\nDiscovered Functional Dependencies:")
for lhs, rhs_set in fd_rider:
    # One dependency per line, formatted as "<lhs> -> <rhs_set>".
    print(lhs, "->", rhs_set)


Discovered Functional Dependencies:
{'rider_name'} -> {'team', 'rider_country_name', 'dob', 'team_country_code', 'bib', 'team_country_name', 'rider_country_code'}
{'bib'} -> {'team', 'rider_name', 'rider_country_name', 'dob', 'team_country_code', 'rider_country_code', 'team_country_name'}
{'rider_country_name'} -> {'rider_country_code'}
{'team', 'dob'} -> {'rider_country_code', 'rider_country_name'}
{'rider_country_code'} -> {'rider_country_name'}
{'team_country_code', 'rider_country_code', 'dob'} -> {'team'}
{'rider_country_code', 'team_country_name', 'dob'} -> {'team'}
{'team_country_code', 'rider_country_name', 'dob'} -> {'team'}
{'team_country_name', 'rider_country_name', 'dob'} -> {'team'}
{'team'} -> {'team_country_code', 'team_country_name'}
{'team_country_name'} -> {'team_country_code'}
{'team_country_code'} -> {'team_country_name'}


In [4]:
# Column names of the rider frame; passed as the attribute list to the
# closure and candidate-key helpers in the cells that follow.
attributes_rider = rider.columns.to_list()

In [5]:
# Compute closures for the rider attributes under the grouped FDs; the result
# is the input to compute_candidate_keys.
rider_all_closures = compute_all_closures(attributes_rider, fd_rider)

In [6]:
# Derive the rider candidate keys from the closures and the attribute list.
candidate_keys_rider = compute_candidate_keys(rider_all_closures, attributes_rider)

In [7]:
# Display the prime attributes reported for the rider candidate keys
# (presumably the attributes that occur in at least one key; see utils).
find_prime_attributes(candidate_keys_rider)

{'bib', 'rider_name'}

In [8]:
# Run minimal_cover on the grouped rider FDs and list the resulting
# dependencies.
# NOTE(review): the meaning of p is defined in utils.minimal_cover; confirm.
minimal_fds_rider = minimal_cover(fd_rider, p=0.5)
for lhs, rhs in minimal_fds_rider:
    print(lhs, "->", rhs)

{'rider_name'} -> {'team_country_code', 'bib'}
{'bib'} -> {'rider_country_code', 'rider_name', 'dob'}
{'rider_country_name'} -> {'rider_country_code'}
{'team', 'dob'} -> {'rider_country_name'}
{'rider_country_code'} -> {'rider_country_name'}
{'team_country_name', 'rider_country_name', 'dob'} -> {'team'}
{'team'} -> {'team_country_name'}
{'team_country_name'} -> {'team_country_code'}
{'team_country_code'} -> {'team_country_name'}


## Exits

In [9]:
# Load the exits table from exits.csv and preview its first rows.
exits = pd.read_csv('exits.csv')
exits.head()

Unnamed: 0,bib,name,stage,reason
0,163,Bryan Coquard,14,DNS
1,66,Carlos Rodriguez,18,DNS
2,173,Cees Bol,12,DNS
3,93,Cyril Barthe,18,DNS
4,77,Danny Van Poppel,17,DNS


In [10]:
# Discover the functional dependencies that hold in the exits table and
# group them in one step; each grouped entry unpacks to (lhs, rhs_set).
fd_exits = group_fds(discover_fds_with_chase(exits))

print("\nDiscovered Functional Dependencies for exits.csv:")
for lhs, rhs_set in fd_exits:
    # One dependency per line, formatted as "<lhs> -> <rhs_set>".
    print(lhs, "->", rhs_set)


Discovered Functional Dependencies for exits.csv:
{'name'} -> {'bib', 'stage', 'reason'}
{'bib'} -> {'stage', 'name', 'reason'}


In [11]:
# Column names of the exits frame serve as the attribute list.
attributes_exits = exits.columns.to_list()
# Closures for the exits attributes under the grouped exits FDs.
exits_all_closures = compute_all_closures(attributes_exits, fd_exits)
# Display the whole mapping (attribute tuple -> closure set).
exits_all_closures

{('bib',): {'bib', 'name', 'reason', 'stage'},
 ('name',): {'bib', 'name', 'reason', 'stage'},
 ('stage',): {'stage'},
 ('reason',): {'reason'},
 ('bib', 'name'): {'bib', 'name', 'reason', 'stage'},
 ('bib', 'stage'): {'bib', 'name', 'reason', 'stage'},
 ('bib', 'reason'): {'bib', 'name', 'reason', 'stage'},
 ('name', 'stage'): {'bib', 'name', 'reason', 'stage'},
 ('name', 'reason'): {'bib', 'name', 'reason', 'stage'},
 ('stage', 'reason'): {'reason', 'stage'},
 ('bib', 'name', 'stage'): {'bib', 'name', 'reason', 'stage'},
 ('bib', 'name', 'reason'): {'bib', 'name', 'reason', 'stage'},
 ('bib', 'stage', 'reason'): {'bib', 'name', 'reason', 'stage'},
 ('name', 'stage', 'reason'): {'bib', 'name', 'reason', 'stage'},
 ('bib', 'name', 'stage', 'reason'): {'bib', 'name', 'reason', 'stage'}}

In [12]:
# Derive the exits candidate keys from the closures and the attribute list.
candidate_keys_exits = compute_candidate_keys(exits_all_closures, attributes_exits)

In [13]:
# Display the prime attributes reported for the exits candidate keys
# (presumably the attributes that occur in at least one key; see utils).
find_prime_attributes(candidate_keys_exits)

{'bib', 'name'}

In [14]:
# Run minimal_cover on the grouped exits FDs and list the resulting
# dependencies.
# NOTE(review): the meaning of p is defined in utils.minimal_cover; confirm.
minimal_fds_exits = minimal_cover(fd_exits, p=0.5)
for lhs, rhs in minimal_fds_exits:
    print(lhs, "->", rhs)

{'name'} -> {'bib', 'reason'}
{'bib'} -> {'stage', 'name'}


## Individual results

In [15]:
# Load the per-stage individual results from individual_results.csv and
# preview the first rows.
individual_results = pd.read_csv('individual_results.csv')
individual_results.head()

Unnamed: 0,day,rider,rank,time,bonus,penalty,start_location,start_country_code,start_country_name,finish_location,finish_country_code,finish_country_name,length,type,name,team
0,2023-07-05,101,1,13991,10,0,Lille Métropole,FRA,France,Lille Métropole,FRA,France,184.9,flat,Jasper Philipsen,ALPECIN-DECEUNINCK
1,2023-07-05,41,2,13991,6,0,Lille Métropole,FRA,France,Lille Métropole,FRA,France,184.9,flat,Biniam Girmay,INTERMARCHÉ - WANTY
2,2023-07-05,228,3,13991,4,0,Lille Métropole,FRA,France,Lille Métropole,FRA,France,184.9,flat,Soren Waerenskjold,UNO-X MOBILITY
3,2023-07-05,187,4,13991,0,0,Lille Métropole,FRA,France,Lille Métropole,FRA,France,184.9,flat,Anthony Turgis,TOTALENERGIES
4,2023-07-05,118,5,13991,0,0,Lille Métropole,FRA,France,Lille Métropole,FRA,France,184.9,flat,Matteo Trentin,TUDOR PRO CYCLING TEAM


In [16]:
# Discover the functional dependencies that hold in the individual results
# table and group them in one step; each grouped entry unpacks to
# (lhs, rhs_set).
fd_individual = group_fds(discover_fds_with_chase(individual_results))

print("\nDiscovered Functional Dependencies for individual_results.csv:")
for lhs, rhs_set in fd_individual:
    # One dependency per line, formatted as "<lhs> -> <rhs_set>".
    print(lhs, "->", rhs_set)


Discovered Functional Dependencies for individual_results.csv:
{'name', 'day'} -> {'penalty', 'bonus', 'time', 'rank'}
{'day', 'rank'} -> {'penalty', 'bonus', 'time'}
{'rider', 'day'} -> {'penalty', 'bonus', 'time', 'rank'}
{'name', 'finish_location'} -> {'penalty', 'bonus', 'time', 'rank'}
{'finish_location', 'rank'} -> {'penalty', 'bonus', 'time'}
{'finish_location', 'rider'} -> {'penalty', 'bonus', 'time', 'rank'}
{'rank', 'length'} -> {'bonus'}
{'start_location', 'name'} -> {'penalty', 'bonus', 'time', 'rank'}
{'name', 'time'} -> {'penalty', 'bonus', 'type'}
{'start_location', 'rank'} -> {'penalty', 'bonus', 'time'}
{'time', 'rank'} -> {'start_location', 'bonus', 'day', 'type', 'penalty', 'finish_location', 'length'}
{'type', 'rank'} -> {'bonus'}
{'start_location', 'rider'} -> {'penalty', 'bonus', 'time', 'rank'}
{'time', 'rider'} -> {'penalty', 'bonus', 'type'}
{'finish_location'} -> {'start_location', 'type', 'day', 'length'}
{'start_location'} -> {'type', 'finish_location', 'da

In [17]:
# Column names of the individual results frame serve as the attribute list.
attributes_individual = individual_results.columns.to_list()
# Closures for those attributes under the grouped individual-results FDs.
individual_all_closures = compute_all_closures(attributes_individual, fd_individual)
# Display the whole mapping (attribute tuple -> closure set).
# NOTE(review): this output is very large for a table this wide; consider
# previewing only a subset of entries.
individual_all_closures

{('day',): {'day',
  'finish_country_code',
  'finish_country_name',
  'finish_location',
  'length',
  'start_country_code',
  'start_country_name',
  'start_location',
  'type'},
 ('rider',): {'finish_country_code',
  'finish_country_name',
  'name',
  'rider',
  'start_country_code',
  'start_country_name',
  'team'},
 ('rank',): {'finish_country_code',
  'finish_country_name',
  'rank',
  'start_country_code',
  'start_country_name'},
 ('time',): {'finish_country_code',
  'finish_country_name',
  'start_country_code',
  'start_country_name',
  'time'},
 ('bonus',): {'bonus',
  'finish_country_code',
  'finish_country_name',
  'start_country_code',
  'start_country_name'},
 ('penalty',): {'finish_country_code',
  'finish_country_name',
  'penalty',
  'start_country_code',
  'start_country_name'},
 ('start_location',): {'day',
  'finish_country_code',
  'finish_country_name',
  'finish_location',
  'length',
  'start_country_code',
  'start_country_name',
  'start_location',
  'type'

In [18]:
# Derive the individual-results candidate keys from the closures and the
# attribute list.
# NOTE(review): the other sections name this variable candidate_keys_<table>.
individual_candidate_keys = compute_candidate_keys(individual_all_closures, attributes_individual)

In [19]:
# Display the prime attributes reported for the individual-results candidate
# keys (presumably the attributes that occur in at least one key; see utils).
find_prime_attributes(individual_candidate_keys)

{'day',
 'finish_location',
 'length',
 'name',
 'rank',
 'rider',
 'start_location',
 'team',
 'time'}

In [21]:
# Run minimal_cover on the grouped individual-results FDs and list the
# resulting dependencies.
# NOTE(review): the meaning of p is defined in utils.minimal_cover; confirm.
minimal_fds_individual = minimal_cover(fd_individual, p=0.5)
for lhs, rhs in minimal_fds_individual:
    print(lhs, "->", rhs)

{'finish_location', 'rank'} -> {'penalty', 'time'}
{'name', 'time'} -> {'type'}
{'time', 'rank'} -> {'start_location'}
{'type', 'rank'} -> {'bonus'}
{'start_location', 'rider'} -> {'rank'}
{'time', 'rider'} -> {'bonus'}
{'finish_location'} -> {'start_location'}
{'start_location'} -> {'day'}
{'time', 'length'} -> {'day'}
set() -> {'finish_country_name', 'start_country_name', 'finish_country_code', 'start_country_code'}
{'day'} -> {'finish_location', 'length'}
{'rider'} -> {'name'}
{'team', 'time', 'rank'} -> {'name'}
{'bonus', 'time', 'team'} -> {'penalty'}
{'rank', 'team', 'length'} -> {'penalty'}
{'name'} -> {'team', 'rider'}
{'length'} -> {'type'}
