# Field : structure analysis

## Goals

- understand the relationships between Field objects
- present the methods that modify these relationships (read about the structure of Field beforehand)

-----

In [1]:
from observation import Nfield, Sfield

## coupling analysis
Two Field objects can be compared to identify the relationships between the two (similar to relationships between entities of a conceptual data model).
Several relationships are defined :
- coupled (one-to-one)
- derived (many-to-one)
- crossed (many-to-many with 'all-to-all' links)
- linked (many-to-many)

In [2]:
# Each case below builds two fields of the same length and prints the boolean
# relationship tests followed by the full indicator dict from couplinginfos().
# `!s` applies str() to each value, which is the conversion print() itself uses.

# Case 1 - quarter compared to month (several months share one quarter).
month = Sfield(
    ['jan', 'feb', 'apr', 'jan', 'sep', 'dec', 'apr', 'may', 'jan'],
    reindex=True,
)
quarter = Nfield(
    ['q1', 'q1', 'q2', 'q1', 'q3', 'q4', 'q2', 'q2', 'q1'],
    reindex=True,
)
print(f"quarter is coupled to month ?    {quarter.iscoupled(month)!s}")
print(f"quarter is derived to month ?    {quarter.isderived(month)!s}")
print(f"quarter relationship to month :  {quarter.couplinginfos(month)!s} \n")

# Case 2 - monthnum compared to month (each month has exactly one number).
monthnum = Sfield([1, 2, 4, 1, 9, 12, 4, 5, 1], reindex=True)
print(f"monthnum is coupled to month ?    {monthnum.iscoupled(month)!s}")
print(f"monthnum is derived to month ?    {monthnum.isderived(month)!s}")
print(f"monthnum relationship to month :  {monthnum.couplinginfos(month)!s} \n")

# Case 3 - semester compared to year (every year appears with every semester).
year = Sfield([2020, 2020, 2021, 2021, 2022, 2022], reindex=True)
semester = Sfield(['s1', 's2', 's1', 's2', 's1', 's2'], reindex=True)
print(f"semester is coupled to year ?    {semester.iscoupled(year)!s}")
print(f"semester is derived to year ?    {semester.isderived(year)!s}")
print(f"semester is crossed to year ?    {semester.iscrossed(year)!s}")
print(f"semester relationship to year :  {semester.couplinginfos(year)!s} \n")

quarter is coupled to month ?    False
quarter is derived to month ?    True
quarter relationship to month :  {'dist': 6, 'dmin': 6, 'dmax': 24, 'diff': 2, 'dran': 18, 'rateder': 0.0, 'distomin': 0, 'distomax': 18, 'distance': 2, 'ratecpl': 0.1, 'typecoupl': 'derived'} 

monthnum is coupled to month ?    True
monthnum is derived to month ?    False
monthnum relationship to month :  {'dist': 6, 'dmin': 6, 'dmax': 36, 'diff': 0, 'dran': 30, 'rateder': 0.0, 'distomin': 0, 'distomax': 30, 'distance': 0, 'ratecpl': 0.0, 'typecoupl': 'coupled'} 

semester is coupled to year ?    False
semester is derived to year ?    False
semester is crossed to year ?    True
semester relationship to year :  {'dist': 6, 'dmin': 3, 'dmax': 6, 'diff': 1, 'dran': 3, 'rateder': 1.0, 'distomin': 3, 'distomax': 0, 'distance': 4, 'ratecpl': 1.0, 'typecoupl': 'crossed'} 



## coupling forcing - codec adjustment
A relationship can be imposed by extending or reducing the codec.
The records that deviate from the imposed relationship are then easy to identify.
    
    


In [3]:
# Example: introduce an error in the quarters, then force quarter to be derived from month again.

# Record 2 is an 'apr' record; giving it 'q3' makes 'apr' map to two quarters.
quarter[2] = 'q3'
# quarter and month are now only linked; 'rateder' stays low (< 0.1), which
# means quarter is still almost derived from (or coupled with) month.
print(f"quarter relationship to month :  {quarter.couplinginfos(month)!s} \n")

# Coupling forcing: the returned value is used below to select the records in error.
errors = month.coupling(quarter)
# quarter is now derived from month again.
print(f"quarter relationship to month :  {quarter.couplinginfos(month)!s} \n")

print(f"quarter codec :  {quarter.codec!s}")
# The month codec is extended ('apr' appears twice) to compensate for the error.
print(f"month codec :  {month.codec!s}")
# 'apr' is the month in error: it is associated with both 'q2' and 'q3'.
print(f"errors :  {month[errors]!s} {quarter[errors]!s}")

quarter relationship to month :  {'dist': 7, 'dmin': 6, 'dmax': 24, 'diff': 2, 'dran': 18, 'rateder': 0.05555555555555555, 'distomin': 1, 'distomax': 17, 'distance': 3, 'ratecpl': 0.15, 'typecoupl': 'linked'} 

quarter relationship to month :  {'dist': 7, 'dmin': 7, 'dmax': 28, 'diff': 3, 'dran': 21, 'rateder': 0.0, 'distomin': 0, 'distomax': 21, 'distance': 3, 'ratecpl': 0.125, 'typecoupl': 'derived'} 

quarter codec :  ["q1", "q3", "q4", "q2"]
month codec :  ['jan', 'feb', 'apr', 'sep', 'dec', 'apr', 'may']
errors :  ['apr', 'apr'] ["q3", "q2"]


## coupling forcing - values adjustment
A relationship can be imposed by extending or reducing the values.
The deviations from the imposed relationship are then easy to identify.


In [4]:
# Same year / semester idea as in the earlier example, but with some records
# missing: in that state year and semester cannot be crossed.

year = Sfield([2020, 2021, 2021, 2022], reindex=True)
semester = Sfield(['s1', 's1', 's2', 's1'], reindex=True)
# year and semester are only linked at this point.
print(f"semester relationship to year :  {semester.couplinginfos(year)!s} \n")

# Values forcing: both fields are extended in place; the returned value is
# used below to select the records that were added.
added = Sfield.full([year, semester])
# year and semester are now crossed.
print(f"semester relationship to year :  {semester.couplinginfos(year)!s} \n")
print(f"years extension    :  {year!s}")
print(f"semester extension :  {semester!s}")
print(f"added values :  {year[added]!s} {semester[added]!s}")

semester relationship to year :  {'dist': 4, 'dmin': 3, 'dmax': 6, 'diff': 1, 'dran': 3, 'rateder': 0.3333333333333333, 'distomin': 1, 'distomax': 2, 'distance': 2, 'ratecpl': 0.5, 'typecoupl': 'linked'} 

semester relationship to year :  {'dist': 6, 'dmin': 3, 'dmax': 6, 'diff': 1, 'dran': 3, 'rateder': 1.0, 'distomin': 3, 'distomax': 0, 'distance': 4, 'ratecpl': 1.0, 'typecoupl': 'crossed'} 

years extension    :  {'$default': [2020, 2021, 2021, 2022, 2020, 2022]}
semester extension :  {'$default': ['s1', 's1', 's2', 's1', 's2', 's2']}
added values :  [2020, 2022] ['s2', 's2']
