# DIALITE: Discover, Align and Integrate open Data Tables

## Demo 2: DIALITE Extendibility

In [2]:
# Import the necessary libraries
import pandas as pd
import dialite_server as dialite
import json
import time
import glob

In [8]:
# Ask DIALITE to generate a fresh query table with GPT-3 from a
# natural-language description, then preview its first five rows.
query_table_prompt = 'a table about covid with 5 columns and 5 rows'
query_table = dialite.randomly_generate_query_table(query_table_prompt)
query_table.head(5)

Unnamed: 0,Country,Cases,Deaths,Recovered,Active
0,USA,5742812,178701,2633567,2930544
1,Brazil,3713876,116476,2788841,808559
2,India,3444061,61529,2643788,738744
3,Russia,982822,16841,745930,219051
4,Mexico,704016,73814,442309,187893


In [9]:
# Integrate a user-provided integration set with the newly added
# outer join operator.
integration_set_pattern = "data/integration-set/stadiums_0/*"
integration_set = set(glob.glob(integration_set_pattern))
integrated_table_outer_join = dialite.integrate_tables(
    integration_set,
    algorithm="outer_join",
)

Successfully integrated 4 tables using outer join algorithm.
Integrated table:
     index             player position     team                  stadium  \
0        0      aaron rodgers       qb  packers                      NaN   
1        1       alvin kamara       rb   saints  mercedes-benz superdome   
2        2     cameron jordan       de   saints  mercedes-benz superdome   
3        3  marshon lattimore       cb   saints  mercedes-benz superdome   
4        4     tyrann mathieu        s   saints  mercedes-benz superdome   
..     ...                ...      ...      ...                      ...   
109    109       mark andrews      NaN      NaN             m and t bank   
110    110                NaN      NaN   ravens             m and t bank   
111    111                NaN      NaN    colts            lucas stadium   
112    112                NaN      NaN  cowboys                     at&t   
113    113                NaN      NaN  packers            lambeau field   

        

In [10]:
# For comparison, integrate the same integration set using ALITE.
integrated_table_alite = dialite.integrate_tables(
    integration_set,
    algorithm="ALITE",
)

Successfully integrated 4 tables using ALITE.
Integrated table:
     index             player position     team                  stadium  \
0        0      lamar jackson       qb   ravens         m&t bank stadium   
1        1        darius slay       cb   eagles  lincoln financial field   
2        2       roquan smith       lb    bears            soldier field   
3        3        dalvin cook       rb  vikings        u.s. bank stadium   
4        4       wyatt teller        g   browns      firstenergy stadium   
..     ...                ...      ...      ...                      ...   
116    116       jamarr chase       wr  bengals               paul brown   
117    117     kenny moore ii       cb    colts            lucas stadium   
118    118    jonathan taylor       rb    colts            lucas stadium   
119    119  shaquille leonard       lb    colts            lucas stadium   
120    120     quenton nelson        g    colts            lucas stadium   

                       

In [11]:
# Apply entity resolution (ER) as a downstream task on the table produced by
# the outer join integration. The cell output reports precision, recall and
# F-score.

dialite.analyze_er(integrated_table_outer_join)

Precision: 0.339
Recall: 0.397
F-score: 0.366


In [12]:
# Run the same entity-resolution analysis on the ALITE-integrated table so its
# precision, recall and F-score can be compared with the outer join result.
dialite.analyze_er(integrated_table_alite)


Precision: 0.795
Recall: 0.838
F-score: 0.816
