# Exhibition - enriching information

In [None]:
import pandas as pd
import duckdb

In [None]:
# Root directory of the processed data. Every input table lives below it, so
# pointing the notebook at another copy of the data means editing one value.
DATA_DIR = '/Users/carboni/Documents/UNIGE/basart_downloads/2023/data/processed'
# Sub-directory holding the catalog / section / exhibited / work tables.
EXHIBITION_IDS_DIR = f'{DATA_DIR}/exhibition_extra/exhibitions_ids'

# Every column is read as a string (dtype='str'), so identifier columns keep
# their exact textual form instead of being parsed as numbers.
df_exhibition = pd.read_csv(f'{DATA_DIR}/exhibition_done/Artlas Exhibition.csv', dtype='str')
df_catalog = pd.read_csv(f'{EXHIBITION_IDS_DIR}/Exhibition Catalog.csv', dtype='str')
df_exhibition_section = pd.read_csv(f'{EXHIBITION_IDS_DIR}/Exhibition Section.csv', dtype='str')
df_has_exhibited_in = pd.read_csv(f'{EXHIBITION_IDS_DIR}/Exhibited.csv', dtype='str')
df_personage = pd.read_csv(f'{DATA_DIR}/personage/Exhibitor.csv', low_memory=False, dtype='str')
df_artwork = pd.read_csv(f'{EXHIBITION_IDS_DIR}/Exhibited Work.csv', low_memory=False, dtype='str')

In [None]:
# Replace the generic `id` column of each table with a table-specific name so
# the identifiers remain distinguishable after the tables are joined.
df_catalog = df_catalog.rename(columns={'id': 'id_catalog'})
df_exhibition = df_exhibition.rename(columns={'id': 'id_exhibition'})
df_exhibition_section = df_exhibition_section.rename(columns={'id': 'id_section'})
# This table additionally gets `id_exhibition_section` renamed to `id_section`,
# the same name the section table uses for its identifier.
df_has_exhibited_in = df_has_exhibited_in.rename(
    columns={'id': 'id_has_exhibited_in', 'id_exhibition_section': 'id_section'}
)
df_artwork = df_artwork.rename(columns={'id': 'id_work'})

In [None]:
# Preview the first three rows of the artwork table.
df_artwork.head(n=3)

In [None]:
# Inner join on id_exhibition: pairs each exhibition id with the id of every
# catalog that references it; ids without a match on the other side are dropped.
merged_catalog = df_exhibition[['id_exhibition']].merge(
    df_catalog[['id_catalog', 'id_exhibition']],
    on='id_exhibition',
)

In [None]:
# Preview the first three exhibition/catalog pairs.
merged_catalog.head(n=3)

In [None]:
# Inner join on id_catalog: extends each exhibition/catalog pair with the ids
# of the sections that reference that catalog.
catalog_keys = merged_catalog[['id_catalog', 'id_exhibition']]
section_keys = df_exhibition_section[['id_section', 'id_catalog']]
merged_section = catalog_keys.merge(section_keys, on='id_catalog')

In [None]:
# Preview the first three rows after adding the section ids.
merged_section.head(n=3)

In [None]:
# Inner join on id_section: adds id_personage and id_has_exhibited_in from the
# "has exhibited in" table to every matching section row.
section_side = merged_section[['id_catalog', 'id_exhibition', 'id_section']]
exhibited_side = df_has_exhibited_in[['id_personage', 'id_section', 'id_has_exhibited_in']]
merged_exhibited = section_side.merge(exhibited_side, on='id_section')

In [None]:
# Preview the first three rows after adding the personage ids.
merged_exhibited.head(n=3)

In [None]:
# Inner join on id_has_exhibited_in: adds the id of every artwork row that
# shares the key; rows without a match on either side are dropped.
exhibited_columns = ['id_catalog', 'id_exhibition', 'id_section', 'id_personage', 'id_has_exhibited_in']
merged_work = merged_exhibited[exhibited_columns].merge(
    df_artwork[['id_work', 'id_has_exhibited_in']],
    on='id_has_exhibited_in',
)

In [None]:
# Remove the join key that was only needed to attach the artwork ids.
# The result is rebound (no in-place mutation) and errors='ignore' is passed so
# that executing this cell a second time does not raise KeyError for a column
# that has already been removed.
merged_work = merged_work.drop(columns='id_has_exhibited_in', errors='ignore')

In [None]:
# Preview the first three rows of the fully joined id table.
merged_work.head(n=3)

# Saving the final results

In [None]:
columns = merged_work.columns

# Collect every column label that contains a space together with its
# underscore-separated replacement; labels without spaces are not listed and
# therefore stay untouched.
underscored = {label: label.replace(' ', '_') for label in columns if ' ' in label}

# Apply all replacements to the frame in one in-place rename.
merged_work.rename(columns=underscored, inplace=True)

In [None]:
# Write the joined id table to disk as pretty-printed XML whose root element
# is named "data".
enriched_xml_path = '/Users/carboni/Documents/UNIGE/basart_downloads/2023/data/processed/exhibition_extra/exhibition_enriched.xml'
merged_work.to_xml(
    enriched_xml_path,
    root_name="data",
    pretty_print=True,
)

# Data Analysis

Checking data consistency

## How many sections and catalogues per exhibition

In [None]:
# Left join on id_exhibition: every row of the id table is kept and enriched
# with all columns of the exhibition table.
final_merged_with_exhibition = merged_work.merge(
    df_exhibition,
    how='left',
    on='id_exhibition',
)

In [None]:
# Materialise the enriched DataFrame as the DuckDB table `exhibition`.
# CREATE OR REPLACE keeps the cell re-runnable: a plain CREATE TABLE fails
# once the table already exists in the current DuckDB connection.
duckdb.sql("CREATE OR REPLACE TABLE exhibition AS SELECT * FROM final_merged_with_exhibition")

In [None]:
# Read the complete `exhibition` table back into a DataFrame and preview its
# first five rows.
all_rows = duckdb.execute("SELECT * FROM exhibition")
result = all_rows.fetchdf()
result.head(n=5)

In [None]:
# For each (id_exhibition, traveling) group, count the distinct sections and
# the distinct catalogs, keeping only groups with more than one section.
# The catalog counter is exposed as `catalog_count`.
query = """
SELECT id_exhibition, traveling, COUNT(DISTINCT id_section) as section_count, COUNT(DISTINCT id_catalog) as catalog_count
FROM exhibition
GROUP BY id_exhibition, traveling
HAVING COUNT(DISTINCT id_section) > 1;
"""

In [None]:
# Run the aggregation and fetch the result as a DataFrame.
result = duckdb.query(query).fetchdf()
# Leave the frame as the cell's last expression so the notebook renders it as
# a table instead of printing its plain-text form.
result

### Example: an exhibition with multiple catalogue IDs

In [None]:
# List the distinct section/catalog id pairs of one exhibition.
# The source CSVs are loaded with dtype='str', so id_exhibition holds text; the
# id is therefore bound as a string parameter instead of being compared with an
# integer literal, which would depend on an implicit type cast.
catalogues = duckdb.execute(
    "SELECT DISTINCT id_section, id_catalog FROM exhibition WHERE id_exhibition = ?;",
    ['18492'],
).fetchdf()

In [None]:
# Show the frame through the notebook's table rendering rather than print().
catalogues

### Example: an exhibition with multiple sections

In [None]:
# List the distinct section/catalog id pairs of one exhibition.
# The source CSVs are loaded with dtype='str', so id_exhibition holds text; the
# id is therefore bound as a string parameter instead of being compared with an
# integer literal, which would depend on an implicit type cast.
section = duckdb.execute(
    "SELECT DISTINCT id_section, id_catalog FROM exhibition WHERE id_exhibition = ?;",
    ['4283'],
).fetchdf()

In [None]:
# Show the frame through the notebook's table rendering rather than print().
section