# Flowmap Blue OD Matrix

In [45]:
import pandas as pd
import os

In [26]:
# Load the source table. The path is resolved against the current user's home
# directory instead of a machine-specific absolute path, so the notebook also
# runs under other accounts that keep the file in ~/Downloads.
df = pd.read_csv(os.path.expanduser('~/Downloads/Avantguarde.csv'))

### Clusters and other keywords

In [28]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,numero_cluster,manifest_url,canvas_number,image_url,City,Country,Title,wkt,Date,Journal Type,notice,group_name,group_tags
0,849,https://digi.ub.uni-heidelberg.de/diglit/iiif/...,101.0,https://digi.ub.uni-heidelberg.de/iiif/2/dkd19...,Stuttgart,Germany,Deutsche Kunst und Dekoration,POINT(9.1775 48.776111111111),1904-01-01,Decorative Art,,"Metzner (Franz), Design for a Mausoleum at Ber...",
1,849,https://digi.ub.uni-heidelberg.de/diglit/iiif/...,102.0,https://digi.ub.uni-heidelberg.de/iiif/2/kunst...,Leipzig,Germany,Kunstgewerbeblatt,POINT(12.375 51.34),1906-01-01,Decorative Art,,"Metzner (Franz), Design for a Mausoleum at Ber...",
2,849,https://digi.ub.uni-heidelberg.de/diglit/iiif/...,307.0,https://digi.ub.uni-heidelberg.de/iiif/2/inter...,New York City,United States of America,International studio,POINT(-74.006015 40.712728),1907-01-01,Modern Art Journal,,"Metzner (Franz), Design for a Mausoleum at Ber...",
3,849,https://digi.ub.uni-heidelberg.de/diglit/iiif/...,315.0,https://digi.ub.uni-heidelberg.de/iiif/2/studi...,London,United Kingdom,Studio : international art,POINT(-0.1275 51.507222222222),1907-01-01,Modern Art Journal,,"Metzner (Franz), Design for a Mausoleum at Ber...",
4,537,https://digi.ub.uni-heidelberg.de/diglit/iiif/...,678.0,https://digi.ub.uni-heidelberg.de/iiif/2/cicer...,Leipzig,Germany,Der Cicerone,POINT(12.375 51.34),1929-01-01,Modern Art Journal,,"De Chirico, Einsame Strasse [coll. Paul Guilla...",


## Create the index for cities

In [29]:
# Split each WKT "POINT(a b)" string into its two space-separated parts:
# the first captured group is stored as 'lon', the second as 'lat'.
# str.extract returns text, so both columns hold strings.
wkt_coords = df['wkt'].str.extract(r'POINT\(([^ ]+) ([^ ]+)\)')
df['lon'] = wkt_coords[0]
df['lat'] = wkt_coords[1]

In [30]:
# One row per distinct City, keeping the first lat/lon value found for it.
city_coords = df.groupby('City')[['lat', 'lon']].first().reset_index()

In [31]:
# Rename the 'City' column to 'name' (rebinding the result instead of mutating in place).
city_coords = city_coords.rename(columns={'City': 'name'})

In [32]:
# Number the cities 1..N and name that index 'id'. The index is rebuilt from the
# row count rather than incremented, so executing this cell more than once
# yields the same ids instead of shifting them by one on every run.
city_coords.index = pd.RangeIndex(start=1, stop=len(city_coords) + 1, name='id')

In [33]:
# Show the city table (id index, name, lat, lon) as plain text.
print(city_coords)

                name                 lat                 lon
id                                                          
1        Baden-Baden     48.761944444444     8.2408333333333
2          Barcelona             41.3825     2.1769444444444
3             Berlin     52.516666666667     13.383333333333
4            Chicago     41.881944444444    -87.627777777778
5   City of Brussels  50.846666666666664   4.351666666666667
6         Copenhagen     55.676111111111     12.568888888889
7              Delft     52.011944444444     4.3594444444444
8          Frankfurt     50.113611111111     8.6797222222222
9            Hanover     52.374444444444     9.7386111111111
10           Leipzig               51.34              12.375
11            Lleida            41.61674             0.62218
12            London     51.507222222222             -0.1275
13            Munich             48.1375              11.575
14     New York City           40.712728          -74.006015
15             Paris    

In [34]:
# Write the city table to the working directory; index=True also writes the
# index as a column in the file.
city_coords.to_csv('od_city_index.csv', index=True)

## Index to OD Matrix

In [35]:
# Parse the 'Date' strings and keep only the calendar year of each row.
parsed_dates = pd.to_datetime(df['Date'])
df['Year'] = parsed_dates.dt.year

In [36]:
# Lookup table from city name to its id, where the id is the index label of
# that city's row in city_coords.
city_id_map = {name: city_id for city_id, name in city_coords['name'].items()}

In [37]:
# Origin: the id of each row's own City.
df['origin'] = df['City'].map(city_id_map).astype(int)

# Destination: the id of the City on the following row. The final row has no
# successor, so it falls back to its own origin (a self-loop). Filling from
# 'origin' replaces the deprecated fillna(method='ffill') call and the separate
# iloc patch of the last row, and also works when the frame has a single row.
# NOTE(review): the shift runs over the whole frame, so the last row of one
# cluster is paired with the first row of the next one — confirm that
# cross-cluster pairs are intended before relying on these flows.
df['dest'] = (
    df['City']
    .shift(-1)
    .map(city_id_map)
    .fillna(df['origin'])
    .astype(int)
)

### Overall OD Matrix

In [38]:
# Count how many rows share each (origin, dest, Year) combination.
overall_od_matrix = (
    df.groupby(['origin', 'dest', 'Year'])
    .size()
    .reset_index(name='count')
)

In [39]:
# Swap the 'Year' column for a 'time' string of the form '<year>-01-01'.
# pop() removes 'Year' from the frame while handing its values to the expression,
# and the new 'time' column is appended as the last column.
overall_od_matrix['time'] = overall_od_matrix.pop('Year').astype(str) + '-01-01'

In [40]:
# Show the aggregated table (origin, dest, count, time) as plain text.
print(overall_od_matrix)

     origin  dest  count        time
0         1    18      1  1954-01-01
1         2     1      1  1904-01-01
2         2    12      1  1914-01-01
3         2    19      1  1921-01-01
4         3     3      1  1923-01-01
..      ...   ...    ...         ...
129      18    18      1  1927-01-01
130      19     3      1  1985-01-01
131      19    15      1  1924-01-01
132      19    15      2  1965-01-01
133      19    18      1  1961-01-01

[134 rows x 4 columns]


In [41]:
# Write the aggregated table to the working directory; index=False leaves the
# row index out of the file.
overall_od_matrix.to_csv('overall_od_matrix.csv', index=False)

## Keyword to OD Matrix

In [42]:
# Distinct group names, in order of first appearance. Missing values are dropped
# first: a NaN entry is a float, so calling .replace() on it to build a file name
# raises AttributeError, and comparing the column against NaN matches no row.
unique_keywords = df['group_name'].dropna().unique()

In [None]:
def keyword_to_filename(keyword):
    """Return the CSV file name derived from a keyword.

    Spaces become '_', commas and colons are removed, '/' becomes '_or_',
    and '.csv' is appended.

    Wrapping the expression in a function means this cell no longer reads the
    loop variable `keyword` before any loop has defined it, which raised
    NameError when the notebook was run top to bottom on a fresh kernel.
    """
    return keyword.replace(' ', '_').replace(',', '').replace(':', '').replace('/', '_or_') + '.csv'

In [47]:
# Directory that receives the per-keyword CSV files.
group_name_dir = 'group_name'
# exist_ok=True creates the directory only when it is missing, without a
# separate exists() check that could go stale between the check and the call.
os.makedirs(group_name_dir, exist_ok=True)

In [48]:
# Characters rewritten when a keyword is turned into a file name:
# space -> '_', ',' and ':' removed, '/' -> '_or_'.
filename_table = str.maketrans({' ': '_', ',': None, ':': None, '/': '_or_'})

for keyword in unique_keywords:
    # Keep only the rows whose group_name equals this keyword
    filtered_df = df.loc[df['group_name'] == keyword]

    # Count rows per (origin, dest, Year), then swap 'Year' for a
    # '<year>-01-01' string stored in 'time'
    keyword_od_matrix = (
        filtered_df.groupby(['origin', 'dest', 'Year'])
        .size()
        .reset_index(name='count')
    )
    keyword_od_matrix['time'] = keyword_od_matrix.pop('Year').astype(str) + '-01-01'

    # Build the target path inside the group_name directory
    filename = keyword.translate(filename_table) + '.csv'
    full_path = os.path.join(group_name_dir, filename)

    # Write the matrix without the row index and report where it went
    keyword_od_matrix.to_csv(full_path, index=False)
    print(f'OD matrix for "{keyword}" saved as {full_path}')

OD matrix for "Metzner (Franz), Design for a Mausoleum at Berlin, date?" saved as group_name/Metzner_(Franz)_Design_for_a_Mausoleum_at_Berlin_date?.csv
OD matrix for "De Chirico, Einsame Strasse [coll. Paul Guillaume], 1914" saved as group_name/De_Chirico_Einsame_Strasse_[coll._Paul_Guillaume]_1914.csv
OD matrix for "Diverse_Geom_abstract" saved as group_name/Diverse_Geom_abstract.csv
OD matrix for "Picasso, Le Buste, 1925" saved as group_name/Picasso_Le_Buste_1925.csv
OD matrix for "Picasso, La Dame au Fauteuil, 1914" saved as group_name/Picasso_La_Dame_au_Fauteuil_1914.csv
OD matrix for "Van Gogh, Le Pont de Trinquetaille, 1888" saved as group_name/Van_Gogh_Le_Pont_de_Trinquetaille_1888.csv
OD matrix for "Cézanne, Les Joueurs de cartes, vers 1890-95, huile sur toile, musée d'Orsay" saved as group_name/Cézanne_Les_Joueurs_de_cartes_vers_1890-95_huile_sur_toile_musée_d'Orsay.csv
OD matrix for "Picasso_Blind Beggar with a Boy_1903" saved as group_name/Picasso_Blind_Beggar_with_a_Boy_190