In [None]:
import os
import pandas as pd

# Build a mapping between OpenCitations Meta venues and ERIH-PLUS journals by
# matching the venue ISSN against either the print or the online ISSN of each
# ERIH-PLUS journal, then save the mapping as a CSV file.

# ERIH-PLUS columns that may hold an ISSN for a journal.
ERIH_ISSN_COLUMNS = ['print issn', 'online issn']

# Column layout of the resulting mapping file.
MAPPING_COLUMNS = ['oc_omid', 'oc_issn', 'ep_id', 'ep_issn']


def build_erih_issn_lookup(erih_plus_df):
    """Reshape the ERIH-PLUS table into one row per (journal id, ISSN) pair.

    Args:
        erih_plus_df: ERIH-PLUS dataframe with the columns 'journal id',
            'print issn' and 'online issn'.

    Returns:
        A dataframe with the columns 'journal id' and 'issn', without missing
        ISSNs and without duplicated pairs.
    """
    long_df = erih_plus_df.melt(
        id_vars=['journal id'],
        value_vars=ERIH_ISSN_COLUMNS,
        value_name='issn',
    )
    # Missing ISSNs must go: pandas matches null merge keys against each other.
    lookup = long_df[['journal id', 'issn']].dropna(subset=['issn'])
    return lookup.drop_duplicates().reset_index(drop=True)


def map_venue_chunk(erih_issn_df, chunk):
    """Match the venue rows of one OpenCitations Meta chunk to ERIH-PLUS.

    Args:
        erih_issn_df: Lookup produced by ``build_erih_issn_lookup``.
        chunk: One chunk of the OpenCitations Meta dump.
            NOTE(review): assumed to carry the columns 'type', 'issn',
            'oc_omid' and 'oc_issn', as the original column selection
            implied; confirm against the actual dump schema.

    Returns:
        A dataframe with the columns 'oc_omid', 'oc_issn', 'ep_id' and
        'ep_issn'; empty when nothing matches.
    """
    # Keep only venue rows that actually have an ISSN to match on.
    venue_chunk = chunk[chunk['type'] == 'venue'].dropna(subset=['issn'])
    if venue_chunk.empty or erih_issn_df.empty:
        return pd.DataFrame(columns=MAPPING_COLUMNS)

    # A single-key merge on the long lookup covers both print and online
    # ISSNs; merge() rejects left_on/right_on lists of different lengths.
    merged_chunk = erih_issn_df.merge(venue_chunk, on='issn', how='inner')

    # Keep only the relevant columns for the mapping dataframe
    merged_chunk = merged_chunk[['oc_omid', 'oc_issn', 'journal id', 'issn']]
    return merged_chunk.rename(columns={'journal id': 'ep_id', 'issn': 'ep_issn'})


# Runs when executed as a notebook cell or script (where __name__ is
# "__main__"); importing the helpers alone does not touch the filesystem.
if __name__ == "__main__":
    # Read ERIH-PLUS approved journals dataset
    erih_plus_df = pd.read_csv('ERIHPLUSapprovedJournals.csv')
    erih_issn_df = build_erih_issn_lookup(erih_plus_df)

    # Get the list of CSV files in the OpenCitations Meta data dump directory
    # (sorted so the output row order is reproducible across runs)
    csv_directory = 'path/to/csv/files'
    csv_files = sorted(f for f in os.listdir(csv_directory) if f.endswith('.csv'))

    # Read OpenCitations Meta data dump in chunks to bound memory usage
    chunksize = 10 ** 5
    mapped_chunks = []

    # Iterate over the CSV files
    for csv_file in csv_files:
        meta_data_chunks = pd.read_csv(os.path.join(csv_directory, csv_file), chunksize=chunksize)

        # Iterate over the chunks of OpenCitations Meta data dump
        for chunk in meta_data_chunks:
            mapped_chunk = map_venue_chunk(erih_issn_df, chunk)
            if not mapped_chunk.empty:
                mapped_chunks.append(mapped_chunk)

    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0
    # and re-copied the whole frame on every call.
    if mapped_chunks:
        mapping_df = pd.concat(mapped_chunks, ignore_index=True)
    else:
        mapping_df = pd.DataFrame(columns=MAPPING_COLUMNS)

    # Save the mapping dataframe as a CSV file
    mapping_df.to_csv('OpenCitations_Meta_ERIH_PLUS_mapping.csv', index=False)


In [None]:
import pandas as pd

# Flag the venues of the OpenCitations Meta / ERIH-PLUS mapping whose ISSN is
# listed in the DOAJ dump, then save the result as a CSV file.

# DOAJ dump columns that may hold an ISSN for a journal.
# NOTE(review): column names kept from the original cell; confirm against the
# header of the actual DOAJ dump.
DOAJ_ISSN_COLUMNS = ['Print ISSN', 'Online ISSN']


def build_oa_issn_dict(doaj_df):
    """Collect every non-missing ISSN of the DOAJ dump.

    Args:
        doaj_df: DOAJ dataframe with the columns 'Print ISSN' and
            'Online ISSN'.

    Returns:
        A dictionary mapping each ISSN found in either column to ``True``.
    """
    # Flatten both columns row by row in one pass instead of iterating over
    # the dataframe with iterrows(), which builds a Series per row.
    issn_values = doaj_df[DOAJ_ISSN_COLUMNS].to_numpy(dtype=object).ravel()
    return {issn: True for issn in issn_values if not pd.isna(issn)}


def add_open_access_column(mapping_df, oa_issn_dict):
    """Return a copy of the mapping with an 'Open Access' column added.

    Args:
        mapping_df: Mapping dataframe with an 'ep_issn' column.
        oa_issn_dict: Dictionary of Open Access ISSNs, as returned by
            ``build_oa_issn_dict``.

    Returns:
        A new dataframe whose 'Open Access' column is ``True`` when the
        'ep_issn' value is a key of ``oa_issn_dict`` and ``'Unknown'``
        otherwise (including missing ISSNs). The input is not modified.
    """
    # One lookup with a default replaces map() followed by fillna() on a
    # mixed-type object column.
    open_access = mapping_df['ep_issn'].map(
        lambda issn: oa_issn_dict.get(issn, 'Unknown')
    )
    return mapping_df.assign(**{'Open Access': open_access})


# Runs when executed as a notebook cell or script (where __name__ is
# "__main__"); importing the helpers alone does not touch the filesystem.
if __name__ == "__main__":
    # Read the DOAJ dump CSV file
    doaj_df = pd.read_csv('doaj_dump.csv')

    # Create a dictionary of Open Access ISSNs
    oa_issn_dict = build_oa_issn_dict(doaj_df)

    # Read the mapping dataframe
    mapping_df = pd.read_csv('OpenCitations_Meta_ERIH_PLUS_mapping.csv')

    # Merge Open Access information with the main dataframe; ISSNs that are
    # not listed in DOAJ are marked 'Unknown'
    mapping_df = add_open_access_column(mapping_df, oa_issn_dict)

    # Save the updated dataframe as a CSV file
    mapping_df.to_csv('OpenCitations_Meta_ERIH_PLUS_OA_mapping.csv', index=False)
