In [1]:
import os
import pandas as pd
import requests

# 1. Load data

In [2]:
# Open all four CSV files in one `with` so every handle is closed afterwards.
# The main export is decoded as UTF-16, the three other files as UTF-8.
with open('csv/mw_import.csv', encoding='utf-16') as main_dataset, \
     open('csv/diskmags_csdb.csv', encoding='utf-8') as csdb_dataset, \
     open('csv/diskmags_demozoo.csv', encoding='utf-8') as demozoo_dataset, \
     open('csv/diskmags_pouet.csv', encoding='utf-8') as pouet_dataset:
    # Main dataset
    df_main = pd.read_csv(main_dataset)

    # External datasets, one frame per file
    df_csdb = pd.read_csv(csdb_dataset)
    df_demozoo = pd.read_csv(demozoo_dataset)
    df_pouet = pd.read_csv(pouet_dataset)

## 1.1 Create subset of German-language magazines for the Commodore 64

In [3]:
# Step 1: keep rows whose '; '-separated language list contains 'German'.
# Non-string cells never match. `.astype(bool)` keeps the mask boolean even when
# the frame is empty, so it is never mistaken for a list of column labels.
german_mask = df_main['Magazine[Language]'].apply(
    lambda x: isinstance(x, str) and 'German' in x.split('; ')
).astype(bool)
df_main_german = df_main.loc[german_mask]

# Step 2: of those, keep rows whose '; '-separated systems list contains 'Commodore 64'.
c64_mask = df_main_german['Magazine[Systems]'].apply(
    lambda x: isinstance(x, str) and 'Commodore 64' in x.split('; ')
).astype(bool)
# `.copy()` makes the subset an independent frame, so adding a column below
# does not raise SettingWithCopyWarning.
df_main_german = df_main_german.loc[c64_mask].copy()

# Export before the helper column is added, so the CSV holds only the original columns.
df_main_german.to_csv('csv/mw_import_c64_german.csv', index=False)

# Lowercase copy of the systems string; None for non-string cells.
df_main_german['systems_lower'] = df_main_german['Magazine[Systems]'].apply(
    lambda x: x.lower() if isinstance(x, str) else None
)

## 1.2 Prepare other datasets

In [4]:
# Give each external dataset a 'system_lower' column: the lowercase form of
# 'platform', or None where the cell is not a string.
for _frame in (df_csdb, df_demozoo, df_pouet):
    _frame['system_lower'] = _frame['platform'].apply(
        lambda value: value.lower() if isinstance(value, str) else None
    )

# 2. Get the list of all issues

In [5]:
german_diskmags_list = []

# Columns kept from every external dataset
ISSUE_COLUMNS = ['title', 'issue', 'download_links', 'platform']


def _split_cell(value, sep):
    """Split a delimited string cell into its parts.

    Non-string cells (e.g. NaN from an empty CSV field) yield an empty list
    instead of raising AttributeError on `.split`.
    """
    return value.split(sep) if isinstance(value, str) else []


def _find_issues(df_source, titles, systems=None):
    """Return the rows of one external dataset that match a magazine.

    Parameters
    ----------
    df_source : pandas.DataFrame
        External dataset with a 'title' column and the ISSUE_COLUMNS;
        'system_lower' is also read when `systems` is given.
    titles : list of str
        Magazine titles; a row matches when any of them equals one of the
        ', '-separated entries of the row's 'title' cell.
    systems : list of str or None
        Lowercase system names; when given, a row is kept only if one of them
        occurs as a substring of the row's 'system_lower' cell. Rows whose
        'system_lower' is not a string are dropped. None disables the filter.

    Returns
    -------
    pandas.DataFrame
        The matching rows restricted to ISSUE_COLUMNS (possibly empty).
    """
    # `.astype(bool)` keeps the mask boolean even when it is empty; an empty
    # non-boolean Series would be interpreted as a list of column labels.
    title_mask = df_source['title'].apply(
        lambda x: any(item in _split_cell(x, ', ') for item in titles)
    ).astype(bool)
    matches = df_source.loc[title_mask]

    if systems is not None:
        system_mask = matches['system_lower'].apply(
            lambda x: isinstance(x, str) and any(item in x for item in systems)
        ).astype(bool)
        matches = matches.loc[system_mask]

    return matches[ISSUE_COLUMNS]


# Iterate over all rows of the main dataset
for index, row in df_main_german.iterrows():
    if not isinstance(row['systems_lower'], str): continue
    # Get values ('; '-separated lists); missing cells become empty lists
    title = _split_cell(row['Title'], '; ')
    system = row['systems_lower'].split('; ')
    source = _split_cell(row['Magazine[Source]'], '; ')
    if not title or not source:
        # Nothing to match on, or no external source listed for this magazine
        continue

    # (source name, dataset, system filter) in the order CSDB, Demozoo, Pouet.
    # CSDB rows are matched by title only, without a platform filter.
    # NOTE(review): presumably because CSDB lists only Commodore 64 releases - confirm.
    lookups = (
        ('CSDB', df_csdb, None),
        ('Demozoo', df_demozoo, system),
        ('Pouet', df_pouet, system),
    )
    for source_name, df_source, systems in lookups:
        if source_name not in source:
            continue
        matches = _find_issues(df_source, title, systems)
        # Empty results add no rows, so they are not collected
        if not matches.empty:
            german_diskmags_list.append(matches)

# Concat subsets to the dataframe; pd.concat raises ValueError on an empty list,
# so fall back to an empty frame with the expected columns
if german_diskmags_list:
    subset_issues_german = pd.concat(german_diskmags_list, ignore_index=True)
else:
    subset_issues_german = pd.DataFrame(columns=ISSUE_COLUMNS)

# Remove duplicates, first by download link, then by issue (first occurrence wins).
# NOTE(review): missing values compare as duplicates of each other, so only the
# first row without a download link survives - confirm this is intended.
subset_issues_german = subset_issues_german.drop_duplicates(subset='download_links', keep='first')
subset_issues_german = subset_issues_german.drop_duplicates(subset='issue', keep='first')

# Case-insensitive sort by issue
subset_issues_german = subset_issues_german.sort_values(by='issue', key=lambda x: x.str.lower())

In [6]:
# Write the de-duplicated, sorted issue list to disk without the index column
subset_issues_german.to_csv(
    path_or_buf='csv/issues_c64_german.csv',
    index=False,
)