In [1]:
from urllib.parse import quote

import pandas as pd

# Load data

In [2]:
# Read the magazine-level and the issue-level table; both files are UTF-16 CSVs.
df_magazines, df_issues = (
    pd.read_csv(csv_path, encoding='utf-16')
    for csv_path in (
        '../data/target_data/east_middle_europe_magazines.csv',
        '../data/target_data/east_middle_europe_magazines_issues.csv',
    )
)

# Sort rows alphabetically by title (case-insensitive)

In [3]:
# Order the magazines alphabetically by title, ignoring upper/lower case.
df_magazines = df_magazines.sort_values(
    by=['magazine_title'],
    key=lambda titles: titles.str.lower(),
)

In [4]:
# Order the issues alphabetically by title, ignoring upper/lower case.
df_issues = df_issues.sort_values(
    by=['issue_title'],
    key=lambda titles: titles.str.lower(),
)

In [5]:
# Persist the sorted tables. These are the same paths the notebook loaded the
# data from, so the source files are overwritten in place.
df_magazines.to_csv(
    '../data/target_data/east_middle_europe_magazines.csv',
    encoding='utf-16',
    index=False,
)
df_issues.to_csv(
    '../data/target_data/east_middle_europe_magazines_issues.csv',
    encoding='utf-16',
    index=False,
)

# Merge the rows of each issue in order to upload the data to the MediaWiki Catalogue of Diskmags

In [6]:
# Order the issue rows by source name (case-insensitive) into a separate frame,
# leaving `df_issues` itself untouched.
df_issues_sorted = df_issues.sort_values(
    by=['source'],
    key=lambda sources: sources.str.lower(),
)

In [7]:
def join_non_missing(values):
    """Collapse several cell values into one '; '-separated string.

    Each non-missing value is split on '; ' and the resulting parts are
    collected without duplicates, keeping the order of first appearance.

    Parameters
    ----------
    values : iterable
        Cell values to combine (e.g. the values of one column within a
        group). Missing entries (None / NaN) are skipped.

    Returns
    -------
    str
        The distinct parts joined with '; ', or an empty string when every
        value is missing.
    """
    # A dict keeps insertion order and gives constant-time duplicate checks.
    distinct_parts = {}
    for value in values:
        if pd.isna(value):
            continue
        # str() guards against non-string cells (e.g. a year column parsed
        # as integers), which have no .split() and would raise AttributeError.
        for part in str(value).split('; '):
            distinct_parts.setdefault(part, None)
    return '; '.join(distinct_parts)

In [8]:
# Collapse all rows that share the same issue and magazine title into a single
# row; every other column is combined with join_non_missing.
# Note: groupby's default (dropna=True) leaves out rows whose issue_title or
# magazine_title is missing, so such rows do not appear in the merged table.
issue_groups = df_issues_sorted.groupby(['issue_title', 'magazine_title'])
df_issues_merged = issue_groups.agg(join_non_missing).reset_index()

In [9]:
# Inspect the merged table (one row per issue_title / magazine_title pair).
df_issues_merged

Unnamed: 0,issue_title,magazine_title,orig_issue_title,origin,language,release_date,system_name_standardized,system_name,editor,source,link,disclaimer
0,Alamakota No.0,Alamakota,Alamakota 0; Alamakota - issue #0,Poland,Polish,1991-05-10; 1991-05,Amiga,Amiga OCS/ECS,Luzers; Luzers Team,Demozoo; Pouet,https://demozoo.org//productions/249444/; http...,
1,Alamakota No.1,Alamakota,Alamakota 1; alamakota - issue #1,Poland,Polish,1991-09; 1991-08,Amiga,Amiga OCS/ECS,Luzers; Luzers Team,Demozoo; Pouet,https://demozoo.org//productions/249445/; http...,
2,Alamakota No.2,Alamakota,Alamakota 2; alamakota - issue #2,Poland,Polish,1991-11; 1991-10,Amiga,Amiga OCS/ECS,Luzers; Luzers Team,Demozoo; Pouet,https://demozoo.org//productions/249446/; http...,
3,Alamakota No.3,Alamakota,Alamakota 3; Alamakota - issue #3,Poland,Polish,1992,Amiga,Amiga OCS/ECS,Luzers; Luzers Team,Demozoo; Pouet,https://demozoo.org//productions/249447/; http...,
4,Ale Jaja (issue),Ale Jaja,Ale Jaja,Poland,Polish,1994,Amiga,Amiga OCS/ECS,Maksiu,Demozoo,https://demozoo.org//productions/220181/,
...,...,...,...,...,...,...,...,...,...,...,...,...
586,Zig Zag No.8,Zig Zag,Zig Zag 8; Zig Zag #08,Poland,Polish,1994-04-26; 1994-04,Amiga,Amiga OCS/ECS,Union,Demozoo; Pouet,https://demozoo.org//productions/139133/; http...,
587,Zosia No.1,Zosia,Zosia 1; zosia #1,Poland,English,1999-12,Amiga,Amiga AGA,Madwizards; MadWizards,Demozoo; Pouet,https://demozoo.org//productions/9059/; https:...,
588,Zosia No.2,Zosia,Zosia 2; zosia #2,Poland,English,2000-04-25; 2000-04,Amiga,Amiga OCS/ECS; Amiga AGA,Madwizards; MadWizards,Demozoo; Pouet,https://demozoo.org//productions/248743/; http...,
589,Zosia No.3,Zosia,Zosia 3; zosia #3,Poland,English,2000,Amiga,Amiga AGA,Madwizards; MadWizards,Demozoo; Pouet,https://demozoo.org//productions/9060/; https:...,


In [10]:
# Rename the columns to the 'Title' / 'Issue[...]' headers used for the
# MediaWiki import file.
# NOTE(review): 'system_name_standardized' is mapped to
# 'Issue[OriginalSystemName]' while 'system_name' becomes 'Issue[Systems]';
# confirm this pairing is intended and not swapped.
df_issues_merged = df_issues_merged.rename(
    columns={
        'issue_title': 'Title',
        'magazine_title': 'Issue[Magazine]',
        'orig_issue_title': 'Issue[Spellings]',
        'origin': 'Issue[Origin]',
        'language': 'Issue[Language]',
        'release_date': 'Issue[Release Date]',
        'system_name_standardized': 'Issue[OriginalSystemName]',
        'system_name': 'Issue[Systems]',
        'editor': 'Issue[Groups]',
        'source': 'Issue[Source]',
        'link': 'Issue[Links]',
        'disclaimer': 'Issue[Disclaimer]',
    }
)

In [11]:
# Write the merged issue table as a UTF-16 CSV (without the index column).
df_issues_merged.to_csv(
    '../data/target_data/mw_import_east_middle_europe_magazines_issues.csv',
    index=False,
    encoding='utf-16',
)

# Prepare the datasets in order to publish them in other resources

## Replace country names and languages with their codes according to the ZDB format

In [12]:
# Lookup tables: full name as it appears in the data -> code that replaces it.
country_codes = {
    'Poland': 'PL',
    'Slovenia': 'SI',
    'Czech Republic': 'CZ',
    'Hungary': 'HU',
    'Sweden': 'SE',
}
language_codes = {
    'Polish': 'pol',
    'Hungarian': 'hun',
    'Slovenian': 'slv',
    'Czech': 'cze',
    'English': 'eng',
}

In [13]:
# Swap the full country and language names for their codes in both tables.
# The replacement is a substring match, so cells holding several values
# (e.g. 'English; Polish') are recoded part by part.
# regex=False is passed explicitly: the default of Series.str.replace was
# regex=True before pandas 2.0 and is regex=False since, so stating it keeps
# the names treated as literal text on every pandas version.
for country, code in country_codes.items():
    df_magazines['origin'] = df_magazines['origin'].str.replace(country, code, regex=False)
    df_issues['origin'] = df_issues['origin'].str.replace(country, code, regex=False)
for language, code in language_codes.items():
    df_magazines['language'] = df_magazines['language'].str.replace(language, code, regex=False)
    df_issues['language'] = df_issues['language'].str.replace(language, code, regex=False)

## Replace missing values with dots

In [14]:
# Use '.' as the placeholder for every empty string and every missing cell.
df_issues = df_issues.replace('', '.')
df_issues = df_issues.fillna('.')

df_magazines = df_magazines.replace('', '.')
df_magazines = df_magazines.fillna('.')

## Insert links to the diskmags catalogue

In [15]:
def create_link(value):
    """Build the diskmags.de catalogue URL for a magazine or issue title.

    Parameters
    ----------
    value : str
        The page title, or '.' (the placeholder used for missing values).

    Returns
    -------
    str
        '.' when `value` is the placeholder; otherwise the catalogue URL whose
        ``title`` parameter is the title with spaces turned into underscores
        and URL-unsafe characters percent-encoded.
    """
    if value == '.':
        return '.'
    page_title = value.replace(' ', '_')
    # Percent-encode characters that would otherwise corrupt the query string
    # ('#', '&', '+', '%', '=', '?', non-ASCII letters, ...). The characters in
    # `safe` stay literal so that ordinary titles such as "Always (2)" produce
    # exactly the same URL as before.
    encoded_title = quote(page_title, safe="/:;@$!*(),'")
    return f"https://diskmags.de/index.php?title={encoded_title}"

In [16]:
# Derive each magazine's catalogue URL from its title.
df_magazines['diskmags_catalogue_link'] = df_magazines['magazine_title'].map(create_link)

In [17]:
# Derive each issue's catalogue URL from its title.
df_issues['diskmags_catalogue_link'] = df_issues['issue_title'].map(create_link)

In [18]:
# Sort both tables by title again, ignoring upper/lower case.
df_magazines = df_magazines.sort_values(
    by='magazine_title',
    key=lambda titles: titles.str.lower(),
)
df_issues = df_issues.sort_values(
    by='issue_title',
    key=lambda titles: titles.str.lower(),
)

## Reorder the columns

In [19]:
# Column layout of the published issue table; columns not listed are dropped.
new_order_issues = [
    'issue_title',
    'orig_issue_title',
    'magazine_title',
    'origin',
    'language',
    'release_date',
    'system_name_standardized',
    'system_name',
    'editor',
    'source',
    'link',
    'diskmags_catalogue_link',
    'disclaimer',
]
# Apply the layout, then give two columns their publication names.
df_issues = df_issues[new_order_issues].rename(
    columns={
        'orig_issue_title': 'issue_title_original',
        'link': 'source_link',
    }
)

In [20]:
# Column layout of the magazine table; 'region' is selected here and removed
# again right afterwards, so it is not part of the published table.
new_order_magazines = [
    'magazine_title',
    'aka',
    'language',
    'origin',
    'first_publication_date',
    'last_publication_date',
    'system',
    'source',
    'editor',
    'region',
    'diskmags_catalogue_link',
]
df_magazines = df_magazines[new_order_magazines].drop(columns='region')

In [21]:
# Preview the first rows of the reordered magazine table.
df_magazines.head()

Unnamed: 0,magazine_title,aka,language,origin,first_publication_date,last_publication_date,system,source,editor,diskmags_catalogue_link
0,Alamakota,.,pol,PL,1991-05-10,1992,AMIGA OCS/ECS,Demozoo; Pouet,Luzers; Luzers Team,https://diskmags.de/index.php?title=Alamakota
1,Ale Jaja,.,pol,PL,1994,1994,AMIGA OCS/ECS,Demozoo,Maksiu,https://diskmags.de/index.php?title=Ale_Jaja
2,Always (2),.,eng; pol,PL,1993,1996,Commodore 64,CSDB; Internet Archive; Pouet,Elysium; Sunga,https://diskmags.de/index.php?title=Always_(2)
3,Amber,.,eng,PL,1998,1999-12,MS-DOS,Demozoo; Internet Archive; Pouet; Volko Encycl...,Amber Diskmag Editing Team,https://diskmags.de/index.php?title=Amber
4,AmigaCS,.,cze,CZ,1998-03-21,1998-12-20,AMIGA AGA,Demozoo; Pouet,Abort,https://diskmags.de/index.php?title=AmigaCS


In [22]:
# Preview the first rows of the reordered issue table.
df_issues.head()

Unnamed: 0,issue_title,issue_title_original,magazine_title,origin,language,release_date,system_name_standardized,system_name,editor,source,source_link,diskmags_catalogue_link,disclaimer
1124,.,.,X-Magazine,CZ,cze,.,ZX Spectrum,ZX Spectrum,.,ZXpress,https://zxpress.ru/issue.php?id=311,.,.
1123,.,.,ZX Land,PL,pol,.,ZX Spectrum,ZX Spectrum,.,ZXpress,https://zxpress.ru/issue.php?id=303,.,.
0,Alamakota No.0,Alamakota 0,Alamakota,PL,pol,1991-05-10,Amiga,Amiga OCS/ECS,Luzers,Demozoo,https://demozoo.org//productions/249444/,https://diskmags.de/index.php?title=Alamakota_...,.
1,Alamakota No.0,Alamakota - issue #0,Alamakota,PL,pol,1991-05,Amiga,Amiga OCS/ECS,Luzers Team,Pouet,https://www.pouet.net/prod.php?which=60514,https://diskmags.de/index.php?title=Alamakota_...,.
3,Alamakota No.1,Alamakota 1,Alamakota,PL,pol,1991-09,Amiga,Amiga OCS/ECS,Luzers,Demozoo,https://demozoo.org//productions/249445/,https://diskmags.de/index.php?title=Alamakota_...,.


In [24]:
# Write the publication-ready tables as UTF-16 CSVs (without the index column).
# NOTE(review): these are the same paths the notebook reads its input from, so
# the source files are replaced by the recoded tables. A second full run would
# then start from data in which 'orig_issue_title', 'link' and 'region' no
# longer exist. Confirm that overwriting the inputs is intended.
df_magazines.to_csv(
    '../data/target_data/east_middle_europe_magazines.csv',
    index=False,
    encoding='utf-16',
)
df_issues.to_csv(
    '../data/target_data/east_middle_europe_magazines_issues.csv',
    index=False,
    encoding='utf-16',
)