In [1]:
# Name changes 
# This notebook generates the input for Table 2 in the paper 
#
# 5 July 2024
# marieke.van.erp@dh.huc.knaw.nl

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.patches as mpatches

In [22]:
# Read in the data 
company_data = pd.read_csv('data/company_names_dates.tsv', sep='\t')
# Do some clean up to remove duplicates 
pruned = company_data[['company.value', 'companyLabel.value', 'name.value', 'starttime.value']].drop_duplicates()
pruned['occurrence_companies'] = pruned.groupby('company.value')['company.value'].transform('size')

In [25]:
# General stats of the dataset 
# Unique number of values 
pruned.nunique()

company.value           1643
companyLabel.value      1638
name.value              3555
starttime.value         1665
occurrence_companies      10
dtype: int64

In [26]:
# Get the highest number of name changes 
pruned['occurrence_companies'].max()

10

In [28]:
# Check out the companies with the most name changes 
pruned.loc[pruned['occurrence_companies'] > 8]

Unnamed: 0,company.value,companyLabel.value,name.value,starttime.value,occurrence_companies
193,http://www.wikidata.org/entity/Q804868,Baldwin Locomotive Works,M. W. Baldwin,1825-01-01T00:00:00Z,9
194,http://www.wikidata.org/entity/Q804868,Baldwin Locomotive Works,"Baldwin, Vale & Hufty",1839-01-01T00:00:00Z,9
195,http://www.wikidata.org/entity/Q804868,Baldwin Locomotive Works,Baldwin & Whitney,1842-01-01T00:00:00Z,9
196,http://www.wikidata.org/entity/Q804868,Baldwin Locomotive Works,M. W. Baldwin,1846-01-01T00:00:00Z,9
197,http://www.wikidata.org/entity/Q804868,Baldwin Locomotive Works,M. W. Baldwin & Company,1854-01-01T00:00:00Z,9
539,http://www.wikidata.org/entity/Q1722322,Kaliwerk Krügershall Teutschenthal,VEB Kali- und Steinsalzbetrieb Deutschland,1952-01-01T00:00:00Z,10
540,http://www.wikidata.org/entity/Q1722322,Kaliwerk Krügershall Teutschenthal,Werk Krügershall,1945-01-01T00:00:00Z,10
541,http://www.wikidata.org/entity/Q1722322,Kaliwerk Krügershall Teutschenthal,"VEB Kombinat Kali, VEB Kali- und Steinsalzbetr...",1970-01-01T00:00:00Z,10
542,http://www.wikidata.org/entity/Q1722322,Kaliwerk Krügershall Teutschenthal,"Burbach-Kaliwerke A.G., Magdeburg, Werk Krüger...",1929-01-01T00:00:00Z,10
543,http://www.wikidata.org/entity/Q1722322,Kaliwerk Krügershall Teutschenthal,Kaliwerk Teutschenthal,1948-07-01T00:00:00Z,10


In [33]:
# Zoom in on an interesting example 
pruned.loc[pruned['company.value'] == 'http://www.wikidata.org/entity/Q154037']

Unnamed: 0,company.value,companyLabel.value,name.value,starttime.value,occurrence_companies
75,http://www.wikidata.org/entity/Q154037,TotalEnergies,TotalFinaElf SA,2000-01-01T00:00:00Z,8
76,http://www.wikidata.org/entity/Q154037,TotalEnergies,Total SE,2020-01-01T00:00:00Z,8
77,http://www.wikidata.org/entity/Q154037,TotalEnergies,Total SA,2003-01-01T00:00:00Z,8
90,http://www.wikidata.org/entity/Q154037,TotalEnergies,TotalFina SA,1999-01-01T00:00:00Z,8
91,http://www.wikidata.org/entity/Q154037,TotalEnergies,Total SA,1991-01-01T00:00:00Z,8
92,http://www.wikidata.org/entity/Q154037,TotalEnergies,Total − Compagnie Française des Pétroles,1985-01-01T00:00:00Z,8
93,http://www.wikidata.org/entity/Q154037,TotalEnergies,Compagnie Française des Pétroles,1924-01-01T00:00:00Z,8
94,http://www.wikidata.org/entity/Q154037,TotalEnergies,TotalEnergies SE,2021-01-01T00:00:00Z,8


In [30]:
pruned['occurrence_companies'].mean()

2.9811582155721807

In [31]:
pruned['occurrence_companies'].median()

3.0

In [32]:
pruned['occurrence_companies'].std()

1.7189370747744275