In [1]:
import pandas as pd
import re

In [2]:
def fix_country(country):
    country = re.sub(r"(\(.*?\)|\[.*?\])", " ", country)
    country = re.sub(r"Flag of .*?\.\w*", " ", country)
    countries = country.split()
    result = []
    current = []
    for country in countries:
        current.append(country)
        if not country in ["United", "Soviet", "Ottoman", "Allied-occupied", "South"]:
            result.append(" ".join(current))
            current = []
    if "Russia" in result or "Soviet Union" in result or "Ukraine" in result:
        return "USSR/Russia"
    return result[0]

In [3]:
def read_table(name):
    table = pd.read_csv(name,
                        sep="\t",
                        names=["name", "country", "years", "count1", "count2"],
                        index_col=False)
    table.country = table.country.map(fix_country)
    return table

In [4]:
def sorted_cases(table):
    return table.groupby("country").count().name.sort_values(ascending=False)

In [5]:
for no, league in enumerate(["Platinum", "Golden", "Silver", "Bronze", "Medical", "Group"], start=1):
    table = read_table(f"{no}.tsv")
    scores = sorted_cases(table)[:5]
    print(league, "league:")
    print(scores)
    print()

Platinum league:
country
USSR/Russia      9
United States    5
Colombia         3
Brazil           3
India            2
Name: name, dtype: int64

Golden league:
country
United States    17
USSR/Russia      10
South Africa     10
India             7
China             3
Name: name, dtype: int64

Silver league:
country
United States    59
USSR/Russia      24
South Africa      8
France            5
China             5
Name: name, dtype: int64

Bronze league:
country
United States     10
United Kingdom     7
Italy              2
USSR/Russia        1
Turkey             1
Name: name, dtype: int64

Medical league:
country
United States     11
France             4
United Kingdom     2
Germany            2
USSR/Russia        1
Name: name, dtype: int64

Group league:
country
United States     13
USSR/Russia        6
Mexico             6
China              5
United Kingdom     3
Name: name, dtype: int64



In [6]:
scores = []
for no, league in enumerate(["Platinum", "Golden", "Silver", "Bronze", "Medical", "Group"], start=1):
    scores.append(sorted_cases(read_table(f"{no}.tsv")))
total = sum(scores).dropna().astype(int)
print(total.sort_values(ascending=False))

country
United States    115
USSR/Russia       51
Mexico            11
Name: name, dtype: int64
