In [5]:
import pandas as pd
import os
import ast

## A function used to update Country names cleanly is imported from another file in this project
from aux_fix_country_utils import fix_country_from_text

import warnings
warnings.simplefilter(action='ignore', category=pd.errors.DtypeWarning)

In [1]:
# Folder holding the "base" copy of the parsed CSVs, typed in by the user.
# Later cells build file paths by plain concatenation (DF_input_base + file name),
# so make sure the folder ends with a path separator even if the user left it out.
# os.path.join(path, "") appends the separator only when it is missing.
DF_input_base = os.path.join(input().strip(), "")

  C:\Users\svalb\OneDrive\Escritorio\Data_40_years_cancer_studies\parsedXMLs_base\


In [2]:
# Folder holding the "match GLOBOCAN" copy of the parsed CSVs, typed in by the user.
# Later cells build file paths by plain concatenation (DF_input_match_GLOBOCAN + file name),
# so make sure the folder ends with a path separator even if the user left it out.
# os.path.join(path, "") appends the separator only when it is missing.
DF_input_match_GLOBOCAN = os.path.join(input().strip(), "")

  C:\Users\svalb\OneDrive\Escritorio\Data_40_years_cancer_studies\parsedXMLs_match_GLOBOCAN\


In [6]:
# Collect the names of every .csv file found in the base folder
list_csvs_base = [entry for entry in os.listdir(DF_input_base) if entry.endswith(".csv")]

# Number of files, used for the progress messages printed further down
n_csvs_base = len(list_csvs_base)

In [7]:
# Collect the names of every .csv file found in the match-GLOBOCAN folder
list_csvs_match_GLOBOCAN = [entry for entry in os.listdir(DF_input_match_GLOBOCAN) if entry.endswith(".csv")]

# Number of files, used for the progress messages printed further down
n_csvs_match_GLOBOCAN = len(list_csvs_match_GLOBOCAN)

## Note
The following corrections are performed after splitting the articles' dataset into two copies: one in which the cancer and country names are untouched (base), and another in which the cancer and country names are modified to match those in the Globocan dataset. The corrections here are applied to both copies.

In [6]:
# Global correction: Replace "Pitcairn" by "Italy" in Country.
# All assignments to Pitcairn correspond to Italian research centers.
# The same replacement is applied to both copies of the dataset (base and match Globocan).
for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_base, list_csvs_base),
    ("match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN),
):
    print(f"--Fixing {dataset_label} dataset")

    n_dataset_csvs = len(dataset_csvs)
    # enumerate gives the progress counter directly (no list.index lookup per file)
    for position, csv in enumerate(dataset_csvs, start=1):
        print(f"Parsing csv: {csv} ({position}/{n_dataset_csvs})")
        csv_path = os.path.join(dataset_dir, csv)
        df = pd.read_csv(csv_path)

        is_pitcairn = df["Country"] == "Pitcairn"
        # Only rewrite the file when at least one record was actually corrected
        if is_pitcairn.any():
            df.loc[is_pitcairn, "Country"] = "Italy"
            df.to_csv(csv_path, index=False)

        # Release the frame before loading the next (potentially large) file
        del df

    print(f"--Fixing done for {dataset_label} dataset")

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

### Systematic corrections for **Most_common_list_countries**
The following corrections aim to fix wrong Country assignments made via the **Most_common_list_countries** *Country_source*. In some cases systematic errors were found (e.g. in articles with the word "Perugia" in the affiliation, the assigned country was "Peru" instead of "Italy"). Fixing is done for both the base and the matching Globocan datasets.

In [8]:
def fixSystematicMost_common_list_countries(dataset, DF_location, list_csvs, country_wrong, problematic_text, country_right):
    """Correct a systematic wrong Country assignment in every CSV of one dataset copy.

    A record is a candidate when its ``Country_source`` is
    ``"Most_common_list_countries"`` and its ``Country`` equals ``country_wrong``.
    A candidate is corrected when at least one element of its ``NER_lastAuthor``
    list has a ``'text'`` value that CONTAINS ``problematic_text`` (substring match).
    Corrected records get ``Country = country_right`` and
    ``Country_source = "Manual_correction"``. Every row sharing the PMID of a
    corrected record is updated. Files are overwritten in place, and only when
    at least one record was corrected.

    Parameters
    ----------
    dataset : str
        Label used only in the progress messages.
    DF_location : str
        Folder containing the CSV files (with or without a trailing separator).
    list_csvs : list of str
        File names inside ``DF_location`` to process.
    country_wrong : str
        Country value that was wrongly assigned.
    problematic_text : str
        Text that, when contained in a NER element, identifies the wrong assignment.
    country_right : str
        Country value to assign instead.

    Raises
    ------
    ValueError, SyntaxError
        If a non-missing ``NER_lastAuthor`` value is not a valid Python literal
        (propagated from ``ast.literal_eval``).
    """
    import os

    print(f"--Fixing {dataset} dataset")
    n_csvs = len(list_csvs)
    for position, csv in enumerate(list_csvs, start=1):
        print(f"Parsing csv: {csv} ({position}/{n_csvs})")
        csv_path = os.path.join(DF_location, csv)
        df = pd.read_csv(csv_path)

        # Candidate records: Most_common_list_countries source and the problematic country.
        # One row per PMID (the first one), matching the original per-PMID lookup.
        candidates = (
            df.loc[
                (df["Country_source"] == "Most_common_list_countries") & (df["Country"] == country_wrong),
                ["PMID", "NER_lastAuthor"],
            ]
            .dropna(subset=["PMID"])
            .drop_duplicates(subset="PMID", keep="first")
        )

        pmids_to_fix = []
        for pmid, ner_raw in zip(candidates["PMID"], candidates["NER_lastAuthor"]):
            # A missing NER_lastAuthor is read back as NaN (a float); there is nothing to inspect
            if not isinstance(ner_raw, str):
                continue

            # NER_lastAuthor is stored as a string, convert it to a list of dicts
            if any(problematic_text in el['text'] for el in ast.literal_eval(ner_raw)):
                pmids_to_fix.append(pmid)

        # Rewrite the file only if something was actually corrected
        if pmids_to_fix:
            rows_to_fix = df["PMID"].isin(pmids_to_fix)
            df.loc[rows_to_fix, "Country"] = country_right
            df.loc[rows_to_fix, "Country_source"] = "Manual_correction"
            df.to_csv(csv_path, index=False)

        del df, candidates
    print(f"--Fixing done for {dataset} dataset")

In [8]:
## Most_common_list_countries fix: records containing "Perugia" were assigned to Peru instead of Italy
# Substring match: any NER element whose text contains "Perugia" triggers the correction.
fixSystematicMost_common_list_countries(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Peru", problematic_text="Perugia", country_right="Italy",
)
fixSystematicMost_common_list_countries(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Peru", problematic_text="Perugia", country_right="Italy",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [9]:
# Similar to the previous function but fixing only if the NER term is EXACTLY the problematic text
def fixSystematicMost_common_list_countries_FULL_MATCH(dataset, DF_location, list_csvs, country_wrong, problematic_text, country_right):
    """Correct a systematic wrong Country assignment, matching the NER text exactly.

    A record is a candidate when its ``Country_source`` is
    ``"Most_common_list_countries"`` and its ``Country`` equals ``country_wrong``.
    A candidate is corrected when at least one element of its ``NER_lastAuthor``
    list has a ``'text'`` value EQUAL to ``problematic_text`` (substrings do not count).
    Corrected records get ``Country = country_right`` and
    ``Country_source = "Manual_correction"``. Every row sharing the PMID of a
    corrected record is updated. Files are overwritten in place, and only when
    at least one record was corrected.

    Parameters
    ----------
    dataset : str
        Label used only in the progress messages.
    DF_location : str
        Folder containing the CSV files (with or without a trailing separator).
    list_csvs : list of str
        File names inside ``DF_location`` to process.
    country_wrong : str
        Country value that was wrongly assigned.
    problematic_text : str
        Exact NER text that identifies the wrong assignment.
    country_right : str
        Country value to assign instead.

    Raises
    ------
    ValueError, SyntaxError
        If a non-missing ``NER_lastAuthor`` value is not a valid Python literal
        (propagated from ``ast.literal_eval``).
    """
    import os

    print(f"--Fixing {dataset} dataset")
    n_csvs = len(list_csvs)
    for position, csv in enumerate(list_csvs, start=1):
        print(f"Parsing csv: {csv} ({position}/{n_csvs})")
        csv_path = os.path.join(DF_location, csv)
        df = pd.read_csv(csv_path)

        # Candidate records: Most_common_list_countries source and the problematic country.
        # One row per PMID (the first one), matching the original per-PMID lookup.
        candidates = (
            df.loc[
                (df["Country_source"] == "Most_common_list_countries") & (df["Country"] == country_wrong),
                ["PMID", "NER_lastAuthor"],
            ]
            .dropna(subset=["PMID"])
            .drop_duplicates(subset="PMID", keep="first")
        )

        pmids_to_fix = []
        for pmid, ner_raw in zip(candidates["PMID"], candidates["NER_lastAuthor"]):
            # A missing NER_lastAuthor is read back as NaN (a float); there is nothing to inspect
            if not isinstance(ner_raw, str):
                continue

            # NER_lastAuthor is stored as a string, convert it to a list of dicts
            if any(problematic_text == el['text'] for el in ast.literal_eval(ner_raw)):
                pmids_to_fix.append(pmid)

        # Rewrite the file only if something was actually corrected
        if pmids_to_fix:
            rows_to_fix = df["PMID"].isin(pmids_to_fix)
            df.loc[rows_to_fix, "Country"] = country_right
            df.loc[rows_to_fix, "Country_source"] = "Manual_correction"
            df.to_csv(csv_path, index=False)

        del df, candidates
    print(f"--Fixing done for {dataset} dataset")

In [10]:
## Most_common_list_countries fix: records with "PE" were assigned to Peru instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "PE"
# Important: some of these records should be assigned to other countries. Those are done manually below

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Peru", problematic_text="PE", country_right="Brazil",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Peru", problematic_text="PE", country_right="Brazil",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [11]:
## Most_common_list_countries fix: records with "Sweden" were assigned to United Kingdom instead of Sweden

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Sweden"

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="United Kingdom", problematic_text="Sweden", country_right="Sweden",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="United Kingdom", problematic_text="Sweden", country_right="Sweden",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [12]:
## Most_common_list_countries fix: records with "Denmark Hill" were assigned to Denmark instead of United Kingdom

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Denmark Hill"

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Denmark", problematic_text="Denmark Hill", country_right="United Kingdom",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Denmark", problematic_text="Denmark Hill", country_right="United Kingdom",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [8]:
## Most_common_list_countries fix: records with "N. Ireland" were assigned to Ireland instead of United Kingdom

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "N. Ireland"

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Ireland", problematic_text="N. Ireland", country_right="United Kingdom",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Ireland", problematic_text="N. Ireland", country_right="United Kingdom",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [21]:
## Most_common_list_countries fix: records with "Bd" were assigned to Bangladesh instead of France

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Bd"
# Important: some of these records should be assigned to other countries. Those are done manually below

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Bangladesh", problematic_text="Bd", country_right="France",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Bangladesh", problematic_text="Bd", country_right="France",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [221]:
## Most_common_list_countries fix: records with "MG" were assigned to Madagascar instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "MG"

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Madagascar", problematic_text="MG", country_right="Brazil",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Madagascar", problematic_text="MG", country_right="Brazil",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [304]:
## Most_common_list_countries fix: records with "Col" were assigned to Colombia instead of Mexico
# NOTE(review): this header previously read "Mexico instead of Colombia", which contradicts the call
# below (country_wrong="Colombia", country_right="Mexico"). Confirm that Colombia -> Mexico is intended.

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Col"

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Colombia", problematic_text="Col", country_right="Mexico",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Colombia", problematic_text="Col", country_right="Mexico",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [388]:
## Most_common_list_countries fix: records with "Jamaica" were assigned to Jamaica instead of United States
# NOTE(review): this header previously named Colombia as the true country, but the call below
# assigns country_right="United States". Confirm that United States is intended.

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Jamaica"
# Important: some of these records should be assigned to other countries. Those are done manually below

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Jamaica", problematic_text="Jamaica", country_right="United States",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Jamaica", problematic_text="Jamaica", country_right="United States",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [399]:
## Most_common_list_countries fix: records with "ES" were assigned to Spain instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "ES"
# Important: some of these records should be assigned to other countries. Those are done manually below

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Spain", problematic_text="ES", country_right="Brazil",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Spain", problematic_text="ES", country_right="Brazil",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [410]:
## Most_common_list_countries fix: records with "RS" were assigned to Serbia instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "RS"

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Serbia", problematic_text="RS", country_right="Brazil",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Serbia", problematic_text="RS", country_right="Brazil",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [412]:
## Most_common_list_countries fix: records with "Paraguay" were assigned to Paraguay instead of Argentina

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Paraguay"
# Important: some of these records should be assigned to other countries. Those are done manually below

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Paraguay", problematic_text="Paraguay", country_right="Argentina",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Paraguay", problematic_text="Paraguay", country_right="Argentina",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [457]:
## Most_common_list_countries fix: records with "SK" were assigned to Slovakia instead of Canada

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "SK"
# Important: some of these records should be assigned to other countries. Those are done manually below

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Slovakia", problematic_text="SK", country_right="Canada",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Slovakia", problematic_text="SK", country_right="Canada",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

In [10]:
## Most_common_list_countries fix: records with "BA" were assigned to Bosnia and Herzegovina instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "BA"
# Important: some of these records should be assigned to other countries. Those are done manually below

fixSystematicMost_common_list_countries_FULL_MATCH(
    "base", DF_input_base, list_csvs_base,
    country_wrong="Bosnia and Herzegovina", problematic_text="BA", country_right="Brazil",
)
fixSystematicMost_common_list_countries_FULL_MATCH(
    "match GLobocan", DF_input_match_GLOBOCAN, list_csvs_match_GLOBOCAN,
    country_wrong="Bosnia and Herzegovina", problematic_text="BA", country_right="Brazil",
)

--Fixing base dataset
Parsing csv: parsedX_100000.csv (1/45)
Parsing csv: parsedX_1000000.csv (2/45)
Parsing csv: parsedX_1100000.csv (3/45)
Parsing csv: parsedX_1200000.csv (4/45)
Parsing csv: parsedX_1300000.csv (5/45)
Parsing csv: parsedX_1400000.csv (6/45)
Parsing csv: parsedX_1500000.csv (7/45)
Parsing csv: parsedX_1600000.csv (8/45)
Parsing csv: parsedX_1700000.csv (9/45)
Parsing csv: parsedX_1800000.csv (10/45)
Parsing csv: parsedX_1900000.csv (11/45)
Parsing csv: parsedX_200000.csv (12/45)
Parsing csv: parsedX_2000000.csv (13/45)
Parsing csv: parsedX_2100000.csv (14/45)
Parsing csv: parsedX_2200000.csv (15/45)
Parsing csv: parsedX_2300000.csv (16/45)
Parsing csv: parsedX_2400000.csv (17/45)
Parsing csv: parsedX_2500000.csv (18/45)
Parsing csv: parsedX_2600000.csv (19/45)
Parsing csv: parsedX_2700000.csv (20/45)
Parsing csv: parsedX_2800000.csv (21/45)
Parsing csv: parsedX_2900000.csv (22/45)
Parsing csv: parsedX_300000.csv (23/45)
Parsing csv: parsedX_3000000.csv (24/45)
Parsin

### Correcting *parsedX_100000.csv*

In [13]:
# Load parsedX_100000.csv from each copy of the dataset (base and match GLOBOCAN)
df_parsedX_100000_base = pd.read_csv(f"{DF_input_base}parsedX_100000.csv")
df_parsedX_100000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_100000.csv")

In [14]:
## Replace found errors by true Country values

# PMID -> true country for the records of parsedX_100000.csv that need a manual correction.
# Keeping a single table guarantees that the base and match-GLOBOCAN copies receive
# exactly the same corrections.
manual_country_by_pmid_100000 = {
    1522964: "Italy",
    1318271: "France",
    1525053: "Australia",
    1586297: "Australia",
    1703193: "France",
    1768942: "France",
    1772998: "Ireland",
    1874576: "France",
    1573176: "United States",
    1349273: "Argentina",
    1375495: "France",
    1707769: "Argentina",
    1707770: "Argentina",
    1390303: "United Kingdom",
    1295379: "Italy",
    1947740: "Germany",
    1300849: "Argentina",
    1356784: "Sweden",
    1370978: "Canada",
    1387621: "Italy",
}

for manual_pmid, true_country in manual_country_by_pmid_100000.items():
    # Same order as before: base copy first, then match-GLOBOCAN copy
    for df_copy in (df_parsedX_100000_base, df_parsedX_100000_match_GLOBOCAN):
        # fix_country_from_text supplies the values for both columns
        # (presumably a [Country, Country_source] pair; verify in aux_fix_country_utils)
        df_copy.loc[df_copy["PMID"] == manual_pmid, ["Country", "Country_source"]] = fix_country_from_text(true_country)

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1409313, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1409313, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1423819, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1423819, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1445797, ["Country", "Country_source"]] = fix_country_from_text("Sweden")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1445797, ["Country", "Country_source"]] = fix_country_from_text("Sweden")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1449159, ["Country", "Country_source"]] = fix_country_from_text("Belgium")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1449159, ["Country", "Country_source"]] = fix_country_from_text("Belgium")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1458906, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1458906, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1467206, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1467206, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1486561, ["Country", "Country_source"]] = fix_country_from_text("Brazil")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1486561, ["Country", "Country_source"]] = fix_country_from_text("Brazil")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1486713, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1486713, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1501759, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1501759, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1506818, ["Country", "Country_source"]] = fix_country_from_text("Sweden")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1506818, ["Country", "Country_source"]] = fix_country_from_text("Sweden")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1511046, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1511046, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1526868, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1526868, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1537073, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1537073, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1543545, ["Country", "Country_source"]] = fix_country_from_text("Denmark")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1543545, ["Country", "Country_source"]] = fix_country_from_text("Denmark")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1551310, ["Country", "Country_source"]] = fix_country_from_text("Denmark")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1551310, ["Country", "Country_source"]] = fix_country_from_text("Denmark")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1553897, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1553897, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1577173, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1577173, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1584102, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1584102, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1587763, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1587763, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1591123, ["Country", "Country_source"]] = fix_country_from_text("Sweden")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1591123, ["Country", "Country_source"]] = fix_country_from_text("Sweden")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1606565, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1606565, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1622660, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1622660, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1626970, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1626970, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1647021, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1647021, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1648537, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1648537, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1655272, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1655272, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1657755, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1657755, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1673272, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1673272, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1693592, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1693592, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1694580, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1694580, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1695382, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1695382, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1698662, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1698662, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1702309, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1702309, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1702350, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1702350, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1705275, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1705275, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1708257, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1708257, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1710543, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1710543, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1767140, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1767140, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1328906, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1328906, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1371670, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1371670, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1769875, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1769875, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == 1356784, ["Country", "Country_source"]] = fix_country_from_text("Sweden")
df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == 1356784, ["Country", "Country_source"]] = fix_country_from_text("Sweden")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_100000_base.loc[df_parsedX_100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_100000_match_GLOBOCAN.loc[df_parsedX_100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [15]:
# Write the corrected frames back to parsedX_100000.csv in their respective
# input folders, without the pandas index column.
df_parsedX_100000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_100000.csv",
    index=False,
)
df_parsedX_100000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_100000.csv",
    index=False,
)

# Unbind both frames now that they have been saved.
del df_parsedX_100000_base, df_parsedX_100000_match_GLOBOCAN

### Correcting *parsedX_1000000.csv*

In [223]:
# Load parsedX_1000000.csv from both input folders: the base copy and the
# GLOBOCAN-matched copy.
df_parsedX_1000000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_1000000.csv",
)
df_parsedX_1000000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_1000000.csv",
)

In [224]:
## Replace found errors by true Country values

# Found errors in parsedX_1000000.csv and their true Country values, stored as
# (PMID, country text) pairs in the order they are applied. To register another
# correction, append a pair to this list instead of copying a statement.
country_fixes_parsedX_1000000 = [
    (11721110, "United States"),
    (11309027, "Australia"),
    (11450918, "Australia"),
    (11468033, "France"),
    (11593153, "France"),
    (11680021, "Germany"),
    (11917379, "France"),
    (11956310, "Ireland"),
    (12016415, "France"),
    (12058131, "France"),
    (12108281, "France"),
    (11688359, "Italy"),
    (11462953, "Japan"),
    (11787374, "France"),
    (11818943, "United States"),
    (11836591, "Argentina"),
    (12063738, "Germany"),
    (11292140, "United Kingdom"),
    (11716229, "United Kingdom"),
    (12026781, "United Kingdom"),
    (12057873, "Japan"),
    (11820680, "Denmark"),
    (11745825, "India"),
    (11374632, "Belgium"),
    (11715595, "Netherlands"),
    (11299736, "Japan"),
    (11346461, "Japan"),
    (11392669, "Japan"),
    (11525795, "Japan"),
    (11585860, "Japan"),
    (11585862, "Japan"),
    (11689289, "Japan"),
    (11831358, "Japan"),
    (11856476, "Japan"),
]

# Overwrite "Country" and "Country_source" for every listed PMID in both copies
# of the dataset (base and GLOBOCAN-matched).
# NOTE(review): fix_country_from_text is assumed to return one value per target
# column -- confirm against aux_fix_country_utils.
# The two frames are addressed by name (no loop alias) so that no extra
# reference to them survives this cell.
for pmid_to_fix, true_country in country_fixes_parsedX_1000000:
    df_parsedX_1000000_base.loc[
        df_parsedX_1000000_base["PMID"] == pmid_to_fix,
        ["Country", "Country_source"],
    ] = fix_country_from_text(true_country)
    df_parsedX_1000000_match_GLOBOCAN.loc[
        df_parsedX_1000000_match_GLOBOCAN["PMID"] == pmid_to_fix,
        ["Country", "Country_source"],
    ] = fix_country_from_text(true_country)

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11957087, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11957087, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11957150, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11957150, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11971643, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11971643, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 12051806, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 12051806, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 12065087, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 12065087, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11274560, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11274560, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11312188, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11312188, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11373238, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11373238, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11391258, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11391258, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11394840, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11394840, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11475067, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11475067, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11550299, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11550299, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11641151, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11641151, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11641168, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11641168, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11704885, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11704885, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11713294, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11713294, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11775520, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11775520, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11792503, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11792503, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11803655, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11803655, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11818600, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11818600, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11867778, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11867778, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11867808, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11867808, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11886351, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11886351, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11906872, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11906872, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11921866, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11921866, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 12100775, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 12100775, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11678667, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11678667, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11322142, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11322142, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == 11337012, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == 11337012, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

# Template for further manual country corrections on these two dataframes.
# The lines below are intentionally incomplete: fill in the PMID (after "==") and
# the country name (inside the quotes) in BOTH lines before uncommenting them,
# so that the base and the GLOBOCAN-matched copies receive the same correction.
#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1000000_base.loc[df_parsedX_1000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1000000_match_GLOBOCAN.loc[df_parsedX_1000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [225]:
# Write the corrected frames back over their source CSVs (one per dataset copy),
# then drop both names so the frames can be garbage-collected before the next chunk is loaded.
df_parsedX_1000000_base.to_csv(f"{DF_input_base}parsedX_1000000.csv", index=False)
df_parsedX_1000000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}parsedX_1000000.csv", index=False)

del df_parsedX_1000000_base, df_parsedX_1000000_match_GLOBOCAN

### Correcting *parsedX_1100000.csv*

In [226]:
# Load both copies of this chunk: the untouched (base) one and the GLOBOCAN-matched one
df_parsedX_1100000_base = pd.read_csv(f"{DF_input_base}parsedX_1100000.csv")
df_parsedX_1100000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_1100000.csv")

In [227]:
## Replace found errors by true Country values

# Manually verified corrections for parsedX_1100000 as (PMID, true country) pairs,
# kept in the order in which they were originally recorded.
country_fixes_parsedX_1100000 = [
    (12957547, "Brazil"),
    (12355183, "Argentina"),
    (12161378, "Italy"),
    (12506764, "Austria"),
    (12621853, "Austria"),
    (12422048, "United States"),
    (12538984, "United States"),
    (12873172, "United States"),
    (12145848, "France"),
    (12205789, "France"),
    (12846424, "Australia"),
    (12972519, "France"),
    (12508755, "Italy"),
    (12713186, "Italy"),
    (12239967, "Japan"),
    (12483128, "France"),
    (12860824, "United States"),
    (12943194, "France"),
    (12512967, "Spain"),
    (12613152, "France"),
    (12709653, "Argentina"),
    (12821934, "Spain"),
    (12829030, "Spain"),
    (12124465, "Sweden"),
    (12375037, "China"),
    (12846497, "United Kingdom"),
    (12186347, "Italy"),
    (12772990, "Argentina"),
    (12121834, "Italy"),
    (12469273, "Germany"),
    (14508103, "United States"),
    (12778306, "Brazil"),
    (12208572, "Belgium"),
    (12880155, "Belgium"),
    (12126097, "Japan"),
    (12130459, "Japan"),
    (12151355, "Japan"),
    (12271438, "Japan"),
    (12366719, "Japan"),
    (12427434, "Japan"),
    (12465387, "Japan"),
    (12553031, "Japan"),
    (12600411, "Japan"),
    (12653256, "Japan"),
    (12680153, "Japan"),
    (12692952, "Japan"),
    (12788315, "Japan"),
    (12826309, "Japan"),
]


def _apply_country_fixes_parsedX_1100000(frames, fixes):
    """Overwrite Country / Country_source for the listed PMIDs in every frame.

    Parameters
    ----------
    frames : iterable of pandas.DataFrame
        Frames to correct in place; each must have "PMID", "Country" and
        "Country_source" columns.
    fixes : iterable of (int, str)
        (PMID, country text) pairs. The country text is passed to
        fix_country_from_text, and whatever it returns is assigned to the
        two columns of the rows whose PMID matches.

    Returns
    -------
    None
        The frames are modified in place. A PMID that is absent from a frame
        selects no rows, so that assignment changes nothing.
    """
    for pmid, country in fixes:
        for frame in frames:
            # One fix_country_from_text call per frame, same as the statement-per-frame version
            frame.loc[frame["PMID"] == pmid, ["Country", "Country_source"]] = fix_country_from_text(country)


# Apply every correction to both copies (base first, then GLOBOCAN-matched) so they stay in sync.
# Running the loops inside a function keeps loop variables out of the notebook namespace, so no
# stray reference to a frame survives a later `del`.
_apply_country_fixes_parsedX_1100000(
    (df_parsedX_1100000_base, df_parsedX_1100000_match_GLOBOCAN),
    country_fixes_parsedX_1100000,
)

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12829003, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12829003, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12860469, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12860469, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12886135, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12886135, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12118378, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12118378, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12130443, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12130443, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12204674, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12204674, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12210601, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12210601, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12354987, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12354987, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12381480, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12381480, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12388495, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12388495, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12461296, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12461296, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12490981, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12490981, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12525194, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12525194, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12530055, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12530055, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12540430, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12540430, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12601184, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12601184, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12633840, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12633840, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12685660, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12685660, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12700131, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12700131, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12783605, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12783605, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12818835, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12818835, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12844367, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12844367, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12853663, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12853663, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12930079, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12930079, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12932929, ["Country", "Country_source"]] = fix_country_from_text("Israel")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12932929, ["Country", "Country_source"]] = fix_country_from_text("Israel")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12958549, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12958549, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12570061, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12570061, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12618671, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12618671, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12772187, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12772187, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == 12821385, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == 12821385, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1100000_base.loc[df_parsedX_1100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1100000_match_GLOBOCAN.loc[df_parsedX_1100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [228]:
# Save both corrected copies of parsedX_1100000 into their respective folders,
# without writing the DataFrame index as a column.
df_parsedX_1100000_base.to_csv(
    DF_input_base + "parsedX_1100000.csv",
    index=False,
)
df_parsedX_1100000_match_GLOBOCAN.to_csv(
    DF_input_match_GLOBOCAN + "parsedX_1100000.csv",
    index=False,
)

# Drop the notebook's references to both frames now that they are on disk.
del df_parsedX_1100000_base, df_parsedX_1100000_match_GLOBOCAN

### Correcting *parsedX_1200000.csv*

In [306]:
# Load the two copies of parsedX_1200000: the untouched "base" copy and the
# copy kept in the match-GLOBOCAN folder.
df_parsedX_1200000_base = pd.read_csv(
    DF_input_base + "parsedX_1200000.csv"
)
df_parsedX_1200000_match_GLOBOCAN = pd.read_csv(
    DF_input_match_GLOBOCAN + "parsedX_1200000.csv"
)

In [307]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_1200000, listed as (PMID, true country).
# Each entry is applied to BOTH copies of the dataset (base and match-GLOBOCAN),
# so the PMID and the country are written once and the two copies cannot drift
# apart through a copy-paste slip.
# To register a new correction, append one (PMID, "Country") tuple to this list.
COUNTRY_FIXES_1200000 = [
    (15481700, "Italy"),
    (14696430, "Austria"),
    (15067206, "United States"),
    (14586258, "France"),
    (14600589, "Australia"),
    (14968015, "France"),
    (15022781, "France"),
    (15027374, "France"),
    (15036243, "Ireland"),
    (15039608, "Australia"),
    (15204374, "Ireland"),
    (15204423, "Ireland"),
    (15351012, "France"),
    (15357755, "Australia"),
    (15365563, "Ireland"),
    (15380568, "Australia"),
    (15545939, "France"),
    (15052176, "France"),
    (14717137, "United Kingdom"),
    (14571433, "Argentina"),
    (14725654, "Slovakia"),
    (15485203, "Switzerland"),
    (15535599, "Switzerland"),
    (14766271, "United States"),
    (14605625, "United States"),
    (14690004, "United Kingdom"),
    (14719847, "United States"),
    (14990209, "United States"),
    (15166878, "Netherlands"),
    (15190250, "United States"),
    (15542931, "Mexico"),
    (15468691, "Switzerland"),
    (15472714, "Netherlands"),
    (15232311, "France"),
    (14614324, "United States"),
    (15534620, "Brazil"),
    (15149080, "Belgium"),
    (15293670, "Belgium"),
    (15370683, "Germany"),
]

# Entries are applied in list order, base copy first and match-GLOBOCAN copy second.
for pmid, country in COUNTRY_FIXES_1200000:
    # fix_country_from_text is imported from aux_fix_country_utils; it presumably
    # returns the values for the two target columns -- verify in that module.
    df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == pmid, ["Country", "Country_source"]] = fix_country_from_text(country)
    df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == pmid, ["Country", "Country_source"]] = fix_country_from_text(country)

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14574513, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14574513, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14586750, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14586750, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14728590, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14728590, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15052456, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15052456, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15061261, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15061261, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15063149, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15063149, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15072586, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15072586, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15110228, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15110228, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15170673, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15170673, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15178352, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15178352, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15182427, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15182427, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15183528, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15183528, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15183531, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15183531, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15195751, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15195751, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15459021, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15459021, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15471671, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15471671, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14574001, ["Country", "Country_source"]] = fix_country_from_text("Israel")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14574001, ["Country", "Country_source"]] = fix_country_from_text("Israel")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14593775, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14593775, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14619605, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14619605, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14661438, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14661438, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14706924, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14706924, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14722482, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14722482, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14738240, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14738240, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14752193, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14752193, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14985491, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14985491, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14990839, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14990839, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15006540, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15006540, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15026141, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15026141, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15044255, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15044255, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15047380, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15047380, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15082088, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15082088, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15090276, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15090276, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15143160, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15143160, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15171812, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15171812, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15219496, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15219496, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15223312, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15223312, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15238167, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15238167, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15280468, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15280468, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15289300, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15289300, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15304390, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15304390, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15310767, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15310767, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15320718, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15320718, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15367573, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15367573, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15367686, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15367686, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15456869, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15456869, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15461858, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15461858, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15474442, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15474442, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15509721, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15509721, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15537980, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15537980, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14561262, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14561262, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14561263, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14561263, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14564245, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14564245, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15359655, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15359655, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15010194, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15010194, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15192967, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15192967, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 15262439, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 15262439, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == 14564821, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == 14564821, ["Country", "Country_source"]] = fix_country_from_text("France")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1200000_base.loc[df_parsedX_1200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1200000_match_GLOBOCAN.loc[df_parsedX_1200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [308]:
# Write the corrected frames to parsedX_1200000.csv in each dataset folder
# (index=False keeps the DataFrame index out of the file).
# os.path.join is used instead of string concatenation so the path is still
# correct when the folder typed at the input() prompt has no trailing separator.
df_parsedX_1200000_base.to_csv(os.path.join(DF_input_base, "parsedX_1200000.csv"), index = False)
df_parsedX_1200000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_1200000.csv"), index = False)

# Drop the notebook-level references so the frames can be garbage-collected
# before the next csv chunk is loaded.
del df_parsedX_1200000_base
del df_parsedX_1200000_match_GLOBOCAN

### Correcting *parsedX_1300000.csv*

In [309]:
# Read csv
# Load both copies of parsedX_1300000.csv: the "base" copy and the copy whose
# names were adapted to match the GLOBOCAN dataset.
# os.path.join is used instead of string concatenation so the path is still
# correct when the folder typed at the input() prompt has no trailing separator.
df_parsedX_1300000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_1300000.csv"))
df_parsedX_1300000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_1300000.csv"))

In [310]:
## Replace found errors by true Country values

def apply_country_fixes(frames, fixes):
    """Overwrite Country / Country_source for individually corrected articles.

    Parameters
    ----------
    frames : dict
        Maps a label (used only in the warning message) to a DataFrame with a
        "PMID" column. Each frame is modified in place.
    fixes : dict
        Maps a PMID to the country text handed to fix_country_from_text.

    For every PMID, and for every frame in the given order, the rows whose
    PMID matches get ["Country", "Country_source"] set to whatever
    fix_country_from_text returns for the country text (presumably a
    (country, source) pair -- see aux_fix_country_utils to confirm).

    A PMID that matches no row is reported with a printed warning, because
    assigning through an all-False mask changes nothing and would otherwise
    hide a mistyped PMID.
    """
    for pmid, country in fixes.items():
        for label, frame in frames.items():
            pmid_rows = frame["PMID"] == pmid
            if not pmid_rows.any():
                print(f"WARNING: PMID {pmid} not found in {label} dataset; correction to '{country}' had no effect")
            frame.loc[pmid_rows, ["Country", "Country_source"]] = fix_country_from_text(country)


# Corrections for parsedX_1300000.csv: PMID -> true country.
# One entry per article; the same correction is applied to both dataset copies.
country_fixes_1300000 = {
    16267798: "Italy",
    15942562: "United States",
    15692413: "France",
    15731292: "Ireland",
    15734139: "France",
    15748131: "Australia",
    15808448: "France",
    15810157: "France",
    16039103: "France",
    16129520: "France",
    16134302: "France",
    16181454: "Australia",
    15771949: "Spain",
    16313961: "Italy",
    16238194: "Romania",
    15693138: "Italy",
    15893285: "Italy",
    16010794: "United States",
    16203811: "United States",
    15565382: "Austria",
    15839470: "Switzerland",
    15883785: "Austria",
    15912426: "France",
    15925994: "Spain",
    16040004: "Argentina",
    16143088: "Spain",
    16172016: "Argentina",
    16284766: "United States",
    15896501: "United States",
    15907365: "United Kingdom",
    15928562: "United States",
    16033738: "United States",
    16043670: "United States",
    16215812: "Netherlands",
    16238143: "United Kingdom",
    16249791: "United Kingdom",
    15907589: "Italy",
    15570129: "Netherlands",
    15680664: "Japan",
    16133534: "Netherlands",
    15688377: "Japan",
    15723650: "Japan",
}

# The frames are passed in a temporary dict so no extra notebook-level
# reference to them survives the call (a later `del` can still free them).
apply_country_fixes(
    {
        "base": df_parsedX_1300000_base,
        "match_GLOBOCAN": df_parsedX_1300000_match_GLOBOCAN,
    },
    country_fixes_1300000,
)

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15736053, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15736053, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15751122, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15751122, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15848042, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15848042, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15892897, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15892897, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15929075, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15929075, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15991841, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15991841, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16043954, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16043954, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16049979, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16049979, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16081511, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16081511, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16133359, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16133359, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16133366, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16133366, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16175472, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16175472, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16200895, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16200895, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16219633, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16219633, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15583013, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15583013, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15593349, ["Country", "Country_source"]] = fix_country_from_text("Spain")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15593349, ["Country", "Country_source"]] = fix_country_from_text("Spain")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15603536, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15603536, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15728131, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15728131, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15737872, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15737872, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15742256, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15742256, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15743682, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15743682, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15743806, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15743806, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15800709, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15800709, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15831701, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15831701, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15836967, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15836967, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15840790, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15840790, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15842999, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15842999, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15857509, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15857509, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15858077, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15858077, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15870290, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15870290, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15927067, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15927067, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15939586, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15939586, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15984700, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15984700, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15987430, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15987430, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16127634, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16127634, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16157293, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16157293, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16164925, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16164925, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16164933, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16164933, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16183933, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16183933, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16219545, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16219545, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16230047, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16230047, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16280041, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16280041, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16044687, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16044687, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15596293, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15596293, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15713519, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15713519, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15795242, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15795242, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15888262, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15888262, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16023763, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16023763, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 16213362, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 16213362, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == 15767962, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == 15767962, ["Country", "Country_source"]] = fix_country_from_text("France")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1300000_base.loc[df_parsedX_1300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1300000_match_GLOBOCAN.loc[df_parsedX_1300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [311]:
# Write both corrected copies of this chunk to "parsedX_1300000.csv" in their
# respective directories (row index omitted), then remove the two frame names
# from the notebook namespace.
df_parsedX_1300000_base.to_csv(
    DF_input_base + "parsedX_1300000.csv",
    index=False,
)
df_parsedX_1300000_match_GLOBOCAN.to_csv(
    DF_input_match_GLOBOCAN + "parsedX_1300000.csv",
    index=False,
)

del df_parsedX_1300000_base, df_parsedX_1300000_match_GLOBOCAN

### Correcting *parsedX_1400000.csv*

In [312]:
# Load the two copies of the parsedX_1400000 chunk: the untouched-names copy
# (base) and the copy whose names were aligned with Globocan (match_GLOBOCAN).
df_parsedX_1400000_base = pd.read_csv(
    DF_input_base + "parsedX_1400000.csv"
)
df_parsedX_1400000_match_GLOBOCAN = pd.read_csv(
    DF_input_match_GLOBOCAN + "parsedX_1400000.csv"
)

In [313]:
## Replace found errors by true Country values

# Manually verified corrections for this chunk, as (PMID, country text) pairs.
# The pairs are kept in the order in which they were originally applied.
# To register a new correction, append one pair here: the loop below applies
# it to both copies of the chunk, so the two copies cannot drift apart.
country_fixes_1400000 = [
    (16715323, "Brazil"),
    (17007025, "Italy"),
    (16340834, "Belgium"),
    (16482873, "Belgium"),
    (16642235, "Australia"),
    (16697618, "France"),
    (17030388, "France"),
    (17091098, "Australia"),
    (16760298, "Italy"),
    (16388370, "Italy"),
    (16701496, "Italy"),
    (17171637, "Italy"),
    (16892552, "Switzerland"),
    (16567022, "Spain"),
    (16696934, "Argentina"),
    (16715760, "Argentina"),
    (16538218, "Sweden"),
    (16372487, "United Kingdom"),
    (16410589, "United Kingdom"),
    (16942641, "United Kingdom"),
    (17007014, "United States"),
    (16419064, "Italy"),
    (17010186, "Italy"),
    (16407624, "India"),
    (17016635, "United States"),
    (16683388, "Italy"),
    (16872799, "Italy"),
    (16819688, "Netherlands"),
    (16614876, "Netherlands"),
    (16365683, "Japan"),
    (16394290, "Japan"),
    (16395701, "Japan"),
    (16493531, "Japan"),
    (16515943, "Japan"),
    (16570353, "Japan"),
    (16601012, "Japan"),
    (16616664, "Japan"),
    (16643879, "Japan"),
    (16704527, "Japan"),
    (16935975, "Japan"),
    (16959434, "Japan"),
    (17012850, "Japan"),
    (17016582, "Japan"),
    (17049467, "Japan"),
    (17053293, "Japan"),
    (17111235, "Japan"),
    (16357363, "United States"),
    (16399897, "Russia"),
    (16404370, "United States"),
    (16419187, "United States"),
    (16421588, "United States"),
    (16426843, "Israel"),
    (16458195, "United States"),
    (16478428, "United States"),
    (16482447, "United States"),
    (16517773, "Netherlands"),
    (16583981, "Poland"),
    (16647352, "United States"),
    (16705174, "United States"),
    (16705457, "Egypt"),
    (16730930, "Russia"),
    (16775349, "United States"),
    (16792817, "Australia"),
    (16805920, "Ireland"),
    (16808852, "United Kingdom"),
    (16832677, "Poland"),
    (16849592, "Canada"),
    (16877361, "Canada"),
    (16942611, "Canada"),
    (16978951, "United States"),
    (16984691, "United States"),
    (16990665, "United States"),
    (17018578, "United States"),
    (17036566, "United States"),
    (17066774, "United States"),
    (17071576, "United States"),
    (17102042, "United States"),
    (17102978, "United States"),
    (17114539, "United States"),
    (17131121, "United States"),
    (16359366, "Canada"),
    (16982072, "Australia"),
    (16343481, "Italy"),
    (16504883, "Canada"),
    (16537699, "Mexico"),
    (16803519, "Mexico"),
    (16852046, "Mexico"),
    (16959370, "Mexico"),
    (17022892, "Mexico"),
    (17042487, "Mexico"),
    (17112680, "Mexico"),
    (16460443, "Italy"),
]

# Columns overwritten for every row whose PMID matches a correction.
country_cols = ["Country", "Country_source"]

for pmid, country in country_fixes_1400000:
    # fix_country_from_text comes from aux_fix_country_utils (not shown here);
    # presumably it returns the values for the two columns above — TODO confirm.
    # It is called once per frame, base first, exactly as the original
    # hand-written statements did.
    df_parsedX_1400000_base.loc[
        df_parsedX_1400000_base["PMID"] == pmid, country_cols
    ] = fix_country_from_text(country)
    df_parsedX_1400000_match_GLOBOCAN.loc[
        df_parsedX_1400000_match_GLOBOCAN["PMID"] == pmid, country_cols
    ] = fix_country_from_text(country)

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1400000_base.loc[df_parsedX_1400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1400000_match_GLOBOCAN.loc[df_parsedX_1400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [314]:
# Save both corrected copies (base and match_GLOBOCAN) as parsedX_1400000.csv
# inside their respective dataset folders.
# os.path.join is used instead of plain string concatenation so the target
# path stays correct even when the folder typed at the input() prompt does not
# end with a path separator; concatenation would then write to a mis-named
# file next to the dataset folder instead of inside it.
df_parsedX_1400000_base.to_csv(os.path.join(DF_input_base, "parsedX_1400000.csv"), index = False)
df_parsedX_1400000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_1400000.csv"), index = False)

# Drop the notebook's references to both frames now that they are on disk.
del df_parsedX_1400000_base
del df_parsedX_1400000_match_GLOBOCAN

### Correcting *parsedX_1500000.csv*

In [315]:
# Read parsedX_1500000.csv from each dataset copy (base and match_GLOBOCAN).
# os.path.join is used instead of plain string concatenation so the path is
# still valid when the folder typed at the input() prompt does not end with a
# path separator.
df_parsedX_1500000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_1500000.csv"))
df_parsedX_1500000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_1500000.csv"))

In [316]:
## Replace found errors by true Country values

# Manually curated corrections for parsedX_1500000: one (PMID, country text)
# pair per article. A list of tuples (rather than a dict) keeps the order in
# which the corrections are applied explicit.
country_fixes_parsedX_1500000 = [
    (17406831, "Italy"),
    (17927986, "Argentina"),
    (17383523, "United States"),
    (17389413, "United States"),
    (17479108, "Australia"),
    (17596859, "France"),
    (17702593, "France"),
    (17932053, "Italy"),
    (18080419, "France"),
    (17347637, "France"),
    (17240633, "Argentina"),
    (17319790, "Slovakia"),
    (17394837, "Denmark"),
    (17870629, "France"),
    (17955256, "Argentina"),
    (17236946, "China"),
    (17430760, "United States"),
    (17618801, "United States"),
    (17414499, "Brazil"),
    (17454361, "India"),
    (17998653, "India"),
    (17919432, "Spain"),
    (17481704, "Italy"),
    (17470161, "Brazil"),
    (17589568, "United States"),
    (17217918, "Japan"),
    (17235569, "Japan"),
    (17272286, "Japan"),
    (17331170, "Japan"),
    (17410583, "Japan"),
    (17417780, "Japan"),
    (17468826, "Japan"),
    (17506908, "Japan"),
    (17531078, "Japan"),
    (17557291, "Japan"),
    (17571970, "Japan"),
    (17599625, "Japan"),
    (17655966, "Japan"),
    (17657716, "Japan"),
    (17728072, "Japan"),
    (17976452, "Japan"),
    (18037575, "Japan"),
    (17189608, "Russia"),
    (17261174, "United Kingdom"),
    (17312187, "United States"),
    (17317227, "Russia"),
    (17336103, "Russia"),
    (17376329, "Portugal"),
    (17385008, "United States"),
    (17561080, "United States"),
    (17615985, "China"),
]

# Apply every correction to both dataset copies. For the rows whose PMID
# matches, the "Country" and "Country_source" columns are overwritten with the
# value returned by fix_country_from_text (imported from aux_fix_country_utils;
# presumably a (country, source) pair -- confirm in that module).
# The helper is called once per frame, exactly as the original statements did.
for pmid, country_text in country_fixes_parsedX_1500000:
    df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == pmid, ["Country", "Country_source"]] = fix_country_from_text(country_text)
    df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == pmid, ["Country", "Country_source"]] = fix_country_from_text(country_text)

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17631914, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17631914, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17668300, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17668300, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17692120, ["Country", "Country_source"]] = fix_country_from_text("Germany")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17692120, ["Country", "Country_source"]] = fix_country_from_text("Germany")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17692334, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17692334, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17694514, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17694514, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17722646, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17722646, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17867389, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17867389, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17909818, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17909818, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17937802, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17937802, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17979493, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17979493, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17980055, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17980055, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17990362, ["Country", "Country_source"]] = fix_country_from_text("Greece")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17990362, ["Country", "Country_source"]] = fix_country_from_text("Greece")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 18045465, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 18045465, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 18079680, ["Country", "Country_source"]] = fix_country_from_text("China")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 18079680, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17284750, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17284750, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17254669, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17254669, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17342320, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17342320, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17896919, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17896919, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17434159, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17434159, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17668204, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17668204, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17922950, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17922950, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 18053356, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 18053356, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 18053362, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 18053362, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 18053364, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 18053364, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == 17315792, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == 17315792, ["Country", "Country_source"]] = fix_country_from_text("France")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1500000_base.loc[df_parsedX_1500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1500000_match_GLOBOCAN.loc[df_parsedX_1500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [317]:
# Write the corrected parsedX_1500000 dataframes back over their source CSVs
# (one file in the base folder, one in the match-GLOBOCAN folder), without the index column.
df_parsedX_1500000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_1500000.csv",
    index=False,
)
df_parsedX_1500000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_1500000.csv",
    index=False,
)

# Drop both names so the dataframes can be garbage-collected before the next file is loaded.
del df_parsedX_1500000_base, df_parsedX_1500000_match_GLOBOCAN

### Correcting *parsedX_1600000.csv*

In [241]:
# Load parsedX_1600000.csv twice: once from the base folder and once from the
# match-GLOBOCAN folder. Both copies receive the same corrections below.
df_parsedX_1600000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_1600000.csv"
)
df_parsedX_1600000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_1600000.csv"
)

In [242]:
## Replace found errors by true Country values

# Manually curated corrections for parsedX_1600000.csv.
# Each entry is (PMID, country text). The country text is passed to
# fix_country_from_text (imported from aux_fix_country_utils), whose result is
# written into the "Country" and "Country_source" columns of every row with that PMID.
# NOTE(review): fix_country_from_text presumably returns a (Country, Country_source)
# pair -- confirm against aux_fix_country_utils.
# To add a correction, append one (PMID, "Country") tuple; it is applied to both
# the base and the match-GLOBOCAN dataframes, so the two copies cannot diverge.
corrections_parsedX_1600000 = [
    (18228178, "United Kingdom"),
    (19011482, "Japan"),
    (18365760, "Brazil"),
    (18977715, "Argentina"),
    (18408385, "United States"),
    (18380311, "France"),
    (18456062, "France"),
    (18538271, "France"),
    (18557535, "Switzerland"),
    (18588522, "Australia"),
    (18604657, "Australia"),
    (18656282, "Australia"),
    (18807605, "Belgium"),
    (19031932, "Austria"),
    (18502168, "Spain"),
    (19016421, "Switzerland"),
    (18603738, "Iran"),
    (18990025, "Iran"),
    (18945211, "Brazil"),
    (18293907, "France"),
    (19020783, "Slovakia"),
    (18803871, "United States"),
    (18954760, "United States"),
    (18793269, "Italy"),
    (18162345, "India"),
    (18358106, "Brazil"),
    (18569287, "India"),
    (18604658, "India"),
    (18604659, "India"),
    (18824095, "India"),
    (18950215, "India"),
    (18666318, "China"),
    (18264209, "New Zealand"),
    (18813779, "United States"),
    (18363871, "Germany"),
    (18294668, "Japan"),
    (18397471, "Japan"),
    (18483618, "Japan"),
    (18547741, "Japan"),
    (18579255, "Japan"),
    (18704666, "Japan"),
    (18844938, "Japan"),
    (18853477, "Japan"),
    (18853746, "Japan"),
    (19003583, "Japan"),
    (19003968, "Japan"),
    (19028472, "Japan"),
    (18087007, "Canada"),
    (18091388, "Greece"),
    (18179693, "Japan"),
    (18186930, "United States"),
    (18208587, "Australia"),
    (18211746, "Italy"),
    (18227155, "United States"),
    (18279521, "Canada"),
    (18287436, "United States"),
    (18373870, "Australia"),
    (18375898, "Poland"),
    (18378905, "United States"),
    (18403589, "United States"),
    (18430835, "United States"),
    (18439062, "United States"),
    (18440462, "United States"),
    (18442884, "United States"),
    (18461285, "Australia"),
    (18468080, "Brazil"),
    (18489764, "United States"),
    (18490593, "United States"),
    (18510722, "United Kingdom"),
    (18543006, "United States"),
    (18550181, "Canada"),
    (18578862, "Ireland"),
    (18645123, "Canada"),
    (18648954, "United States"),
    (18683894, "United States"),
    (18700021, "United States"),
    (18710507, "Australia"),
    (18817561, "United States"),
    (18819711, "Spain"),
    (18841754, "Argentina"),
    (18842429, "Portugal"),
    (18925701, "United States"),
    (18974381, "United Kingdom"),
    (18988376, "United States"),
    (19020218, "United States"),
    (18192578, "Canada"),
    (18534614, "Canada"),
    (18178686, "Australia"),
    (18288922, "Italy"),
    (18442289, "Italy"),
    (18508272, "Italy"),
    (18207203, "Mexico"),
    (18210196, "Mexico"),
    (18647433, "Mexico"),
    (18647554, "Mexico"),
]

# Apply every correction to both copies, in list order (base first, then match-GLOBOCAN).
# The dataframes are referenced by name rather than through a loop variable so that
# no extra reference keeps them alive once they are deleted after saving.
for fix_pmid, fix_country_text in corrections_parsedX_1600000:
    df_parsedX_1600000_base.loc[
        df_parsedX_1600000_base["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country_text)
    df_parsedX_1600000_match_GLOBOCAN.loc[
        df_parsedX_1600000_match_GLOBOCAN["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country_text)

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1600000_base.loc[df_parsedX_1600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1600000_match_GLOBOCAN.loc[df_parsedX_1600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [243]:
# Write the corrected 1600000 batch to "parsedX_1600000.csv" in both dataset
# folders (base and GLOBOCAN-matched), without the DataFrame index column.
# os.path.join is used instead of string concatenation so the path is still
# valid when the folder typed by the user has no trailing separator.
df_parsedX_1600000_base.to_csv(os.path.join(DF_input_base, "parsedX_1600000.csv"), index = False)
df_parsedX_1600000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_1600000.csv"), index = False)

# Remove both names from the namespace once saved
# (presumably to free memory before the next batch is loaded).
del df_parsedX_1600000_base
del df_parsedX_1600000_match_GLOBOCAN

### Correcting *parsedX_1700000.csv*

In [318]:
# Read the 1700000 batch from both dataset folders (base and GLOBOCAN-matched).
# os.path.join is used instead of string concatenation so the path is still
# valid when the folder typed by the user has no trailing separator.
df_parsedX_1700000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_1700000.csv"))
df_parsedX_1700000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_1700000.csv"))

In [319]:
## Replace found errors by true Country values

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19046130, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19046130, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19051189, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19051189, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19061466, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19061466, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19075591, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19075591, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19126291, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19126291, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19168018, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19168018, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19178759, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19178759, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19268208, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19268208, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19326171, ["Country", "Country_source"]] = fix_country_from_text("Austria")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19326171, ["Country", "Country_source"]] = fix_country_from_text("Austria")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19479418, ["Country", "Country_source"]] = fix_country_from_text("Germany")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19479418, ["Country", "Country_source"]] = fix_country_from_text("Germany")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19647630, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19647630, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19760114, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19760114, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19801219, ["Country", "Country_source"]] = fix_country_from_text("Austria")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19801219, ["Country", "Country_source"]] = fix_country_from_text("Austria")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19824281, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19824281, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19683251, ["Country", "Country_source"]] = fix_country_from_text("Spain")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19683251, ["Country", "Country_source"]] = fix_country_from_text("Spain")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19101895, ["Country", "Country_source"]] = fix_country_from_text("Iran")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19101895, ["Country", "Country_source"]] = fix_country_from_text("Iran")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19256743, ["Country", "Country_source"]] = fix_country_from_text("Iran")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19256743, ["Country", "Country_source"]] = fix_country_from_text("Iran")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19527768, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19527768, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19793167, ["Country", "Country_source"]] = fix_country_from_text("Iran")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19793167, ["Country", "Country_source"]] = fix_country_from_text("Iran")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19332022, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19332022, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19754825, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19754825, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19135106, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19135106, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19501160, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19501160, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19559379, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19559379, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19579023, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19579023, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19186231, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19186231, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19341448, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19341448, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19539896, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19539896, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19423036, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19423036, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19132508, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19132508, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19439861, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19439861, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19460457, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19460457, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19578735, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19578735, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19288002, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19288002, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19041193, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19041193, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19140851, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19140851, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19159355, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19159355, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19212106, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19212106, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19243246, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19243246, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19332896, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19332896, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19363707, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19363707, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19491504, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19491504, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19617218, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19617218, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19624307, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19624307, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19661356, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19661356, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19688830, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19688830, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19696287, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19696287, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19830514, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19830514, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19075129, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19075129, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19083124, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19083124, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19112019, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19112019, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19113066, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19113066, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19128442, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19128442, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19135233, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19135233, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19161612, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19161612, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19162530, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19162530, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19172559, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19172559, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19204205, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19204205, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19224751, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19224751, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19226029, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19226029, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == 19261923, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == 19261923, ["Country", "Country_source"]] = fix_country_from_text("United States")

# Country corrections for this chunk, written as PMID -> corrected country name.
# Each PMID is typed once and applied to both copies of the dataset, so the base and
# match-GLOBOCAN frames cannot drift apart through a mistyped literal: the
# match-GLOBOCAN statement for PMID 19184018 used to compare against 191840180, and a
# repeated 19184018 entry further down was masking that typo. The repeat is dropped here.
# To add a correction, append a `PMID: "Country"` entry to this mapping.
country_fixes_1700000 = {
    19295174: "Russia",
    19338682: "Russia",
    19351437: "United States",
    19374538: "Russia",
    19380366: "Canada",
    19451225: "United States",
    19456988: "Germany",
    19466138: "United States",
    19513974: "Switzerland",
    19596836: "United States",
    19605661: "United States",
    19620444: "United States",
    19620554: "United States",
    19634814: "United States",
    19640736: "Netherlands",
    19642980: "United Kingdom",
    19667072: "Canada",
    19690108: "United States",
    19819862: "Poland",
    19625371: "Canada",
    19184018: "Italy",
    19438579: "Australia",
    19459643: "Italy",
    19487245: "Australia",
    19566874: "Australia",
    19071205: "Mexico",
    19221528: "Mexico",
    19396245: "Mexico",
    19484502: "Mexico",
    19509180: "Canada",
    19666296: "Mexico",
    19784659: "Mexico",
    19608330: "Italy",
    19660768: "Italy",
}

# fix_country_from_text is imported from aux_fix_country_utils; presumably it returns the
# values for the ["Country", "Country_source"] pair -- verify against that module.
# It is still called once per frame, as the original per-PMID statements did.
# The frames are addressed by name (no loop alias) so the later `del` really releases them.
for fix_pmid, fix_country in country_fixes_1700000.items():
    df_parsedX_1700000_base.loc[df_parsedX_1700000_base["PMID"] == fix_pmid, ["Country", "Country_source"]] = fix_country_from_text(fix_country)
    df_parsedX_1700000_match_GLOBOCAN.loc[df_parsedX_1700000_match_GLOBOCAN["PMID"] == fix_pmid, ["Country", "Country_source"]] = fix_country_from_text(fix_country)

In [320]:
# Overwrite the 1700000 chunk in both dataset folders with the corrected frames
# (index=False: the row index is not written to the CSV).
df_parsedX_1700000_base.to_csv(path_or_buf=DF_input_base + "parsedX_1700000.csv", index=False)
df_parsedX_1700000_match_GLOBOCAN.to_csv(path_or_buf=DF_input_match_GLOBOCAN + "parsedX_1700000.csv", index=False)

# Drop both names once the files are written, before the next chunk is loaded.
del df_parsedX_1700000_base, df_parsedX_1700000_match_GLOBOCAN

### Correcting *parsedX_1800000.csv*

In [321]:
# Load the 1800000 chunk from each dataset folder: the base copy first,
# then the match-GLOBOCAN copy. Both folders hold a file with the same name.
df_parsedX_1800000_base, df_parsedX_1800000_match_GLOBOCAN = (
    pd.read_csv(dataset_dir + "parsedX_1800000.csv")
    for dataset_dir in (DF_input_base, DF_input_match_GLOBOCAN)
)

In [322]:
## Replace found errors by true Country values

# Corrections for this chunk, written as PMID -> corrected country name.
# Insertion order matches the order in which the fixes were originally applied.
country_fixes_1800000 = {
    20704980: "United Kingdom",
    20116771: "Austria",
    19858739: "United States",
    19858743: "United States",
    20651972: "United States",
    20079483: "France",
    20177103: "Austria",
    20399038: "France",
    20423029: "France",
    20423030: "France",
    20423034: "France",
    20423041: "France",
    20594856: "Australia",
    20607507: "Germany",
    20690526: "Belgium",
    20535696: "Iran",
    20585938: "Sweden",
    20107748: "Brazil",
    20482834: "France",
    20683915: "France",
    19895170: "Slovakia",
    19945092: "Argentina",
    20027168: "Slovakia",
    20150629: "France",
    20156772: "Slovakia",
    20358379: "France",
    20435483: "Netherlands",
}

# Apply every correction to the base copy and then to the match-GLOBOCAN copy.
# fix_country_from_text comes from aux_fix_country_utils; presumably it returns the
# values for the ["Country", "Country_source"] pair -- verify against that module.
# The frames are addressed by name (no loop alias) so no extra reference to them is kept.
for fix_pmid, fix_country in country_fixes_1800000.items():
    df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == fix_pmid, ["Country", "Country_source"]] = fix_country_from_text(fix_country)
    df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == fix_pmid, ["Country", "Country_source"]] = fix_country_from_text(fix_country)

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20177111, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20177111, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19944031, ["Country", "Country_source"]] = fix_country_from_text("Ecuador")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19944031, ["Country", "Country_source"]] = fix_country_from_text("Ecuador")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20382204, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20382204, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20416248, ["Country", "Country_source"]] = fix_country_from_text("Cuba")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20416248, ["Country", "Country_source"]] = fix_country_from_text("Cuba")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20442517, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20442517, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20508350, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20508350, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20384524, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20384524, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20428757, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20428757, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20150637, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20150637, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20628233, ["Country", "Country_source"]] = fix_country_from_text("Brazil")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20628233, ["Country", "Country_source"]] = fix_country_from_text("Brazil")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20558993, ["Country", "Country_source"]] = fix_country_from_text("Belgium")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20558993, ["Country", "Country_source"]] = fix_country_from_text("Belgium")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19842033, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19842033, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19945229, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19945229, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19951711, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19951711, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20063370, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20063370, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20082301, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20082301, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20180086, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20180086, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20651352, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20651352, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20664927, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20664927, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19838853, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19838853, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19917953, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19917953, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19920823, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19920823, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19932449, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19932449, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19938727, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19938727, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20062554, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20062554, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20066419, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20066419, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20083694, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20083694, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20085938, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20085938, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20093504, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20093504, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20160001, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20160001, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20173136, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20173136, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20188700, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20188700, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20194622, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20194622, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20209740, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20209740, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20223023, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20223023, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20299242, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20299242, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20354138, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20354138, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20364408, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20364408, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20376532, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20376532, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20377096, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20377096, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20382846, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20382846, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20385618, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20385618, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20392998, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20392998, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20399885, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20399885, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20410506, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20410506, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20442224, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20442224, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20453920, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20453920, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20453921, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20453921, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20470778, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20470778, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20482797, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20482797, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20506860, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20506860, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20516093, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20516093, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20521089, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20521089, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20586632, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20586632, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20624308, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20624308, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20625146, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20625146, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20660826, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20660826, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20697056, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20697056, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20704751, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20704751, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20704851, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20704851, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20015556, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20015556, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 19941471, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 19941471, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20417624, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20417624, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20491997, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20491997, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20495653, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20495653, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20628822, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20628822, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20013146, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20013146, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20153564, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20153564, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20166982, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20166982, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20172682, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20172682, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20367086, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20367086, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20399003, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20399003, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20597404, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20597404, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == 20597418, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == 20597418, ["Country", "Country_source"]] = fix_country_from_text("France")

# Template for a further manual country correction in the 1800000 chunk.
# Copy the two lines below, uncomment them, and fill in the PMID (after "==") and the
# country name (inside the quotes). The same PMID and country must be used in both
# lines so that the base and Globocan-matched frames stay in sync.
#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1800000_base.loc[df_parsedX_1800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1800000_match_GLOBOCAN.loc[df_parsedX_1800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [323]:
# Overwrite parsedX_1800000.csv in both dataset folders with the corrected frames
# (the DataFrame index is not written out).
df_parsedX_1800000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_1800000.csv",
    index=False,
)
df_parsedX_1800000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_1800000.csv",
    index=False,
)

# Both frames are finished with: remove their names from the notebook namespace.
del df_parsedX_1800000_base, df_parsedX_1800000_match_GLOBOCAN

### Correcting *parsedX_1900000.csv*

In [324]:
# Load parsedX_1900000.csv from both dataset folders: first the "base" copy,
# then the copy whose names were matched to GLOBOCAN.
df_parsedX_1900000_base, df_parsedX_1900000_match_GLOBOCAN = (
    pd.read_csv(dataset_folder + "parsedX_1900000.csv")
    for dataset_folder in (DF_input_base, DF_input_match_GLOBOCAN)
)

In [325]:
## Replace found errors by true Country values

def _apply_country_fixes(frames, fixes):
    """Write corrected Country / Country_source values into every frame.

    Parameters
    ----------
    frames : dict
        Maps a label (used only for reporting) to a DataFrame that has the
        columns "PMID", "Country" and "Country_source". Frames are modified
        in place.
    fixes : iterable of (PMID, country_text) pairs
        Corrections, applied in the given order, so a later entry overrides
        an earlier one for the same PMID.

    Returns
    -------
    list of (label, PMID) tuples
        Every frame/PMID combination for which no row matched, so that a
        mistyped PMID does not pass unnoticed.
    """
    unmatched = []
    for pmid, country_text in fixes:
        # One call per correction; the result is reused for every frame
        # (assumes fix_country_from_text gives the same result for the same text).
        new_values = fix_country_from_text(country_text)
        for label, frame in frames.items():
            rows = frame["PMID"] == pmid
            if not rows.any():
                unmatched.append((label, pmid))
            # Same assignment as the hand-written statements it replaces
            # (a no-op when no row matches).
            frame.loc[rows, ["Country", "Country_source"]] = new_values
    return unmatched


# Corrections for parsedX_1900000.csv as (PMID, country text) pairs,
# kept in the order in which they were originally written.
country_fixes_1900000 = [
    (20979677, "Ireland"),
    (20981674, "Australia"),
    (21353748, "France"),
    (21389920, "Australia"),
    (21513998, "Belgium"),
    (21577052, "Australia"),
    (20812824, "Iran"),
    (20845290, "Iran"),
    (20845293, "Iran"),
    (20929551, "Iran"),
    (20938425, "Germany"),
    (21134529, "United States"),
    (21132507, "Argentina"),
    (21147621, "Slovakia"),
    (21374929, "Slovakia"),
    (21381411, "Argentina"),
    (21530283, "Argentina"),
    (21570311, "Argentina"),
    (20963646, "United States"),
    (21057074, "United States"),
    (21078497, "United States"),
    (21317527, "United States"),
    (20940127, "India"),
    (21188482, "India"),
    (21550293, "India"),
    (20811660, "United States"),
    (21342522, "Argentina"),
    (20921213, "Belgium"),
    (21104470, "Germany"),
    (20871480, "Japan"),
    (21042771, "Japan"),
    (21046513, "Japan"),
    (21166884, "Japan"),
    (21237499, "Japan"),
    (21274264, "Japan"),
    (21432438, "Japan"),
    (21515922, "Japan"),
    (21573878, "Japan"),
    (21573892, "Japan"),
    (20737365, "Ireland"),
    (20795794, "United States"),
    (20799942, "United States"),
    (20829508, "United Kingdom"),
    (20840098, "United States"),
    (20858520, "United States"),
    (20959667, "United States"),
    (21036538, "United States"),
    (21054901, "Japan"),
    (21062986, "United States"),
    (21084729, "Russia"),
    (21108835, "Italy"),
    (21111364, "Netherlands"),
    (21113464, "Russia"),
    (21173379, "United States"),
    (21193708, "United States"),
    (21237503, "Canada"),
    (21246405, "Germany"),
    (21264542, "United States"),
    (21285094, "United States"),
    (21290202, "United States"),
    (21298495, "Russia"),
    (21352556, "Australia"),
    (21472872, "United States"),
    (21472885, "Poland"),
    (21479313, "United States"),
    (21485699, "Poland"),
    (21487628, "Italy"),
    (21603121, "United States"),
    (21619579, "United Kingdom"),
    (21129090, "Canada"),
    (21079866, "Italy"),
    (21094563, "Italy"),
    (21282027, "Italy"),
    (21542600, "Italy"),
    (21561408, "Italy"),
    (21258055, "Mexico"),
    (21501677, "Mexico"),
    (20955144, "Italy"),
    (21341559, "Italy"),
]

# Every correction goes into both copies of the dataset.
unmatched_pmids_1900000 = _apply_country_fixes(
    {
        "base": df_parsedX_1900000_base,
        "match_GLOBOCAN": df_parsedX_1900000_match_GLOBOCAN,
    },
    country_fixes_1900000,
)

# A PMID that matches no row leaves the data unchanged; report it instead of failing silently.
if unmatched_pmids_1900000:
    print(f"WARNING: PMIDs not found in parsedX_1900000 (dataset, PMID): {unmatched_pmids_1900000}")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_1900000_base.loc[df_parsedX_1900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_1900000_match_GLOBOCAN.loc[df_parsedX_1900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [326]:
# Write the corrected frames back to parsedX_1900000.csv in each dataset folder
# (the row index is not written).
df_parsedX_1900000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_1900000.csv",
    index=False,
)
df_parsedX_1900000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_1900000.csv",
    index=False,
)

# Both files are written; remove the two frame names from the namespace.
del df_parsedX_1900000_base, df_parsedX_1900000_match_GLOBOCAN

### Correcting *parsedX_200000.csv*

In [46]:
# Load parsedX_200000.csv from both dataset folders (base and match_GLOBOCAN)
df_parsedX_200000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_200000.csv"
)
df_parsedX_200000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_200000.csv"
)

In [47]:
## Replace found errors by true Country values
# Manually verified corrections for parsedX_200000, as (PMID, country) pairs,
# listed in the order in which they are applied.
for corrected_pmid, corrected_country in (
    (1998100, "France"),
    (2010301, "Italy"),
    (2073005, "Belgium"),
    (2164195, "Germany"),
    (2171180, "Germany"),
    (2233473, "Australia"),
    (2363615, "France"),
    (2446758, "France"),
    (2446759, "France"),
    (2477147, "France"),
    (2521813, "France"),
    (2611966, "France"),
    (2275578, "Spain"),
    (2356460, "Switzerland"),
    (2101452, "Germany"),
    (2583858, "Argentina"),
    (2456177, "United Kingdom"),
    (2536573, "United Kingdom"),
    (2477086, "Italy"),
    (2218045, "Brazil"),
    (2539611, "Italy"),
    (2348881, "Netherlands"),
    (2371934, "Belgium"),
    (2675007, "Netherlands"),
):
    # Each correction is written to both copies: the base frame first, then the
    # match_GLOBOCAN frame; fix_country_from_text is called once per assignment.
    for target_df in (df_parsedX_200000_base, df_parsedX_200000_match_GLOBOCAN):
        target_df.loc[
            target_df["PMID"] == corrected_pmid, ["Country", "Country_source"]
        ] = fix_country_from_text(corrected_country)

# Remove the loop variables so no extra reference to the frames (or stray names)
# lingers in the notebook namespace after this cell.
del corrected_pmid, corrected_country, target_df

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_200000_base.loc[df_parsedX_200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_200000_match_GLOBOCAN.loc[df_parsedX_200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [48]:
# Write both corrected copies of parsedX_200000.csv back over the files they were read from.
# os.path.join is used instead of `folder + filename` so the target path is still correct when
# the folder typed in at the top of the notebook has no trailing path separator.
df_parsedX_200000_base.to_csv(os.path.join(DF_input_base, "parsedX_200000.csv"), index = False)
df_parsedX_200000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_200000.csv"), index = False)

# Drop the names so the frames can be garbage-collected before the next csv is loaded.
del df_parsedX_200000_base
del df_parsedX_200000_match_GLOBOCAN

### Correcting *parsedX_2000000.csv*

In [327]:
# Read both copies of parsedX_2000000.csv: the untouched-names copy (base) and the copy whose
# names were adapted to the Globocan dataset (match_GLOBOCAN).
# os.path.join is used instead of `folder + filename` so the path is still correct when the
# folder typed in at the top of the notebook has no trailing path separator.
df_parsedX_2000000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_2000000.csv"))
df_parsedX_2000000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_2000000.csv"))

In [328]:
## Replace found errors by true Country values

# Manually verified corrections for parsedX_2000000.csv: PMID -> true country.
# Entries are kept in the order in which the errors were found; to register a new
# correction, add one `PMID: "Country"` entry here.
country_fixes_parsedX_2000000 = {
    21738117: "Austria",
    21955498: "Australia",
    22008273: "France",
    22079516: "Australia",
    22347959: "Australia",
    21768603: "Spain",
    21845203: "Mexico",
    21696673: "Denmark",
    21975185: "Denmark",
    21667238: "Germany",
    22248273: "Slovakia",
    22271404: "Argentina",
    22306424: "Germany",
    21886899: "India",
    21767710: "United States",
    22229958: "United States",
    22233946: "United States",
    21749397: "India",
    21986113: "India",
    22295896: "United States",
    21623259: "Belgium",
    22126249: "Germany",
    21737655: "Japan",
    21818589: "Japan",
    21826671: "Japan",
    22038670: "Japan",
    22041920: "Japan",
    22287757: "Japan",
    22288737: "Japan",
    22358611: "Japan",
    21697760: "United States",
    21723792: "United States",
    21732047: "United Kingdom",
    21741707: "United States",
    21793155: "Taiwan",
    21800071: "Israel",
    21803154: "Spain",
    21812970: "Germany",
    21844577: "United States",
    21848691: "United States",
    21849090: "Italy",
    21864884: "United States",
    21867523: "Canada",
    21871986: "Australia",
    21894461: "Canada",
    21899746: "Canada",
    21906355: "Ireland",
    21921791: "United States",
    21922019: "Germany",
    21962474: "United States",
    21965458: "United States",
    22023734: "Canada",
    22028409: "United States",
    22071020: "Italy",
    22076479: "United States",
    22079760: "United States",
    22112838: "Spain",
    22142690: "United States",
    22158716: "United States",
    22174434: "United States",
    22206853: "Greece",
    22208390: "United States",
    22210429: "Poland",
    22214514: "Australia",
    22232519: "Canada",
    22259051: "United States",
    22265879: "Portugal",
    22276017: "United States",
    22278770: "Israel",
    22296784: "Canada",
    22300659: "United States",
    22373079: "United Kingdom",
    21755950: "Italy",
    21792328: "Australia",
    21944546: "Italy",
    21954959: "Italy",
    21967832: "Australia",
    22085129: "Australia",
    22285059: "Brazil",
    21686802: "Mexico",
    21714437: "Mexico",
    21779758: "Mexico",
    21990213: "Mexico",
    22046564: "Mexico",
    22067260: "Mexico",
    22155745: "Mexico",
    22293942: "Mexico",
    21934690: "France",
    22283649: "Italy",
    22352749: "Italy",
}

# Countries that must differ in the matching Globocan dataset only.
# Convert to Taiwan in the base dataset and to China in the matching Globocan dataset because,
# in the latter, there is no Taiwan (added to China)
country_overrides_parsedX_2000000_match_GLOBOCAN = {
    21793155: "China",
}

country_columns = ["Country", "Country_source"]

for pmid, country in country_fixes_parsedX_2000000.items():
    # fix_country_from_text comes from aux_fix_country_utils; its result is assigned to both
    # columns at once (presumably a Country / Country_source pair -- confirm in that module).
    df_parsedX_2000000_base.loc[
        df_parsedX_2000000_base["PMID"] == pmid, country_columns
    ] = fix_country_from_text(country)

    # Same correction for the Globocan-matching copy, unless an override is registered for this PMID.
    country_match_GLOBOCAN = country_overrides_parsedX_2000000_match_GLOBOCAN.get(pmid, country)
    df_parsedX_2000000_match_GLOBOCAN.loc[
        df_parsedX_2000000_match_GLOBOCAN["PMID"] == pmid, country_columns
    ] = fix_country_from_text(country_match_GLOBOCAN)

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2000000_base.loc[df_parsedX_2000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2000000_match_GLOBOCAN.loc[df_parsedX_2000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [329]:
# Write the corrected frames back over the csv of each dataset copy (row index not saved)
df_parsedX_2000000_base.to_csv(f"{DF_input_base}parsedX_2000000.csv", index=False)
df_parsedX_2000000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}parsedX_2000000.csv", index=False)

# Drop both names so the frames can be released before the next chunk is read
del df_parsedX_2000000_base, df_parsedX_2000000_match_GLOBOCAN

### Correcting *parsedX_2100000.csv*

In [330]:
# Load the parsedX_2100000 chunk from both dataset folders:
# the base copy and the copy that matches the Globocan naming
df_parsedX_2100000_base = pd.read_csv(f"{DF_input_base}parsedX_2100000.csv")
df_parsedX_2100000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_2100000.csv")

In [331]:
## Replace found errors by true Country values

# Manual corrections for this chunk as (PMID, country text) pairs, in the order they were curated.
# For every pair, the output of fix_country_from_text(<country text>) is assigned to the
# "Country" and "Country_source" columns of the rows with that PMID in both dataset copies.
# To add a correction, append a new (PMID, "Country") pair to this list.
# PMID 23054846 was listed twice in the original cell and is kept once here.
# NOTE(review): the repeated entry may have been meant for a different PMID - confirm.
country_fixes_2100000 = [
    (22504321, "United Kingdom"),
    (23150848, "United States"),
    (22492023, "Ireland"),
    (22498191, "Australia"),
    (22697326, "Australia"),
    (22760819, "Australia"),
    (22409891, "Denmark"),
    (22649741, "Bulgaria"),
    (23085864, "Netherlands"),
    (22640890, "France"),
    (22385244, "Argentina"),
    (22508812, "Germany"),
    (22544068, "Argentina"),
    (22753729, "Argentina"),
    (22804241, "Argentina"),
    (22393102, "United States"),
    (22696254, "United States"),
    (22407418, "France"),
    (22879551, "Italy"),
    (22437116, "India"),
    (22488622, "Brazil"),
    (22644852, "India"),
    (22796160, "India"),
    (22809646, "India"),
    (22782591, "Italy"),
    (22808570, "Netherlands"),
    (22905907, "Netherlands"),
    (22537748, "Japan"),
    (22614238, "Japan"),
    (22615049, "Japan"),
    (22798169, "Japan"),
    (22827846, "Japan"),
    (22853690, "Japan"),
    (22891146, "Japan"),
    (22947631, "Japan"),
    (22961650, "Japan"),
    (23022228, "Japan"),
    (23054846, "Japan"),
    (23117294, "Japan"),
    (22402380, "United States"),
    (22438764, "Spain"),
    (22449145, "Canada"),
    (22455486, "United States"),
    (22459781, "United States"),
    (22471922, "Australia"),
    (22472335, "Canada"),
    (22523611, "Spain"),
    (22653380, "United States"),
    (22692591, "United States"),
    (22709648, "Switzerland"),
    (22733221, "United States"),
    (22749210, "United States"),
    (22829042, "Canada"),
    (22831463, "United States"),
    (22834842, "United States"),
    (22841728, "Netherlands"),
    (22854225, "Brazil"),
    (22863609, "United States"),
    (22882222, "United States"),
    (22889306, "Canada"),
    (22926903, "Poland"),
    (22977854, "Russia"),
    (22978603, "Taiwan"),
    (23032615, "United States"),
    (23036231, "United Kingdom"),
    (23065162, "United States"),
    (23065174, "United States"),
    (23065228, "Iran"),
    (23065820, "United States"),
    (23066136, "United States"),
    (23072453, "Japan"),
    (23143011, "United States"),
    (23152406, "United States"),
    (23170963, "Italy"),
    (22506638, "Italy"),
    (22537682, "Italy"),
    (22545419, "Mexico"),
    (22549684, "Mexico"),
    (22642942, "Mexico"),
    (22749804, "Mexico"),
    (22846704, "Mexico"),
    (23100366, "Mexico"),
    (22472691, "Italy"),
    (22495270, "Italy"),
    (22594476, "Italy"),
]

# PMIDs whose country text differs in the matching Globocan dataset.
# Convert to Taiwan in the base dataset and to China in the matching Globocan dataset because, in the latter, there is no Taiwan (added to China)
country_fixes_2100000_globocan_overrides = {22978603: "China"}

for fix_pmid_2100000, fix_country_2100000 in country_fixes_2100000:
    # Base copy: always uses the country text from the list above
    df_parsedX_2100000_base.loc[
        df_parsedX_2100000_base["PMID"] == fix_pmid_2100000, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country_2100000)

    # Matching Globocan copy: same text unless an override is registered for this PMID
    df_parsedX_2100000_match_GLOBOCAN.loc[
        df_parsedX_2100000_match_GLOBOCAN["PMID"] == fix_pmid_2100000, ["Country", "Country_source"]
    ] = fix_country_from_text(
        country_fixes_2100000_globocan_overrides.get(fix_pmid_2100000, fix_country_2100000)
    )

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2100000_base.loc[df_parsedX_2100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2100000_match_GLOBOCAN.loc[df_parsedX_2100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [332]:
# Write both corrected copies to parsedX_2100000.csv inside their respective
# dataset folders, leaving the DataFrame index out of the files.
df_parsedX_2100000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_2100000.csv",
    index=False,
)
df_parsedX_2100000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_2100000.csv",
    index=False,
)

# Drop both names once the data is on disk so the frames can be garbage-collected.
del df_parsedX_2100000_base, df_parsedX_2100000_match_GLOBOCAN

### Correcting *parsedX_2200000.csv*

In [390]:
# Load the two copies of parsedX_2200000.csv: one from the base dataset folder and
# one from the folder whose names were adapted to the GLOBOCAN dataset.
df_parsedX_2200000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_2200000.csv"
)
df_parsedX_2200000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_2200000.csv"
)

In [391]:
## Replace found errors by true Country values

# Manually curated corrections for parsedX_2200000 as one ordered table of
# (PMID, corrected country). Keeping a single entry per article guarantees that the
# base copy and the GLOBOCAN-matched copy receive exactly the same fix; the previous
# copy-pasted statement pairs could drift apart through a typo in either half.
# To add a correction, append one (PMID, "Country") tuple.
country_fixes_2200000 = [
    (23199318, "France"),
    (23466149, "France"),
    (23567228, "France"),
    (23568704, "Ireland"),
    (23642892, "Australia"),
    (23850038, "Australia"),
    (23852704, "Ireland"),
    (23878888, "France"),
    (23410091, "Argentina"),
    (23453489, "Argentina"),
    (23591915, "Argentina"),
    (23677538, "Germany"),
    (23737135, "Slovakia"),
    (23810007, "Argentina"),
    (23814805, "Germany"),
    (23728991, "China"),
    (23541856, "United States"),
    (23576902, "United States"),
    (23745557, "Italy"),
    (23549732, "Netherlands"),
    (23531907, "Switzerland"),
    (23801227, "Italy"),
    (23880302, "Italy"),
    (23250087, "Brazil"),
    (23225448, "Japan"),
    (23232495, "Japan"),
    (23242821, "Japan"),
    (23264066, "Japan"),
    (23280118, "Japan"),
    (23371017, "Japan"),
    (23379230, "Japan"),
    (23460370, "Japan"),
    (23587603, "Japan"),
    (23720159, "Japan"),
    (23861712, "Japan"),
    (23180516, "United States"),
    (23231516, "Russia"),
    (23241366, "United States"),
    (23250450, "United States"),
    (23263697, "United States"),
    (23304651, "United States"),
    (23326271, "United States"),
    (23328810, "United States"),
    (23345359, "Canada"),
    (23345384, "United States"),
    (23355268, "United States"),
    (23379852, "United States"),
    (23397776, "Croatia"),
    (23451615, "United States"),
    (23471296, "United States"),
    (23520191, "China"),
    (23522447, "United States"),
    (23566155, "Australia"),
    (23569735, "Poland"),
    (23571611, "Spain"),
    (23591925, "Poland"),
    (23664318, "United Kingdom"),
    (23701117, "Canada"),
    (23718873, "Canada"),
    (23748174, "United States"),
    (23773794, "Canada"),
    (23777766, "United States"),
    (23793983, "United States"),
    (23794150, "Canada"),
    (23810058, "Canada"),
    (23848276, "Spain"),
    (23866263, "South Korea"),
    (23866296, "Canada"),
    (23870712, "Canada"),
    (23773084, "Canada"),
    (23185422, "United Kingdom"),
    (23714623, "United Kingdom"),
    (23796995, "Australia"),
    (23328791, "Mexico"),
    (23594910, "Canada"),
    (23693014, "Mexico"),
    (23782518, "Mexico"),
    (23861223, "France"),
    (23876539, "Jamaica"),
]

# Apply every correction to both copies, in table order and base first, which is the
# same order the individual statements used to run in.
# fix_country_from_text comes from aux_fix_country_utils; it presumably returns the
# (Country, Country_source) pair for the given name -- confirm in that module.
_unmatched_2200000 = []
for _pmid, _country in country_fixes_2200000:
    for _label, _frame in (
        ("base", df_parsedX_2200000_base),
        ("match_GLOBOCAN", df_parsedX_2200000_match_GLOBOCAN),
    ):
        _rows = _frame["PMID"] == _pmid
        if not _rows.any():
            # A PMID that selects no row would otherwise be a silent no-op
            # (for example a mistyped identifier); remember it for the report below.
            _unmatched_2200000.append((_pmid, _label))
        _frame.loc[_rows, ["Country", "Country_source"]] = fix_country_from_text(_country)

if _unmatched_2200000:
    print(f"WARNING - PMIDs with no matching row (nothing corrected): {_unmatched_2200000}")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2200000_base.loc[df_parsedX_2200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2200000_match_GLOBOCAN.loc[df_parsedX_2200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [392]:
# Write the corrected chunk back over its source CSV in each dataset folder
# (the folder strings are concatenated as-is, so they must end with a path
# separator), then drop the frames so the kernel can reclaim their memory.
for _frame, _out_dir in (
    (df_parsedX_2200000_base, DF_input_base),
    (df_parsedX_2200000_match_GLOBOCAN, DF_input_match_GLOBOCAN),
):
    _frame.to_csv(_out_dir + "parsedX_2200000.csv", index=False)

# The loop variables still reference the last frame/folder; remove them too.
del _frame, _out_dir
del df_parsedX_2200000_base, df_parsedX_2200000_match_GLOBOCAN

### Correcting *parsedX_2300000.csv*

In [336]:
# Load the 2300000 chunk from both dataset folders: first the untouched "base"
# copy, then the copy whose names are aligned with GLOBOCAN.
# The folder strings are concatenated as-is, so they must end with a separator.
df_parsedX_2300000_base, df_parsedX_2300000_match_GLOBOCAN = (
    pd.read_csv(_folder + "parsedX_2300000.csv")
    for _folder in (DF_input_base, DF_input_match_GLOBOCAN)
)

In [337]:
## Replace found errors by true Country values

# Manual corrections for chunk 2300000, written as (PMID, true country) and kept
# in the order in which they were originally applied. Each correction is applied
# to both copies of the dataset (base and match-GLOBOCAN). To register a further
# correction, append one tuple here.
_COUNTRY_FIXES_2300000 = (
    (24198623, "Austria"),
    (24363999, "Australia"),
    (24461094, "France"),
    (24635957, "France"),
    (24183806, "Italy"),
    (24284972, "Italy"),
    (24515771, "United States"),
    (24355447, "Denmark"),
    (24388681, "United States"),
    (23915744, "Germany"),
    (23944616, "Slovakia"),
    (24068440, "Slovakia"),
    (24443268, "Spain"),
    (24477172, "Slovakia"),
    (24529771, "Slovakia"),
    (24027420, "India"),
    (24322649, "Netherlands"),
    (24235966, "United States"),
    (24419921, "Italy"),
    (23933175, "India"),
    (24073795, "India"),
    (24431104, "India"),
    (23911595, "Germany"),
    (23911636, "France"),
    (23912027, "Brazil"),
    (23912362, "Brazil"),
    (24519169, "Germany"),
    (23905912, "Brazil"),
    (23908602, "United Kingdom"),
    (23912793, "Brazil"),
    (23915951, "Mexico"),
    (23917393, "Italy"),
    (23920228, "Italy"),
    (23922721, "China"),
    (23928999, "Australia"),
    (23929086, "Brazil"),
    (23932339, "United States"),
    (23933084, "Brazil"),
    (23933406, "Brazil"),
    (23933437, "Argentina"),
    (23934226, "Canada"),
    (23934840, "Australia"),
    (24021898, "Japan"),
    (24101096, "Japan"),
    (24212624, "Japan"),
    (24282571, "Japan"),
    (24288667, "Japan"),
    (24343741, "Japan"),
    (24355909, "Japan"),
    (24371455, "Japan"),
    (24445733, "Japan"),
    (24568474, "Japan"),
    (23956614, "United States"),
    (24020869, "Canada"),
    (24045542, "United States"),
    (24064977, "Austria"),
    (24091718, "United States"),
    (24101654, "Russia"),
    (24119165, "Sweden"),
    (24121978, "Taiwan"),  # match-GLOBOCAN copy uses "China" instead, see override below
    (24135896, "United States"),
    (24147471, "United States"),
    (24291674, "Poland"),
    (24323185, "Canada"),
    (24327327, "United States"),
    (24447304, "United States"),
    (24456610, "Canada"),
    (24476678, "United States"),
    (24485017, "South Korea"),
    (24494720, "United States"),
    (24524764, "Israel"),
    (24557659, "Canada"),
    (24561676, "United States"),
    (24563726, "United States"),
    (24572574, "United States"),
    (24633598, "United States"),
    (24652560, "Indonesia"),
    (24467876, "Canada"),
    (23939153, "Australia"),
    (24269025, "Italy"),
    (24315836, "Italy"),
)

# PMIDs whose country differs in the match-GLOBOCAN copy.
# Convert to Taiwan in the base dataset and to China in the matching Globocan
# dataset because, in the latter, there is no Taiwan (added to China).
_GLOBOCAN_COUNTRY_OVERRIDES_2300000 = {
    24121978: "China",
}

for _pmid, _country in _COUNTRY_FIXES_2300000:
    # Same country for both copies unless an explicit GLOBOCAN override exists.
    _globocan_country = _GLOBOCAN_COUNTRY_OVERRIDES_2300000.get(_pmid, _country)

    # The value returned by fix_country_from_text is written to the
    # Country / Country_source columns of every row carrying this PMID.
    # A PMID that is absent from a frame selects no rows, so nothing changes.
    df_parsedX_2300000_base.loc[
        df_parsedX_2300000_base["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country)
    df_parsedX_2300000_match_GLOBOCAN.loc[
        df_parsedX_2300000_match_GLOBOCAN["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_globocan_country)

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24321579, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24321579, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24000898, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24000898, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24100386, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24100386, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24119925, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24119925, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24169630, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24169630, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24268397, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24268397, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24354191, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24354191, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24594798, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24594798, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24079174, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24079174, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == 24374273, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == 24374273, ["Country", "Country_source"]] = fix_country_from_text("Italy")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2300000_base.loc[df_parsedX_2300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2300000_match_GLOBOCAN.loc[df_parsedX_2300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [338]:
# Write both corrected copies of chunk 2300000 to "parsedX_2300000.csv" inside
# their respective folders; the DataFrame row index is not written out.
df_parsedX_2300000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_2300000.csv",
    index=False,
)
df_parsedX_2300000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_2300000.csv",
    index=False,
)

# Drop both names so the frames can be garbage-collected before the next chunk.
del df_parsedX_2300000_base, df_parsedX_2300000_match_GLOBOCAN

### Correcting *parsedX_2400000.csv*

In [339]:
# Load the two copies of chunk 2400000: the untouched "base" copy and the copy
# whose names are matched to GLOBOCAN. Both are corrected in the next cell.
df_parsedX_2400000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_2400000.csv"
)
df_parsedX_2400000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_2400000.csv"
)

In [340]:
## Replace found errors by true Country values

# Corrections for this chunk as (PMID, true country) pairs. Each PMID is written
# once and applied to both copies of the dataset, so the two copies cannot drift
# apart through a typo. A list of tuples (rather than a dict) keeps the
# application order explicit. To register a new correction, append a pair here.
country_fixes_2400000 = [
    (24781824, "Brazil"),
    (25019046, "Brazil"),
    (25253429, "Brazil"),
    (24739432, "United States"),
    (25010257, "United States"),
    (24739578, "Australia"),
    (24888817, "Australia"),
    (25198032, "Australia"),
    (25307957, "Australia"),
    (25329865, "Germany"),
    (25121354, "Italy"),
    (24960584, "Argentina"),
    (25124196, "France"),
    (25145319, "Germany"),
    (24731713, "India"),
    (24949329, "Brazil"),
    (25138130, "India"),
    (25330744, "India"),
    (24675430, "United Kingdom"),
    (25323390, "Italy"),
    (24906397, "Australia"),
    (24974129, "United Kingdom"),
    (24665346, "Japan"),
    (24703097, "Japan"),
    (24748865, "Japan"),
    (24958732, "Japan"),
    (24969896, "Japan"),
    (25120646, "Japan"),
    (25169535, "Japan"),
    (25258254, "Japan"),
    (25349214, "Japan"),
    (24661934, "Spain"),
    (24726916, "Russia"),
    (24742083, "United States"),
    (24753208, "Brazil"),
    (24768732, "United States"),
    (24778001, "United States"),
    (24833010, "United States"),
    (24842651, "Poland"),
    (24885701, "Canada"),
    (24885892, "Ireland"),
    (24967520, "United States"),
    (25008465, "United States"),
    (25015688, "United States"),
    (25073515, "Egypt"),
    (25077882, "United States"),
    (25117591, "United States"),
    (25168387, "United States"),
    (25189706, "United States"),
    (25196248, "United Kingdom"),
    (25209180, "Bulgaria"),
    (25216851, "Israel"),
    (25286960, "United States"),
    (25319495, "United States"),
    (25333035, "Russia"),
    (25351969, "United States"),
    (25360374, "United States"),
    (25336387, "Australia"),
    (25344426, "Italy"),
    (24709561, "Mexico"),
    (24718706, "Mexico"),
    (24817602, "Mexico"),
    (25096762, "Mexico"),
    (25119587, "Mexico"),
    (25246023, "Mexico"),
    (25299496, "Mexico"),
    (25341666, "Mexico"),
    (25344197, "Mexico"),
    (24928688, "Italy"),
]

# For every correction, overwrite "Country" and "Country_source" on the rows
# whose PMID matches, first in the base copy and then in the GLOBOCAN-matched
# copy. fix_country_from_text (imported from aux_fix_country_utils) is called
# once per copy, so the sequence of calls is the same as with one hand-written
# statement per correction.
# NOTE(review): a PMID that is absent from a frame matches no rows and is
# skipped silently -- confirm that this is acceptable.
for pmid, country in country_fixes_2400000:
    df_parsedX_2400000_base.loc[
        df_parsedX_2400000_base["PMID"] == pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(country)
    df_parsedX_2400000_match_GLOBOCAN.loc[
        df_parsedX_2400000_match_GLOBOCAN["PMID"] == pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(country)

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2400000_base.loc[df_parsedX_2400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2400000_match_GLOBOCAN.loc[df_parsedX_2400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [341]:
# Write both corrected copies of chunk 2400000 back over their source csvs
# (the row index is not written), then drop the two names so the frames can be
# garbage-collected before the next chunk is loaded.
df_parsedX_2400000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_2400000.csv",
    index=False,
)
df_parsedX_2400000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_2400000.csv",
    index=False,
)

del df_parsedX_2400000_base, df_parsedX_2400000_match_GLOBOCAN

### Correcting *parsedX_2500000.csv*

In [464]:
# Load chunk 2500000 from both dataset folders: the "base" copy and the
# "match GLOBOCAN" copy. Each folder holds a file with the same name.
df_parsedX_2500000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_2500000.csv"
)
df_parsedX_2500000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_2500000.csv"
)

In [466]:
## Replace found errors by true Country values

# Manually curated corrections for chunk 2500000, as (PMID, country text) pairs.
# Every entry is applied to BOTH dataset copies (base and match GLOBOCAN), so a
# PMID is written once here and can never differ between the two frames.
# NOTE: PMID 25634551 was previously applied only to the match GLOBOCAN frame,
# because the base statement repeated PMID 25420684 by mistake; it is now
# applied to both frames like every other entry.
country_fixes_2500000 = [
    (25936229, "United Kingdom"),
    (25575781, "Brazil"),
    (25569638, "Spain"),
    (25420684, "United States"),
    (25634551, "United States"),
    (25537567, "Australia"),
    (25566968, "Germany"),
    (25749513, "France"),
    (25691671, "Spain"),
    (25462266, "Italy"),
    (25501064, "Italy"),
    (25634195, "China"),
    (25470344, "China"),
    (25634176, "China"),
    (25853692, "Netherlands"),
    (25394925, "Denmark"),
    (25510259, "Argentina"),
    (25828708, "Slovakia"),
    (25853302, "Argentina"),
    (25926141, "Argentina"),
    (25421664, "India"),
    (25569657, "Japan"),
    (25847424, "India"),
    (25568391, "Netherlands"),
    (25467734, "India"),
    (25482473, "India"),
    (25601092, "India"),
    (25630663, "India"),
    (25743823, "India"),
    (25674762, "South Korea"),
    (25526474, "South Korea"),
    (25700305, "China"),
    (25789951, "China"),
    (25832335, "Sweden"),
    (25804110, "Netherlands"),
    (25380778, "Greece"),
    (25519220, "Malaysia"),
    (25455737, "Japan"),
    (25577513, "Japan"),
    (25663441, "Japan"),
    (25704555, "Japan"),
    (25829531, "Japan"),
    (25855965, "Japan"),
    (25893823, "Japan"),
    (25997737, "Japan"),
    (25364495, "United States"),
    (25364655, "United States"),
    (25367402, "Canada"),
    (25417181, "Germany"),
    (25488804, "United States"),
    (25521938, "United States"),
    (25532835, "Poland"),
    (25546693, "Italy"),
    (25553116, "United States"),
    (25592002, "Canada"),
    (25604141, "Iran"),
    (25642326, "United States"),
    (25648640, "United States"),
    (25738299, "Russia"),
    (25765654, "United States"),
    (25787964, "United States"),
    (25788186, "United States"),
    (25845809, "Israel"),
    (25865744, "Poland"),
    (25882093, "United States"),
    (25890191, "Canada"),
    (25897337, "United States"),
    (25905177, "United States"),
    (25905927, "United States"),
    (25943888, "Canada"),
    (26021563, "Canada"),
]

# Apply each correction in listed order, base frame first and then the match
# GLOBOCAN frame. The value returned by fix_country_from_text (imported helper)
# is assigned to the "Country" and "Country_source" columns of every row whose
# PMID matches; it is called once per frame.
for fix_pmid, fix_country in country_fixes_2500000:
    for target_df in (df_parsedX_2500000_base, df_parsedX_2500000_match_GLOBOCAN):
        target_df.loc[
            target_df["PMID"] == fix_pmid, ["Country", "Country_source"]
        ] = fix_country_from_text(fix_country)

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25758031, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25758031, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25658127, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25658127, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 26001395, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 26001395, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25445350, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25445350, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25528484, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25528484, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25535587, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25535587, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25542236, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25542236, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25720523, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25720523, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25795146, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25795146, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25802479, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25802479, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25837847, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25837847, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25984538, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25984538, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 26014616, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 26014616, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25501061, ["Country", "Country_source"]] = fix_country_from_text("China")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25501061, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25677263, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25677263, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25701382, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25701382, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25674748, ["Country", "Country_source"]] = fix_country_from_text("China")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25674748, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25827538, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25827538, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == 25827539, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == 25827539, ["Country", "Country_source"]] = fix_country_from_text("India")

# Template for one more manual correction: copy the two lines below, uncomment them,
# and fill in the PMID (after "==") and the country name (inside the quotes).
#df_parsedX_2500000_base.loc[df_parsedX_2500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2500000_match_GLOBOCAN.loc[df_parsedX_2500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [467]:
# Write the corrected frames to parsedX_2500000.csv in the base and GLOBOCAN-matched
# directories. os.path.join is used instead of string concatenation so the path is
# valid whether or not the directory typed at the input prompt ends with a separator.
df_parsedX_2500000_base.to_csv(
    os.path.join(DF_input_base, "parsedX_2500000.csv"), index=False
)
df_parsedX_2500000_match_GLOBOCAN.to_csv(
    os.path.join(DF_input_match_GLOBOCAN, "parsedX_2500000.csv"), index=False
)

# Drop both names now that the frames are saved; the next file is loaded afterwards.
del df_parsedX_2500000_base, df_parsedX_2500000_match_GLOBOCAN

### Correcting *parsedX_2600000.csv*

In [468]:
# Read parsedX_2600000.csv from both dataset copies: the base copy and the copy whose
# names were adapted to match GLOBOCAN. os.path.join is used instead of string
# concatenation so the path is valid whether or not the directory typed at the input
# prompt ends with a separator.
df_parsedX_2600000_base = pd.read_csv(
    os.path.join(DF_input_base, "parsedX_2600000.csv")
)
df_parsedX_2600000_match_GLOBOCAN = pd.read_csv(
    os.path.join(DF_input_match_GLOBOCAN, "parsedX_2600000.csv")
)

In [469]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_2600000, written as (PMID, country) pairs.
# The pairs are applied in the listed order to both copies of the dataset (base and
# GLOBOCAN-matched), so adding a correction only requires one extra row in this table.
country_fixes_2600000 = [
    (26047983, "Canada"),
    (26149098, "United States"),
    (26078215, "Australia"),
    (26399387, "Australia"),
    (26414901, "Ireland"),
    (26530358, "Ireland"),
    (26683282, "Australia"),
    (26222501, "United Kingdom"),
    (26448015, "China"),
    (26109347, "Germany"),
    (26210402, "France"),
    (26235418, "France"),
    (26340057, "Slovakia"),
    (26405550, "Slovakia"),
    (26482183, "France"),
    (26700672, "Slovakia"),
    (26511210, "Netherlands"),
    (26642762, "India"),
    (26125142, "France"),
    (26266182, "India"),
    (26678861, "India"),
    (26448009, "China"),
    (26469889, "China"),
    (26346926, "United States"),
    (26435889, "United States"),
    (26106210, "Netherlands"),
    (26304895, "Netherlands"),
    (26151503, "Japan"),
    (26173602, "Japan"),
    (26275804, "Japan"),
    (26304819, "Japan"),
    (26434861, "Japan"),
    (26437854, "Japan"),
    (26049699, "United States"),
    (26082811, "United States"),
    (26125223, "United States"),
    (26156521, "Canada"),
    (26160057, "Germany"),
    (26166665, "Portugal"),
    (26181546, "China"),
    (26190888, "Canada"),
    (26221192, "Ukraine"),
    (26279470, "United States"),
    (26287406, "United Kingdom"),
    (26311478, "Brazil"),
    (26316877, "United States"),
    (26343244, "United States"),
    (26381428, "Poland"),
    (26385356, "Canada"),
    (26415497, "United States"),
    (26446460, "Iran"),
    (26466668, "Greece"),
]

for fix_pmid, fix_country_name in country_fixes_2600000:
    # The value returned by fix_country_from_text is written to the "Country" and
    # "Country_source" columns of every row with this PMID. The helper is called once
    # per frame, and the frames are addressed by name (not through a loop variable) so
    # no extra reference to them is created here.
    df_parsedX_2600000_base.loc[
        df_parsedX_2600000_base["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country_name)
    df_parsedX_2600000_match_GLOBOCAN.loc[
        df_parsedX_2600000_match_GLOBOCAN["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country_name)

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26522776, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26522776, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26611382, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26611382, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26631394, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26631394, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26656323, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26656323, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26419685, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26419685, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26681384, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26681384, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26073129, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26073129, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26112485, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26112485, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26213910, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26213910, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26217805, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26217805, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26295406, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26295406, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26316883, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26316883, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26335153, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26335153, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26343499, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26343499, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26496413, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26496413, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26549463, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26549463, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26631442, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26631442, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26706364, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26706364, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26421962, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26421962, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26706169, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26706169, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26090299, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26090299, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26188121, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26188121, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26329327, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26329327, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")

df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == 26632735, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == 26632735, ["Country", "Country_source"]] = fix_country_from_text("Australia")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2600000_base.loc[df_parsedX_2600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2600000_match_GLOBOCAN.loc[df_parsedX_2600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [470]:
# Write the corrected frames to parsedX_2600000.csv in each dataset folder
# (row index is not written).
df_parsedX_2600000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_2600000.csv",
    index=False,
)
df_parsedX_2600000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_2600000.csv",
    index=False,
)

# The batch is saved; unbind both frame names before moving to the next file.
del df_parsedX_2600000_base, df_parsedX_2600000_match_GLOBOCAN

### Correcting *parsedX_2700000.csv*

In [471]:
# Load parsedX_2700000.csv from both dataset folders:
# the base copy and the GLOBOCAN-matched copy.
df_parsedX_2700000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_2700000.csv"
)
df_parsedX_2700000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_2700000.csv"
)

In [472]:
## Replace found errors by true Country values

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26974049, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26974049, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27292875, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27292875, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27142453, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27142453, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27207581, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27207581, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26817902, ["Country", "Country_source"]] = fix_country_from_text("China")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26817902, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27239393, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27239393, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27369807, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27369807, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26886624, ["Country", "Country_source"]] = fix_country_from_text("China")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26886624, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26830275, ["Country", "Country_source"]] = fix_country_from_text("Germany")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26830275, ["Country", "Country_source"]] = fix_country_from_text("Germany")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26902155, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26902155, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26921575, ["Country", "Country_source"]] = fix_country_from_text("Slovakia")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26921575, ["Country", "Country_source"]] = fix_country_from_text("Slovakia")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26944049, ["Country", "Country_source"]] = fix_country_from_text("Slovakia")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26944049, ["Country", "Country_source"]] = fix_country_from_text("Slovakia")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26906470, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26906470, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27099806, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27099806, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 26871844, ["Country", "Country_source"]] = fix_country_from_text("China")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 26871844, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27013905, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27013905, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27081632, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27081632, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27131889, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27131889, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == 27235944, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == 27235944, ["Country", "Country_source"]] = fix_country_from_text("India")

# Manual country corrections for parsedX_2700000, listed as
# (PMID, country text) in the order in which they are applied.
_country_fixes_2700000 = [
    (26765418, "South Korea"),
    (26937937, "South Korea"),
    (27374881, "Argentina"),
    (26765477, "China"),
    (27043668, "China"),
    (27057916, "South Korea"),
    (26896570, "Germany"),
    (27005806, "Japan"),
    (27015862, "Japan"),
    (27050912, "Japan"),
    (27271270, "Japan"),
    (27272782, "Japan"),
    (27342746, "Japan"),
    (26729349, "United States"),
    (26817896, "Germany"),
    (26828843, "India"),
    (26838800, "Canada"),
    (26847449, "Canada"),
    (26862179, "United States"),
    (26874618, "Poland"),
    (27038831, "United States"),
    (27073117, "United States"),
    (27125973, "United States"),
    (27146669, "Poland"),
    (27156077, "China"),
    (27179143, "United States"),
    (27223688, "United States"),
    (27230973, "Canada"),
    (27248849, "United States"),
    (27251414, "United States"),
    (27318562, "Canada"),
    (27357927, "Netherlands"),
    (27421752, "Canada"),
    (26777299, "Italy"),
    (26828111, "Australia"),
    (26772481, "Mexico"),
    (26868456, "Mexico"),
    (26895493, "Mexico"),
    (26924581, "Mexico"),
    (26971296, "Mexico"),
    (27286681, "Mexico"),
    (27082613, "South Korea"),
    (27100425, "China"),
]

# For every PMID, overwrite Country and Country_source with the result of
# fix_country_from_text, first in the base copy and then in the
# match-GLOBOCAN copy (the helper is called once per copy).
for _pmid, _country_text in _country_fixes_2700000:
    for _frame in (df_parsedX_2700000_base, df_parsedX_2700000_match_GLOBOCAN):
        _frame.loc[_frame["PMID"] == _pmid, ["Country", "Country_source"]] = fix_country_from_text(_country_text)

# Remove the loop helpers so no extra reference to either DataFrame is left
# in the kernel namespace.
del _country_fixes_2700000, _pmid, _country_text, _frame

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2700000_base.loc[df_parsedX_2700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2700000_match_GLOBOCAN.loc[df_parsedX_2700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [473]:
# Write both corrected copies to parsedX_2700000.csv inside their respective
# dataset directories. os.path.join gives the same path as plain concatenation
# when the directory string ends with a separator, and still targets the right
# file when it does not (concatenation would then write next to the directory
# instead of inside it).
df_parsedX_2700000_base.to_csv(os.path.join(DF_input_base, "parsedX_2700000.csv"), index = False)
df_parsedX_2700000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_2700000.csv"), index = False)

# Drop the references to both frames now that they have been written out.
del df_parsedX_2700000_base
del df_parsedX_2700000_match_GLOBOCAN

### Correcting *parsedX_2800000.csv*

In [420]:
# Read parsedX_2800000.csv from both dataset directories: the base copy and the
# match-GLOBOCAN copy. os.path.join is used instead of string concatenation so
# the file is found whether or not the directory string ends with a separator.
df_parsedX_2800000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_2800000.csv"))
df_parsedX_2800000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_2800000.csv"))

In [421]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_2800000, listed as
# (PMID, country text) in the order in which they are applied.
_country_fixes_2800000 = [
    (28198377, "Brazil"),
    (27665490, "United States"),
    (27683017, "Australia"),
    (27787590, "Austria"),
    (28033219, "Austria"),
    (28050791, "Australia"),
    (27663926, "Romania"),
    (27430869, "France"),
    (27562128, "Switzerland"),
    (27668530, "Slovakia"),
    (27933657, "France"),
    (28050799, "Argentina"),
    (27579604, "Norway"),
    (27629291, "India"),
    (27583885, "China"),
    (27507083, "Argentina"),
    (27863301, "Argentina"),
    (28151550, "Italy"),
    (27540011, "Netherlands"),
    (28197752, "Germany"),
    (27892939, "Japan"),
    (28060745, "Japan"),
    (28138630, "Japan"),
    (27565505, "China"),
    (27585955, "Poland"),
    (27605336, "Spain"),
    (27646764, "United States"),
    (27656876, "United States"),
    (27657109, "Canada"),
    (27657319, "United States"),
    (27777777, "Canada"),
    (27817104, "United States"),
    (27877247, "United States"),
    (27890941, "United States"),
    (27896669, "United States"),
    (27930560, "Taiwan"),
    (27942988, "Russia"),
    (28049037, "Poland"),
    (28050734, "United States"),
    (28074255, "Canada"),
    (28074323, "Israel"),
    (28089377, "United States"),
    (28111515, "Poland"),
    (28112410, "United States"),
    (28124990, "United States"),
    (28144830, "Poland"),
    (28174341, "Netherlands"),
    (27658668, "Australia"),
    (28073307, "Australia"),
    (27455223, "Mexico"),
    (27591381, "Mexico"),
    (27639478, "Mexico"),
    (27746255, "Mexico"),
    (27752939, "Mexico"),
    (27766434, "Mexico"),
    (27889662, "Mexico"),
    (28063068, "Canada"),
    (28079012, "Mexico"),
    (28105425, "Mexico"),
    (28122528, "Mexico"),
    (28131812, "Mexico"),
    (28174065, "Mexico"),
    (28191467, "Mexico"),
    (27716767, "Paraguay"),
]

# PMIDs whose match-GLOBOCAN copy receives a different country text than the
# base copy. Every PMID not listed here uses the same text in both copies.
# NOTE(review): presumably "China" is used here because the match-GLOBOCAN copy
# follows GLOBOCAN country naming -- confirm.
_globocan_text_overrides_2800000 = {
    27930560: "China",
}

# For every PMID, overwrite Country and Country_source with the result of
# fix_country_from_text, first in the base copy and then in the
# match-GLOBOCAN copy (the helper is called once per copy).
for _pmid, _country_text in _country_fixes_2800000:
    df_parsedX_2800000_base.loc[
        df_parsedX_2800000_base["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country_text)
    df_parsedX_2800000_match_GLOBOCAN.loc[
        df_parsedX_2800000_match_GLOBOCAN["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_globocan_text_overrides_2800000.get(_pmid, _country_text))

# Remove the loop helpers from the kernel namespace.
del _country_fixes_2800000, _globocan_text_overrides_2800000, _pmid, _country_text

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2800000_base.loc[df_parsedX_2800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2800000_match_GLOBOCAN.loc[df_parsedX_2800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [422]:
# Overwrite each source CSV with its corrected frame (row index not written):
# the base copy first, then the GLOBOCAN-matched copy.
for frame, folder in (
    (df_parsedX_2800000_base, DF_input_base),
    (df_parsedX_2800000_match_GLOBOCAN, DF_input_match_GLOBOCAN),
):
    frame.to_csv(folder + "parsedX_2800000.csv", index=False)

# Drop every name still bound to the frames, loop variables included, so no
# reference to the finished batch is left behind.
del frame, folder
del df_parsedX_2800000_base, df_parsedX_2800000_match_GLOBOCAN

### Correcting *parsedX_2900000.csv*

In [349]:
# Load this batch from both dataset folders: the untouched "base" copy and the
# copy whose cancer/country names were modified to match GLOBOCAN.
df_parsedX_2900000_base, df_parsedX_2900000_match_GLOBOCAN = (
    pd.read_csv(folder + "parsedX_2900000.csv")
    for folder in (DF_input_base, DF_input_match_GLOBOCAN)
)

In [350]:
## Replace wrongly assigned Country values with the correct ones

# Manual Country corrections for parsedX_2900000, keyed by PMID.
# To register another correction, add a `PMID: "Country"` entry to this mapping
# instead of copying a pair of assignment lines.
country_fixes_2900000 = {
    28499594: "United States",
    28719413: "United States",
    28431616: "United States",
    28548372: "Australia",
    28655066: "Germany",
    28668260: "France",
    28674914: "Ireland",
    28716152: "Australia",
    28814206: "Australia",
    28830697: "Australia",
    28738235: "Portugal",
    28806573: "United States",
    28259356: "France",
    28373458: "Slovakia",
    28407301: "Slovakia",
    28629398: "Slovakia",
    28653894: "Slovakia",
    28875243: "Germany",
    28763126: "France",
    28495502: "India",
    28702499: "United States",
    28345916: "India",
    28441796: "India",
    28487075: "India",
    28271192: "Germany",
    28207858: "Japan",
    28219303: "Japan",
    28432617: "Japan",
    28522594: "Japan",
    28285009: "United States",
    28320152: "United States",
    28399836: "Netherlands",
    28443209: "Russia",
    28445439: "Canada",
    28467142: "United States",
    28518072: "United States",
    28643684: "Uruguay",
    28645022: "United States",
}

# The same correction is applied to both copies of the dataset (base first, then
# the GLOBOCAN-matched one). Whatever fix_country_from_text returns for the
# country name is written into the "Country" and "Country_source" columns of the
# rows whose PMID matches.
# NOTE(review): fix_country_from_text lives in aux_fix_country_utils and is not
# visible here; it is called once per frame, exactly as the original lines did.
for pmid, country in country_fixes_2900000.items():
    df_parsedX_2900000_base.loc[
        df_parsedX_2900000_base["PMID"] == pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(country)
    df_parsedX_2900000_match_GLOBOCAN.loc[
        df_parsedX_2900000_match_GLOBOCAN["PMID"] == pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(country)

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28707579, ["Country", "Country_source"]] = fix_country_from_text("Belarus")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28707579, ["Country", "Country_source"]] = fix_country_from_text("Belarus")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28797293, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28797293, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28810912, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28810912, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28853247, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28853247, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28871632, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28871632, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28448799, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28448799, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28467626, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28467626, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28561159, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28561159, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28620964, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28620964, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28627971, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28627971, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28273793, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28273793, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28289860, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28289860, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28433864, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28433864, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28558708, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28558708, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28558783, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28558783, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28746889, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28746889, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == 28844340, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == 28844340, ["Country", "Country_source"]] = fix_country_from_text("Italy")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_2900000_base.loc[df_parsedX_2900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_2900000_match_GLOBOCAN.loc[df_parsedX_2900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [351]:
# Write the corrected frames back over the CSVs they were loaded from
# (index = False keeps the DataFrame index out of the file).
# os.path.join builds a valid path whether or not the directory typed by the
# user ends with a path separator; plain string concatenation did not.
df_parsedX_2900000_base.to_csv(os.path.join(DF_input_base, "parsedX_2900000.csv"), index = False)
df_parsedX_2900000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_2900000.csv"), index = False)

# Drop the references so the frames can be garbage-collected before the next file
del df_parsedX_2900000_base
del df_parsedX_2900000_match_GLOBOCAN

### Correcting *parsedX_300000.csv*

In [200]:
# Load both copies of parsedX_300000.csv: the untouched "base" copy and the
# copy whose names were adapted to match the GLOBOCAN dataset.
# os.path.join builds a valid path whether or not the directory typed by the
# user ends with a path separator; plain string concatenation did not.
df_parsedX_300000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_300000.csv"))
df_parsedX_300000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_300000.csv"))

In [201]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_300000.csv, expressed as data.
# Each (PMID, true country) pair is applied to BOTH copies of the dataset
# (base first, then match_GLOBOCAN), so the two copies always receive the same
# correction. To register a new correction, append a pair to the tuple below.
corrections_parsedX_300000 = (
    (3162999, "France"),
    (3347787, "Italy"),
    (3163142, "Italy"),
    (3393815, "Germany"),
    (3393816, "Germany"),
    (3374767, "Argentina"),
    (3050155, "Netherlands"),
    (3232476, "Netherlands"),
    (3176994, "Sweden"),
    (3211366, "Sweden"),
    (3323456, "Spain"),
)

for pmid_to_fix, true_country in corrections_parsedX_300000:
    for target_df in (df_parsedX_300000_base, df_parsedX_300000_match_GLOBOCAN):
        # Rows are selected by PMID; the helper's result is written into the
        # "Country" and "Country_source" columns in a single assignment.
        # The helper is called once per frame, as in the unrolled statements.
        target_df.loc[target_df["PMID"] == pmid_to_fix, ["Country", "Country_source"]] = fix_country_from_text(true_country)

In [202]:
# Write the corrected frames back over the CSVs they were loaded from
# (index = False keeps the DataFrame index out of the file).
# os.path.join builds a valid path whether or not the directory typed by the
# user ends with a path separator; plain string concatenation did not.
df_parsedX_300000_base.to_csv(os.path.join(DF_input_base, "parsedX_300000.csv"), index = False)
df_parsedX_300000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_300000.csv"), index = False)

# Drop the references so the frames can be garbage-collected before the next file
del df_parsedX_300000_base
del df_parsedX_300000_match_GLOBOCAN

### Correcting *parsedX_3000000.csv*

In [474]:
# Load both copies of parsedX_3000000.csv: the untouched "base" copy and the
# copy whose names were adapted to match the GLOBOCAN dataset.
# os.path.join builds a valid path whether or not the directory typed by the
# user ends with a path separator; plain string concatenation did not.
df_parsedX_3000000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_3000000.csv"))
df_parsedX_3000000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_3000000.csv"))

In [475]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_3000000.csv, expressed as data.
# Each (PMID, true country) pair is applied to BOTH copies of the dataset
# (base first, then match_GLOBOCAN), so the two copies always receive the same
# correction. To register a new correction, append a pair to the tuple below.
corrections_parsedX_3000000 = (
    (29427708, "Brazil"),
    (28943004, "United States"),
    (29408798, "India"),
    (29027024, "Romania"),
    (29200002, "Slovakia"),
    (29224886, "France"),
    (29324312, "Slovakia"),
    (29372682, "Slovakia"),
    (29372683, "Slovakia"),
    (29372684, "Slovakia"),
    (29372685, "Slovakia"),
    (29372686, "Slovakia"),
    (29372687, "Slovakia"),
    (29372688, "Slovakia"),
    (29372689, "Slovakia"),
    (29469164, "Slovakia"),
    (29541403, "France"),
    (28912581, "India"),
    (29416801, "India"),
    (29054708, "United States"),
    (29048707, "Italy"),
    (29099538, "Italy"),
    (29120299, "Brazil"),
    (28929714, "Italy"),
    (29217900, "Japan"),
    (29258764, "Japan"),
    (29298444, "Japan"),
    (29387978, "Japan"),
    (29399968, "Japan"),
    (29467894, "Japan"),
    (28923102, "United States"),
    (28955945, "Canada"),
    (28988350, "United States"),
    (29158608, "United States"),
    (29273857, "United States"),
    (29317059, "Poland"),
    (29333502, "United States"),
    (29361927, "China"),
    (29375621, "Russia"),
    (29390360, "China"),
    (29405062, "China"),
    (29439667, "Poland"),
    (29451568, "United States"),
    (29454062, "United States"),
    (29468085, "United States"),
    (29484561, "Canada"),
    (29492278, "United States"),
)

for pmid_to_fix, true_country in corrections_parsedX_3000000:
    for target_df in (df_parsedX_3000000_base, df_parsedX_3000000_match_GLOBOCAN):
        # Rows are selected by PMID; the helper's result is written into the
        # "Country" and "Country_source" columns in a single assignment.
        # The helper is called once per frame, as in the unrolled statements.
        target_df.loc[target_df["PMID"] == pmid_to_fix, ["Country", "Country_source"]] = fix_country_from_text(true_country)

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29500477, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29500477, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29518729, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29518729, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 28941682, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 28941682, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 28973007, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 28973007, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29017520, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29017520, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 28889078, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 28889078, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 28912828, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 28912828, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 28969609, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 28969609, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29107095, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29107095, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29158887, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29158887, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29189179, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29189179, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29278371, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29278371, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29307014, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29307014, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29329808, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29329808, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29346309, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29346309, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29456457, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29456457, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29303071, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29303071, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29346040, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29346040, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29492192, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29492192, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 29696467, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 29696467, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == 30146944, ["Country", "Country_source"]] = fix_country_from_text("South Korea")
df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == 30146944, ["Country", "Country_source"]] = fix_country_from_text("South Korea")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3000000_base.loc[df_parsedX_3000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3000000_match_GLOBOCAN.loc[df_parsedX_3000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [476]:
# Write both corrected copies to parsedX_3000000.csv in their respective
# dataset folders (without the index column), then drop the two names so the
# frames are no longer referenced before the next file is processed.
df_parsedX_3000000_base.to_csv(f"{DF_input_base}parsedX_3000000.csv", index=False)
df_parsedX_3000000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}parsedX_3000000.csv", index=False)

del df_parsedX_3000000_base, df_parsedX_3000000_match_GLOBOCAN

### Correcting *parsedX_3100000.csv*

In [423]:
# Load the two copies of parsedX_3100000.csv: the untouched "base" copy and
# the copy whose names were adapted to match the GLOBOCAN dataset.
df_parsedX_3100000_base = pd.read_csv(f"{DF_input_base}parsedX_3100000.csv")
df_parsedX_3100000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_3100000.csv")

In [424]:
## Replace found errors by true Country values

# Manually verified Country corrections for parsedX_3100000, keyed by PMID.
# Each value is the text handed to fix_country_from_text (imported from
# aux_fix_country_utils); its result is written into the "Country" and
# "Country_source" columns, so it is presumably a two-element value in that
# order -- confirm against the helper's definition.
# To register a new correction, add one `PMID: "Country name"` entry below.
country_fixes_3100000 = {
    29856243: "United Kingdom",
    29653120: "Italy",
    30229205: "United States",
    29851693: "Australia",
    29936076: "Australia",
    29848725: "Slovakia",
    30174762: "Slovakia",
    29707810: "India",
    29688847: "Netherlands",
    29797143: "United Kingdom",
    29845457: "Netherlands",
    29854570: "United States",
    30004912: "United States",
    29649201: "India",
    29753941: "India",
    29864472: "India",
    30086794: "India",
    30228363: "Ecuador",
    30173360: "Italy",
    30197505: "Netherlands",
    29852068: "Germany",
    29633024: "Japan",
    29776427: "Japan",
    30153836: "Japan",
    30201356: "Japan",
    30240295: "Japan",
    29675328: "United States",
    29688241: "Poland",
    29708784: "United States",
    29848545: "United Kingdom",
    29890024: "Canada",
}

# Apply every correction to both copies of the dataset (base first, then the
# GLOBOCAN-matched copy), so the two copies cannot drift apart through a
# mistyped PMID or country in one of them. The data frames are referenced by
# name (not through a loop variable) so that a later `del` of each frame
# does not leave a stray reference keeping it alive.
for fix_pmid, fix_country in country_fixes_3100000.items():
    df_parsedX_3100000_base.loc[
        df_parsedX_3100000_base["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country)
    df_parsedX_3100000_match_GLOBOCAN.loc[
        df_parsedX_3100000_match_GLOBOCAN["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country)

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30036439, ["Country", "Country_source"]] = fix_country_from_text("Israel")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30036439, ["Country", "Country_source"]] = fix_country_from_text("Israel")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30083079, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30083079, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30126136, ["Country", "Country_source"]] = fix_country_from_text("Germany")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30126136, ["Country", "Country_source"]] = fix_country_from_text("Germany")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30149619, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30149619, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30172100, ["Country", "Country_source"]] = fix_country_from_text("China")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30172100, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30187420, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30187420, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30238389, ["Country", "Country_source"]] = fix_country_from_text("Taiwan")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30238389, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30244691, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30244691, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30099579, ["Country", "Country_source"]] = fix_country_from_text("Germany")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30099579, ["Country", "Country_source"]] = fix_country_from_text("Germany")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29727323, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29727323, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29780264, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29780264, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30087854, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30087854, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29550881, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29550881, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29596496, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29596496, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29601905, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29601905, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29649549, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29649549, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29666399, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29666399, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29690880, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29690880, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29730133, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29730133, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29987501, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29987501, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30007568, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30007568, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30017980, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30017980, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30036967, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30036967, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30039421, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30039421, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30048774, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30048774, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30095060, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30095060, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30166901, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30166901, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29803859, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29803859, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 30132201, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 30132201, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == 29928685, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")
df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == 29928685, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3100000_base.loc[df_parsedX_3100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3100000_match_GLOBOCAN.loc[df_parsedX_3100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [425]:
# Write both corrected copies back over their source CSVs (row index omitted).
df_parsedX_3100000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_3100000.csv",
    index=False,
)
df_parsedX_3100000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_3100000.csv",
    index=False,
)

# Drop both names so the frames can be reclaimed before the next file is loaded.
del df_parsedX_3100000_base, df_parsedX_3100000_match_GLOBOCAN

### Correcting *parsedX_3200000.csv*

In [358]:
# Load the same chunk file from both dataset folders: first the base copy,
# then the copy whose names were adapted to match GLOBOCAN.
df_parsedX_3200000_base, df_parsedX_3200000_match_GLOBOCAN = (
    pd.read_csv(folder + "parsedX_3200000.csv")
    for folder in (DF_input_base, DF_input_match_GLOBOCAN)
)

In [359]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_3200000, kept as data instead of one
# copy-pasted pair of statements per article.
# Each entry is (PMID, country text passed to fix_country_from_text); entries are
# applied in the listed order, first to the base copy and then to the
# GLOBOCAN-matched copy. Every entry here uses the same text for both copies.
# To add a correction, append a (PMID, "Country name") tuple to this list.
_country_fixes_3200000 = [
    (30585274, "Argentina"),
    (30318359, "United States"),
    (30770107, "United States"),
    (30556666, "Ireland"),
    (30605368, "Ireland"),
    (30660204, "Australia"),
    (30778733, "Netherlands"),
    (30267416, "France"),
    (30509087, "Slovakia"),
    (30509097, "Slovakia"),
    (30522776, "Slovakia"),
    (30538792, "Slovakia"),
    (30543514, "Slovakia"),
    (30881489, "Slovakia"),
    (30903319, "Slovakia"),
    (30281209, "India"),
    (30499398, "India"),
    (30830381, "India"),
    (30394213, "India"),
    (30588883, "India"),
    (30594780, "Argentina"),
    (30817555, "United States"),
    (30304773, "Japan"),
    (30368245, "Japan"),
    (30519894, "Japan"),
    (30584072, "Japan"),
    (30867787, "Japan"),
    (30267774, "Canada"),
    (30293944, "Japan"),
    (30301761, "United States"),
    (30306455, "China"),
    (30316706, "United States"),
    (30318746, "China"),
    (30366433, "Russia"),
    (30390687, "Poland"),
    (30499543, "Russia"),
]

for _pmid, _country in _country_fixes_3200000:
    # Base copy: overwrite Country / Country_source for every row of this PMID
    # with whatever fix_country_from_text returns for the country text.
    df_parsedX_3200000_base.loc[
        df_parsedX_3200000_base["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country)

    # GLOBOCAN-matched copy: same correction.
    df_parsedX_3200000_match_GLOBOCAN.loc[
        df_parsedX_3200000_match_GLOBOCAN["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country)

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30511014, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30511014, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30539764, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30539764, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30568254, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30568254, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30576789, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30576789, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30589383, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30589383, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30642715, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30642715, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30649745, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30649745, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30701090, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30701090, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30721442, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30721442, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30841428, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30841428, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30881265, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30881265, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30712554, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30712554, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30271126, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30271126, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30345486, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30345486, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30425290, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30425290, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30515130, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30515130, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30627918, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30627918, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30739527, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30739527, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30775443, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30775443, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30835026, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30835026, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30839211, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30839211, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30904021, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30904021, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30562965, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30562965, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30631960, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30631960, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30641974, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30641974, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30721448, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30721448, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30758769, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30758769, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30813351, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30813351, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30816248, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30816248, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30832668, ["Country", "Country_source"]] = fix_country_from_text("Mexico")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30832668, ["Country", "Country_source"]] = fix_country_from_text("Mexico")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30704124, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30704124, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == 30901673, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == 30901673, ["Country", "Country_source"]] = fix_country_from_text("Italy")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3200000_base.loc[df_parsedX_3200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3200000_match_GLOBOCAN.loc[df_parsedX_3200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [360]:
# Write both corrected copies back over their source csv files (row index omitted).
df_parsedX_3200000_base.to_csv(DF_input_base + "parsedX_3200000.csv", index=False)
df_parsedX_3200000_match_GLOBOCAN.to_csv(DF_input_match_GLOBOCAN + "parsedX_3200000.csv", index=False)

# Drop the notebook's references to the frames now that they are saved.
del df_parsedX_3200000_base, df_parsedX_3200000_match_GLOBOCAN

### Correcting *parsedX_3300000.csv*

In [301]:
# Load parsedX_3300000.csv from the base folder and from the match GLOBOCAN folder,
# in that order, into one DataFrame per copy of the dataset.
df_parsedX_3300000_base, df_parsedX_3300000_match_GLOBOCAN = (
    pd.read_csv(input_dir + "parsedX_3300000.csv")
    for input_dir in (DF_input_base, DF_input_match_GLOBOCAN)
)

In [302]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_3300000, keyed by PMID.
# Every entry is applied to both copies of the dataset (base and match GLOBOCAN).
# To register a new correction, add one `PMID: "Country"` line to this mapping.
country_fixes_3300000 = {
    30954945: "France",
    31324932: "Austria",
    31107807: "United States",
    30981540: "France",
    31129938: "Australia",
    31243585: "Netherlands",
    31255836: "France",
    31539207: "Australia",
    31049795: "Slovakia",
    31123853: "Argentina",
    31172643: "Slovakia",
    31176573: "Slovakia",
    31177128: "Slovakia",
    31295151: "France",
    31307200: "Slovakia",
    31419008: "Slovakia",
    31539170: "Slovakia",
    31465861: "India",
    31068071: "China",
    31227248: "Netherlands",
    31114735: "United States",
    31497440: "United States",
    31523585: "United States",
    31057112: "India",
    31172982: "India",
    31162490: "Netherlands",
    31186023: "Germany",
    30999925: "Japan",
    31019757: "Japan",
    31222819: "Japan",
    31238937: "Japan",
    31255178: "Japan",
    31522686: "Japan",
    30919085: "United States",
    30934722: "Poland",
    31006017: "United States",
    31171910: "Poland",
    31281694: "United States",
    31339352: "United States",
    31363464: "United States",
    31433788: "China",
    31453005: "Canada",
}

# Corrections are applied in insertion order, base copy first. For each frame,
# the value returned by fix_country_from_text is written into the "Country" and
# "Country_source" columns of every row whose PMID matches.
# The frames are indexed by name (not through a loop variable) so that no extra
# reference to them is left behind once the loop finishes.
for fix_pmid, fix_country in country_fixes_3300000.items():
    df_parsedX_3300000_base.loc[
        df_parsedX_3300000_base["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country)
    df_parsedX_3300000_match_GLOBOCAN.loc[
        df_parsedX_3300000_match_GLOBOCAN["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country)

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31482393, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31482393, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31493190, ["Country", "Country_source"]] = fix_country_from_text("Belarus")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31493190, ["Country", "Country_source"]] = fix_country_from_text("Belarus")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31512414, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31512414, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31529263, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31529263, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31544847, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31544847, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31545410, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31545410, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31178298, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31178298, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31210389, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31210389, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31240514, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31240514, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31473959, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31473959, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31527615, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31527615, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == 31426795, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == 31426795, ["Country", "Country_source"]] = fix_country_from_text("Italy")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3300000_base.loc[df_parsedX_3300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3300000_match_GLOBOCAN.loc[df_parsedX_3300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [303]:
# Write both corrected copies of this chunk back over their source CSVs
# (the directory prefixes are the strings typed in at the top of the notebook).
df_parsedX_3300000_base.to_csv(
    f"{DF_input_base}parsedX_3300000.csv",
    index=False,
)
df_parsedX_3300000_match_GLOBOCAN.to_csv(
    f"{DF_input_match_GLOBOCAN}parsedX_3300000.csv",
    index=False,
)

# Unbind both frames now that the chunk has been written
del df_parsedX_3300000_base, df_parsedX_3300000_match_GLOBOCAN

### Correcting *parsedX_3400000.csv*

In [477]:
# Load the base copy and the GLOBOCAN-matched copy of the parsedX_3400000 chunk
df_parsedX_3400000_base = pd.read_csv(
    f"{DF_input_base}parsedX_3400000.csv"
)
df_parsedX_3400000_match_GLOBOCAN = pd.read_csv(
    f"{DF_input_match_GLOBOCAN}parsedX_3400000.csv"
)

In [478]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_3400000, kept as data rather than as one
# pair of copy-pasted statements per article. Each entry is
# (PMID, country text passed to fix_country_from_text). Entries are applied in
# list order, so a PMID listed twice ends up with its last entry.
# To add a correction, append a (PMID, "Country") tuple to this list.
country_fixes_3400000 = [
    (31889898, "United Kingdom"),
    (31610880, "Austria"),
    (31806381, "France"),
    (31653969, "Italy"),
    (31800712, "Brazil"),
    (32170155, "India"),
    (31624682, "United States"),
    (31694435, "India"),
    (31825796, "Argentina"),
    (32001804, "Argentina"),
    (31927131, "United Arab Emirates"),
    (31974746, "Germany"),
    (31977630, "Switzerland"),
    (31991851, "Japan"),
    (32077247, "Japan"),
    (32097951, "Japan"),
    (31557689, "United States"),
    (31593616, "France"),
    (31645920, "United States"),
    (31652993, "Russia"),
    (31660932, "Poland"),
    (31678589, "United States"),
    (31694633, "United States"),
    (31702390, "United States"),
    (31719909, "United States"),
    (31775229, "Poland"),
    (31827670, "Poland"),
    (31841457, "Ukraine"),
    (31841459, "Ukraine"),
    (31841464, "Ukraine"),
    (31841465, "Ukraine"),
    (31841484, "Ukraine"),
    (31841487, "Ukraine"),
    (31844720, "India"),
    (31861801, "Russia"),
    (31877967, "Russia"),
    (31913068, "United States"),
    (31997799, "Spain"),
    (32046103, "Poland"),
    (32047249, "United States"),
    (32072655, "France"),
    (32160426, "Russia"),
    (32160445, "Russia"),
    (31824558, "Australia"),
    (31959857, "Australia"),
    (32004398, "Australia"),
    (31719300, "Paraguay"),
    (31933024, "India"),
]

# For every correction, overwrite Country / Country_source on the rows whose PMID
# matches: first in the base copy, then in the GLOBOCAN-matched copy.
# fix_country_from_text comes from aux_fix_country_utils and is not visible here.
# NOTE(review): presumably it returns the [Country, Country_source] pair - confirm.
# The frames are addressed by name (not through a loop variable) so that no extra
# reference to either DataFrame is left behind once the loop finishes.
for fix_pmid, fix_country in country_fixes_3400000:
    df_parsedX_3400000_base.loc[
        df_parsedX_3400000_base["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country)
    df_parsedX_3400000_match_GLOBOCAN.loc[
        df_parsedX_3400000_match_GLOBOCAN["PMID"] == fix_pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(fix_country)

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3400000_base.loc[df_parsedX_3400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3400000_match_GLOBOCAN.loc[df_parsedX_3400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [479]:
# Write the corrected frames back over their source csvs, one per dataset copy
# (base and match_GLOBOCAN).
# os.path.join is used instead of string concatenation so the output path is
# valid whether or not the directory typed at the input() prompt ends with a
# path separator; with a trailing separator the resulting path is unchanged.
df_parsedX_3400000_base.to_csv(
    os.path.join(DF_input_base, "parsedX_3400000.csv"), index=False
)
df_parsedX_3400000_match_GLOBOCAN.to_csv(
    os.path.join(DF_input_match_GLOBOCAN, "parsedX_3400000.csv"), index=False
)

# Drop the notebook's references to this chunk before the next one is loaded.
del df_parsedX_3400000_base
del df_parsedX_3400000_match_GLOBOCAN

### Correcting *parsedX_3500000.csv*

In [480]:
# Read csv: load this chunk from both dataset copies (base and match_GLOBOCAN).
# os.path.join is used instead of string concatenation so the path is valid
# whether or not the directory typed at the input() prompt ends with a path
# separator; with a trailing separator the resulting path is unchanged.
df_parsedX_3500000_base = pd.read_csv(
    os.path.join(DF_input_base, "parsedX_3500000.csv")
)
df_parsedX_3500000_match_GLOBOCAN = pd.read_csv(
    os.path.join(DF_input_match_GLOBOCAN, "parsedX_3500000.csv")
)

In [481]:
## Replace found errors by true Country values

# Manually verified corrections for this chunk, as (PMID, true country) pairs.
# They are applied in this order to both dataset copies. To add a correction,
# append a new pair to the list.
country_fixes_3500000 = [
    (32719975, "Austria"),
    (32192726, "United States"),
    (32522324, "United States"),
    (32734609, "India"),
    (32631447, "Australia"),
    (32277441, "Italy"),
    (32416773, "United Kingdom"),
    (32311274, "Slovakia"),
    (32443455, "Argentina"),
    (32469226, "Slovakia"),
    (32496437, "Slovakia"),
    (32556328, "France"),
    (32606146, "Slovakia"),
    (32677927, "China"),
    (32720224, "Slovakia"),
    (32792225, "Slovakia"),
    (32332700, "Netherlands"),
    (32598532, "Netherlands"),
    (32376498, "India"),
    (32186248, "India"),
    (32234678, "India"),
    (32293439, "Ecuador"),
    (32596391, "Ecuador"),
    (32743726, "Switzerland"),
    (32616042, "Austria"),
    (32588349, "Germany"),
    (32231023, "Japan"),
    (32232692, "Japan"),
    (32349701, "Japan"),
    (32368321, "Japan"),
    (32429412, "Japan"),
    (32514625, "Japan"),
    (32577365, "Japan"),
    (32591964, "Japan"),
    (32612822, "Japan"),
    (32642067, "Japan"),
    (32660150, "Japan"),
    (32699600, "Japan"),
    (32780194, "Japan"),
    (32193734, "Saudi Arabia"),
    (32217105, "United States"),
    (32231055, "United States"),
    (32326637, "Canada"),
    (32362086, "Russia"),
    (32392733, "Poland"),
    (32435917, "United States"),
    (32442889, "United States"),
    (32446127, "Serbia"),
    (32462229, "Canada"),
    (32492520, "Poland"),
    (32499959, "United States"),
    (32517362, "Poland"),
    (32556743, "Russia"),
    (32570010, "United States"),
    (32593236, "Russia"),
    (32604802, "Poland"),
    (32640262, "Poland"),
    (32665581, "Iran"),
    (32726945, "United States"),
    (32746854, "United States"),
    (32788923, "United States"),
    (32804049, "United States"),
    (32578139, "Italy"),
    (32659454, "France"),
    (32758457, "Jamaica"),
    (32252576, "India"),
]


def _apply_country_fixes(frames, fixes):
    """Overwrite Country / Country_source for the listed PMIDs in every frame.

    Parameters
    ----------
    frames : dict
        Maps a label (used only in the returned report) to the DataFrame to
        correct. Each frame is modified in place and must have the columns
        "PMID", "Country" and "Country_source".
    fixes : list of (int, str)
        (PMID, country name) pairs, applied in order.

    Returns
    -------
    list of (str, int)
        (label, PMID) for every PMID that matched no row in a frame, so that
        a mistyped PMID does not go unnoticed.
    """
    not_found = []
    for pmid, country in fixes:
        for label, frame in frames.items():
            # One call per frame, exactly as in the original per-line assignments.
            # NOTE(review): fix_country_from_text presumably returns the values
            # for ["Country", "Country_source"] -- confirm in aux_fix_country_utils.
            corrected = fix_country_from_text(country)
            rows = frame["PMID"] == pmid
            if not rows.any():
                not_found.append((label, pmid))
            frame.loc[rows, ["Country", "Country_source"]] = corrected
    return not_found


# The loop lives inside a function so that no loop variable keeps a reference to
# the DataFrames after they are deleted later in the notebook.
unmatched_pmids_3500000 = _apply_country_fixes(
    {
        "base": df_parsedX_3500000_base,
        "match_GLOBOCAN": df_parsedX_3500000_match_GLOBOCAN,
    },
    country_fixes_3500000,
)

if unmatched_pmids_3500000:
    print(f"WARNING: PMIDs not found, no row corrected: {unmatched_pmids_3500000}")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3500000_base.loc[df_parsedX_3500000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3500000_match_GLOBOCAN.loc[df_parsedX_3500000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [482]:
# Write both corrected copies of this chunk back over the csv files they were read from
# (no index column is written).
df_parsedX_3500000_base.to_csv(path_or_buf=DF_input_base + "parsedX_3500000.csv", index=False)
df_parsedX_3500000_match_GLOBOCAN.to_csv(path_or_buf=DF_input_match_GLOBOCAN + "parsedX_3500000.csv", index=False)

# Unbind both frames; presumably to release memory before the next chunk is loaded.
del df_parsedX_3500000_base, df_parsedX_3500000_match_GLOBOCAN

### Correcting *parsedX_3600000.csv*

In [429]:
# Load the two copies of the parsedX_3600000 chunk: the untouched "base" copy and the
# copy whose names were adapted to match GLOBOCAN.
df_parsedX_3600000_base = pd.read_csv(filepath_or_buffer=DF_input_base + "parsedX_3600000.csv")
df_parsedX_3600000_match_GLOBOCAN = pd.read_csv(filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_3600000.csv")

In [430]:
## Replace found errors by true Country values

# Country corrections for the parsedX_3600000 chunk, as (PMID, country text) pairs,
# listed in the order in which they are applied. A list of pairs (rather than a dict)
# keeps that order explicit and would not silently merge a repeated PMID.
_country_fixes_3600000 = [
    (33073332, "Italy"),
    (33334598, "United States"),
    (32840836, "Australia"),
    (32860203, "Belgium"),
    (32917401, "France"),
    (33161422, "Italy"),
    (33083629, "Ecuador"),
    (33213401, "Italy"),
    (32853017, "Slovakia"),
    (32948356, "Spain"),
    (33026351, "France"),
    (33044628, "Slovakia"),
    (33284505, "Argentina"),
    (33180981, "Netherlands"),
    (33200027, "India"),
    (33200696, "India"),
    (33292387, "Ecuador"),
    (33260045, "Argentina"),
    (33058203, "Italy"),
    (33199152, "Germany"),
    (32972454, "Japan"),
    (32994182, "Japan"),
    (33059654, "Japan"),
    (33066747, "Japan"),
    (33112554, "Japan"),
    (33136258, "Japan"),
    (33224371, "Japan"),
    (33338879, "Japan"),
    (33374342, "Japan"),
    (32812772, "United States"),
    (32842535, "Poland"),
    (32878646, "Poland"),
    (32992913, "Poland"),
    (33035134, "United States"),
    (33050319, "United States"),
    (33090259, "Netherlands"),
    (33096691, "Russia"),
    (33110006, "China"),
    (33110479, "Russia"),
    (33207760, "Poland"),
    (33230725, "United States"),
    (33230804, "Israel"),
    (33287305, "Poland"),
    (33323907, "China"),
    (33326760, "United States"),
    (33340851, "Russia"),
    (33342103, "Russia"),
    (33355131, "United States"),
    (33361833, "Ukraine"),
]

# Apply each correction to both copies of the dataset (base first, then match GLOBOCAN).
# fix_country_from_text is called once per frame, in the same order as the hand-written
# statements this loop replaces; its result is assigned to the Country and
# Country_source columns of the rows whose PMID matches.
for _pmid, _country_text in _country_fixes_3600000:
    for _frame in (df_parsedX_3600000_base, df_parsedX_3600000_match_GLOBOCAN):
        _frame.loc[_frame["PMID"] == _pmid, ["Country", "Country_source"]] = fix_country_from_text(_country_text)

# Drop the loop helpers: _frame would otherwise keep a reference to the last data frame
# even after the frames themselves are deleted.
del _country_fixes_3600000, _pmid, _country_text, _frame

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33361840, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33361840, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33361841, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33361841, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33361846, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33361846, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33361851, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33361851, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33361854, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33361854, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33361856, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33361856, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33375769, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33375769, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33377057, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33377057, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33388995, ["Country", "Country_source"]] = fix_country_from_text("Cuba")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33388995, ["Country", "Country_source"]] = fix_country_from_text("Cuba")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33412409, ["Country", "Country_source"]] = fix_country_from_text("Brazil")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33412409, ["Country", "Country_source"]] = fix_country_from_text("Brazil")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33221201, ["Country", "Country_source"]] = fix_country_from_text("Peru")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33221201, ["Country", "Country_source"]] = fix_country_from_text("Peru")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33189777, ["Country", "Country_source"]] = fix_country_from_text("Bangladesh")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33189777, ["Country", "Country_source"]] = fix_country_from_text("Bangladesh")

df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == 33008752, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")
df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == 33008752, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3600000_base.loc[df_parsedX_3600000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3600000_match_GLOBOCAN.loc[df_parsedX_3600000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [431]:
# Write the corrected chunk back over its source file in each dataset copy
# (row index is not written, matching how the files are read back).
df_parsedX_3600000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_3600000.csv",
    index=False,
)
df_parsedX_3600000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_3600000.csv",
    index=False,
)

# Drop both names so the frames can be garbage-collected before the next chunk is loaded.
del df_parsedX_3600000_base, df_parsedX_3600000_match_GLOBOCAN

### Correcting *parsedX_3700000.csv*

In [483]:
# Load the parsedX_3700000 chunk from both dataset copies:
# the untouched one (base) and the one whose names were matched to GLOBOCAN.
df_parsedX_3700000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_3700000.csv"
)
df_parsedX_3700000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_3700000.csv"
)

In [484]:
## Replace found errors by true Country values

# Manual country corrections for this chunk, as (PMID, true country) pairs.
# The order of the former one-statement-per-article list is preserved.
# To register a new correction, append a tuple here instead of copying a statement.
corrections_3700000 = [
    (33749551, "United Kingdom"),
    (33848668, "Italy"),
    (33903933, "Italy"),
    (33616840, "Argentina"),
    (33551073, "United States"),
    (33463852, "Australia"),
    (33875272, "South Korea"),
    (33969896, "Australia"),
    (33969911, "Australia"),
    (33999463, "Australia"),
    (34097764, "Australia"),
    (34097765, "Australia"),
    (33693986, "Netherlands"),
    (33474679, "Argentina"),
    (33662680, "Slovakia"),
    (33685938, "Slovakia"),
    (33804167, "France"),
    (33880999, "Slovakia"),
    (33904314, "Slovakia"),
    (33948629, "Slovakia"),
    (34071401, "France"),
    (34089500, "Slovakia"),
    (33786256, "Colombia"),
    (33509717, "India"),
    (33727172, "India"),
    (33863497, "United States"),
    (33934286, "China"),
    (34106575, "United States"),
    (33515632, "India"),
    (33770579, "India"),
    (33900532, "India"),
    (34062227, "India"),
    (33666842, "Argentina"),
    (33745006, "Argentina"),
    (33934976, "United States"),
    (33794385, "Germany"),
    (33486073, "Japan"),
    (33502740, "Japan"),
    (33522707, "Japan"),
    (33623898, "Japan"),
    (33634473, "Japan"),
    (33662914, "Japan"),
    (33717555, "Japan"),
    (33794843, "Japan"),
    (33964982, "Japan"),
    (34007360, "Japan"),
    (34065520, "Japan"),
    (33503830, "United States"),
    (33546220, "Poland"),
    (33549385, "United States"),
    (33550751, "Russia"),
    (33559098, "Poland"),
    (33578990, "Israel"),
    (33581289, "Poland"),
    (33778703, "United States"),
    (33791771, "United States"),
    (33812449, "Brazil"),
    (34052775, "Russia"),
    (34059243, "United States"),
    (34093104, "United States"),
    (34099850, "Israel"),
    (33458794, "Italy"),
    (33534908, "Australia"),
    (33559808, "Italy"),
    (33603235, "Australia"),
    (33912249, "Australia"),
    (34050359, "Italy"),
    (34099896, "Italy"),
    (34088814, "United States"),
    (33908328, "India"),
]

# Apply each correction to both copies of the dataset (base and GLOBOCAN-matched).
# fix_country_from_text is called once per frame, exactly as the hand-written
# statements did; its result is written to the "Country" and "Country_source" columns
# of every row whose PMID matches.
# The frames are addressed by name (not through a loop variable) so that no extra
# reference to them outlives this cell.
for pmid, country_text in corrections_3700000:
    df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == pmid, ["Country", "Country_source"]] = fix_country_from_text(country_text)
    df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == pmid, ["Country", "Country_source"]] = fix_country_from_text(country_text)

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3700000_base.loc[df_parsedX_3700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3700000_match_GLOBOCAN.loc[df_parsedX_3700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [485]:
# Write the corrected frames to parsedX_3700000.csv inside each dataset
# folder. The folder strings are concatenated directly with the file name,
# so they are expected to already end with a path separator. The DataFrame
# index is not written.
df_parsedX_3700000_base.to_csv(
    path_or_buf=DF_input_base + "parsedX_3700000.csv",
    index=False,
)
df_parsedX_3700000_match_GLOBOCAN.to_csv(
    path_or_buf=DF_input_match_GLOBOCAN + "parsedX_3700000.csv",
    index=False,
)

# Both frames are finished with: unbind the names (presumably so the kernel
# can release their memory before the next file is loaded).
del df_parsedX_3700000_base, df_parsedX_3700000_match_GLOBOCAN

### Correcting *parsedX_3800000.csv*

In [486]:
# Load parsedX_3800000.csv from each dataset folder: one frame for the base
# copy and one for the copy matched to GLOBOCAN names. The folder strings are
# concatenated directly with the file name, so they are expected to already
# end with a path separator.
df_parsedX_3800000_base = pd.read_csv(
    filepath_or_buffer=DF_input_base + "parsedX_3800000.csv"
)
df_parsedX_3800000_match_GLOBOCAN = pd.read_csv(
    filepath_or_buffer=DF_input_match_GLOBOCAN + "parsedX_3800000.csv"
)

In [487]:
## Replace found errors by true Country values

# Country corrections for parsedX_3800000.csv, one (PMID, country name) pair
# per article. Keeping them in a single table means each PMID and country is
# typed once instead of being repeated in two near-identical statements.
# To add a correction, append another (PMID, "Country name") tuple.
country_fixes_3800000 = [
    (34414870, "United Kingdom"),
    (34374389, "Brazil"),
    (34185219, "Austria"),
    (34324055, "Germany"),
    (34387150, "Ireland"),
    (34618601, "Australia"),
    (34750636, "Australia"),
    (34224061, "Italy"),
    (34674491, "Netherlands"),
    (34277287, "United States"),
    (34263651, "Slovakia"),
    (34550463, "Slovakia"),
    (34641699, "Slovakia"),
    (34571462, "India"),
    (34631227, "India"),
    (34162044, "India"),
    (34715356, "India"),
    (34116182, "Argentina"),
    (34123758, "India"),
    (34260836, "Germany"),
    (34261455, "Japan"),
    (34285624, "Japan"),
    (34325210, "Japan"),
    (34391390, "Japan"),
    (34449562, "Japan"),
    (34496815, "Japan"),
    (34591289, "Japan"),
    (34123510, "Canada"),
    (34146062, "China"),
    (34149957, "United States"),
    (34184747, "United States"),
    (34197250, "United States"),
    (34201116, "Poland"),
    (34218524, "Spain"),
    (34242914, "Russia"),
    (34245512, "Russia"),
    (34286378, "Canada"),
    (34300315, "Poland"),
    (34302297, "France"),
    (34313441, "Russia"),
    (34368831, "United States"),
    (34402034, "United States"),
    (34404560, "Poland"),
    (34441979, "Poland"),
    (34463558, "United States"),
    (34473212, "United States"),
    (34556252, "United States"),
    (34572289, "Russia"),
    (34603753, "Russia"),
    (34638910, "Poland"),
    (34642740, "United States"),
    (34652316, "Colombia"),
    (34695450, "China"),
    (34702207, "Canada"),
    (34714630, "Russia"),
    (34744351, "Canada"),
    (34268284, "Australia"),
    (34437915, "Italy"),
    (34495569, "Australia"),
    (34600573, "Italy"),
    (34383612, "India"),
    (34645364, "India"),
]

# Apply each correction to both copies of the dataset (base first, then
# match_GLOBOCAN), in the order listed above. The value returned by
# fix_country_from_text (imported from aux_fix_country_utils) is written to
# the Country and Country_source columns of every row with that PMID;
# presumably it returns one value per column -- confirm against the helper.
# A PMID that matches no row leaves the frame unchanged.
for pmid, true_country in country_fixes_3800000:
    df_parsedX_3800000_base.loc[
        df_parsedX_3800000_base["PMID"] == pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(true_country)
    df_parsedX_3800000_match_GLOBOCAN.loc[
        df_parsedX_3800000_match_GLOBOCAN["PMID"] == pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(true_country)

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3800000_base.loc[df_parsedX_3800000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3800000_match_GLOBOCAN.loc[df_parsedX_3800000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [488]:
# Write both corrected copies of the chunk to parsedX_3800000.csv in their
# respective folders (index column omitted), then drop the frames from the
# kernel namespace.
chunk_csv_name = "parsedX_3800000.csv"

df_parsedX_3800000_base.to_csv(f"{DF_input_base}{chunk_csv_name}", index=False)
df_parsedX_3800000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}{chunk_csv_name}", index=False)

del df_parsedX_3800000_base, df_parsedX_3800000_match_GLOBOCAN

### Correcting *parsedX_3900000.csv*

In [364]:
# Load the parsedX_3900000 chunk twice: once from the base folder and once
# from the match-GLOBOCAN folder. Both folder strings are concatenated
# directly with the file name, so they must already end in a path separator.
chunk_csv_name = "parsedX_3900000.csv"

df_parsedX_3900000_base = pd.read_csv(f"{DF_input_base}{chunk_csv_name}")
df_parsedX_3900000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}{chunk_csv_name}")

In [365]:
## Replace found errors by true Country values

# Manual country corrections for parsedX_3900000, listed as
# (PMID, country text) pairs. Each pair is applied, in order, first to the
# base copy and then to the match-GLOBOCAN copy of the chunk.
country_fixes_3900000 = [
    (34937625, "United States"),
    (34921786, "Australia"),
    (34965047, "Belgium"),
    (35106961, "Australia"),
    (35301096, "Australia"),
    (35215276, "Italy"),
    (34779383, "Slovakia"),
    (34818028, "Slovakia"),
    (34843006, "Argentina"),
    (35146168, "Peru"),
    (35389573, "Slovakia"),
    (34754837, "India"),
    (34800790, "India"),
    (34857768, "India"),
    (35114612, "India"),
    (35094685, "Netherlands"),
    (35186554, "United States"),
    (35228941, "United States"),
    (34766229, "Switzerland"),
    (35247094, "Argentina"),
    (34969711, "United States"),
    (34912620, "Greece"),
    (34789192, "Japan"),
    (34972509, "Japan"),
    (35070303, "Japan"),
    (35071010, "Japan"),
    (35156177, "Japan"),
    (35196631, "Japan"),
    (35318539, "Japan"),
    (35381047, "Japan"),
    (34770371, "Poland"),
    (34771734, "United States"),
    (34773196, "United States"),
    (34773578, "Brazil"),
    (34804641, "United States"),
    (34830512, "Poland"),
    (34830934, "United States"),
    (34833432, "Poland"),
    (34839417, "Israel"),
    (34846810, "Russia"),
    (34857829, "Netherlands"),
    (34873574, "China"),
    (34945219, "Poland"),
    (34965541, "Ukraine"),
    (34974881, "United States"),
    (35000753, "United States"),
    (35008362, "United States"),
    (35060018, "Poland"),
    (35127319, "United States"),
    (35158944, "Poland"),
    (35159016, "Israel"),
    (35181413, "Russia"),
    (35246046, "Taiwan"),
    (35265362, "Russia"),
    (35317277, "Russia"),
    (35331510, "Brazil"),
    (35342397, "Poland"),
    (35369402, "United States"),
    (35382184, "Japan"),
    (35390128, "United States"),
    (35390131, "United States"),
    (34919824, "Australia"),
    (35260808, "Australia"),
    (34945704, "Mexico"),
    (35158182, "Italy"),
]

# PMIDs whose country text differs in the match-GLOBOCAN copy: this article is
# fixed as "Taiwan" in the base copy but as "China" in the GLOBOCAN-matched one.
# NOTE(review): presumably this follows GLOBOCAN's country naming — confirm.
globocan_text_overrides_3900000 = {35246046: "China"}

# The two columns overwritten with whatever fix_country_from_text returns.
country_target_cols = ["Country", "Country_source"]

for fix_pmid, fix_text in country_fixes_3900000:
    # Rows are selected by exact PMID match; the helper is called once per copy.
    df_parsedX_3900000_base.loc[
        df_parsedX_3900000_base["PMID"] == fix_pmid, country_target_cols
    ] = fix_country_from_text(fix_text)

    # Use the GLOBOCAN-specific text when one is registered, else the same text.
    globocan_text = globocan_text_overrides_3900000.get(fix_pmid, fix_text)
    df_parsedX_3900000_match_GLOBOCAN.loc[
        df_parsedX_3900000_match_GLOBOCAN["PMID"] == fix_pmid, country_target_cols
    ] = fix_country_from_text(globocan_text)

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_3900000_base.loc[df_parsedX_3900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_3900000_match_GLOBOCAN.loc[df_parsedX_3900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [366]:
# Write the corrected chunk to parsedX_3900000.csv under each input directory.
# os.path.join is used so the paths resolve correctly whether or not the
# directory strings typed by the user end with a path separator.
df_parsedX_3900000_base.to_csv(os.path.join(DF_input_base, "parsedX_3900000.csv"), index=False)
df_parsedX_3900000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_3900000.csv"), index=False)

# Drop the notebook-level references to the two dataframes once they are saved.
del df_parsedX_3900000_base
del df_parsedX_3900000_match_GLOBOCAN

### Correcting *parsedX_400000.csv*

In [150]:
# Read csv: load both copies of the chunk (base and GLOBOCAN-matched).
# os.path.join keeps the paths valid even when the directory strings typed by
# the user do not end with a path separator.
df_parsedX_400000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_400000.csv"))
df_parsedX_400000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_400000.csv"))

In [151]:
## Replace found errors by true Country values
# Corrections are expressed as data: every (PMID, country) pair is applied to
# both copies of the chunk (the base copy and the GLOBOCAN-matched copy).
# To register a new correction, append a (PMID, "Country name") tuple here.
corrections_parsedX_400000 = [
    (3432692, "France"),
    (3503452, "Belgium"),
    (3671025, "France"),
    (3658341, "Belgium"),
    (3658342, "Belgium"),
    (3431704, "Portugal"),
    (3477471, "Sweden"),
    (3482509, "Australia"),
]

for pmid_to_fix, true_country in corrections_parsedX_400000:
    # fix_country_from_text (imported from aux_fix_country_utils) supplies the
    # values written to the two columns; presumably a (Country, Country_source)
    # pair -- confirm against the helper. It is called separately for each
    # dataframe, base first and then the GLOBOCAN-matched copy.
    df_parsedX_400000_base.loc[
        df_parsedX_400000_base["PMID"] == pmid_to_fix, ["Country", "Country_source"]
    ] = fix_country_from_text(true_country)
    df_parsedX_400000_match_GLOBOCAN.loc[
        df_parsedX_400000_match_GLOBOCAN["PMID"] == pmid_to_fix, ["Country", "Country_source"]
    ] = fix_country_from_text(true_country)

In [152]:
# Write the corrected chunk to parsedX_400000.csv under each input directory.
# os.path.join is used so the paths resolve correctly whether or not the
# directory strings typed by the user end with a path separator.
df_parsedX_400000_base.to_csv(os.path.join(DF_input_base, "parsedX_400000.csv"), index=False)
df_parsedX_400000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_400000.csv"), index=False)

# Drop the notebook-level references to the two dataframes once they are saved.
del df_parsedX_400000_base
del df_parsedX_400000_match_GLOBOCAN

### Correcting *parsedX_4000000.csv*

In [489]:
# Read csv: load both copies of the chunk (base and GLOBOCAN-matched).
# os.path.join keeps the paths valid even when the directory strings typed by
# the user do not end with a path separator.
df_parsedX_4000000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_4000000.csv"))
df_parsedX_4000000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_4000000.csv"))

In [490]:
## Replace found errors by true Country values
# Corrections are expressed as data: every (PMID, country) pair is applied to
# both copies of the chunk (the base copy and the GLOBOCAN-matched copy), in
# the order listed.
# To register a new correction, append a (PMID, "Country name") tuple here.
corrections_parsedX_4000000 = [
    (35641826, "Australia"),
    (35953110, "Australia"),
    (35974658, "Australia"),
    (35455464, "Italy"),
    (35795510, "United States"),
    (35606546, "Argentina"),
    (35888170, "Argentina"),
    (35447567, "India"),
    (35440230, "China"),
    (35967130, "United States"),
    (35462008, "Spain"),
    (35579140, "India"),
    (35403935, "India"),
    (35992599, "India"),
    (35931942, "Argentina"),
    (35868124, "Germany"),
    (35936029, "Germany"),
    (36029331, "Germany"),
    (35461381, "Japan"),
    (35546238, "Japan"),
    (35568860, "Japan"),
    (35743996, "Japan"),
    (35868288, "Japan"),
    (35896840, "Japan"),
    (35944311, "Japan"),
    (35987641, "Japan"),
    (36052808, "Japan"),
    (35397692, "Netherlands"),
    (35424170, "United States"),
    (35442592, "Russia"),
    (35454903, "Poland"),
    (35544375, "United States"),
    (35559871, "United States"),
    (35606604, "Egypt"),
    (35623778, "United States"),
    (35625921, "Poland"),
    (35725075, "United States"),
    (35741039, "Poland"),
    (35805014, "United States"),
    (35837299, "China"),
    (35849167, "United States"),
    (35864130, "Poland"),
    (35869542, "Canada"),
]

for pmid_to_fix, true_country in corrections_parsedX_4000000:
    # fix_country_from_text (imported from aux_fix_country_utils) supplies the
    # values written to the two columns; presumably a (Country, Country_source)
    # pair -- confirm against the helper. It is called separately for each
    # dataframe, base first and then the GLOBOCAN-matched copy.
    df_parsedX_4000000_base.loc[
        df_parsedX_4000000_base["PMID"] == pmid_to_fix, ["Country", "Country_source"]
    ] = fix_country_from_text(true_country)
    df_parsedX_4000000_match_GLOBOCAN.loc[
        df_parsedX_4000000_match_GLOBOCAN["PMID"] == pmid_to_fix, ["Country", "Country_source"]
    ] = fix_country_from_text(true_country)

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35874908, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35874908, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35877436, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35877436, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35884467, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35884467, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35896589, ["Country", "Country_source"]] = fix_country_from_text("Israel")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35896589, ["Country", "Country_source"]] = fix_country_from_text("Israel")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35898172, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35898172, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35923859, ["Country", "Country_source"]] = fix_country_from_text("Brazil")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35923859, ["Country", "Country_source"]] = fix_country_from_text("Brazil")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35997929, ["Country", "Country_source"]] = fix_country_from_text("China")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35997929, ["Country", "Country_source"]] = fix_country_from_text("China")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35998623, ["Country", "Country_source"]] = fix_country_from_text("Cuba")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35998623, ["Country", "Country_source"]] = fix_country_from_text("Cuba")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 36011153, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 36011153, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35737243, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35737243, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35658211, ["Country", "Country_source"]] = fix_country_from_text("Bangladesh")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35658211, ["Country", "Country_source"]] = fix_country_from_text("Bangladesh")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35794609, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35794609, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35832429, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35832429, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35850772, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35850772, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35951087, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35951087, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35623925, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35623925, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35694190, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35694190, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35583002, ["Country", "Country_source"]] = fix_country_from_text("Spain")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35583002, ["Country", "Country_source"]] = fix_country_from_text("Spain")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35524835, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35524835, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35525882, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35525882, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == 35937719, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == 35937719, ["Country", "Country_source"]] = fix_country_from_text("India")

# Template for further manual corrections to parsedX_4000000 (kept commented out because the
# PMID and country text are still blank). To record a new correction, copy the pair below,
# uncomment it, and fill in the PMID after `==` and the country text inside the quotes.
# Both copies of the chunk (base and match-GLOBOCAN) must receive the same correction.
#df_parsedX_4000000_base.loc[df_parsedX_4000000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4000000_match_GLOBOCAN.loc[df_parsedX_4000000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [491]:
# Write both corrected copies of the parsedX_4000000 chunk to their respective folders
# (index=False keeps the DataFrame row index out of the CSV), then unbind the two names so
# the kernel no longer holds references to these frames.
df_parsedX_4000000_base.to_csv(f"{DF_input_base}parsedX_4000000.csv", index=False)
df_parsedX_4000000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}parsedX_4000000.csv", index=False)

del df_parsedX_4000000_base, df_parsedX_4000000_match_GLOBOCAN

### Correcting *parsedX_4100000.csv*

In [432]:
# Load both copies of the parsedX_4100000 chunk: the "base" copy and the copy whose names
# were adjusted to match the GLOBOCAN dataset. The folder strings are concatenated as-is,
# so they are expected to already end with a path separator.
df_parsedX_4100000_base = pd.read_csv(f"{DF_input_base}parsedX_4100000.csv")
df_parsedX_4100000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_4100000.csv")

In [433]:
## Replace found errors by true Country values
# Manual country corrections for parsedX_4100000, one (PMID, country text) row per article.
# Each row is applied to both copies of the chunk (base first, then match-GLOBOCAN): the rows
# whose "PMID" equals the given id get their "Country" and "Country_source" columns overwritten
# with whatever fix_country_from_text(country text) returns.
# To add a correction, append a row to the table instead of copy-pasting another statement pair.
_country_fixes_4100000 = (
    (36216187, "France"),
    (36521078, "Australia"),
    (36631707, "Australia"),
    (36207082, "Spain"),
    (36207115, "France"),
    (36207122, "France"),
    (36191785, "India"),
    (36290665, "India"),
    (36334695, "India"),
    (36724554, "India"),
    (36567145, "China"),
    (36648521, "Netherlands"),
    (36471108, "United States"),
    (36307558, "Germany"),
    (36129914, "Japan"),
    (36165084, "Japan"),
    (36177984, "Japan"),
    (36342647, "Japan"),
    (36345980, "Japan"),
    (36384685, "Japan"),
    (36459304, "Japan"),
    (36482484, "Japan"),
    (36583762, "Japan"),
    (36595982, "Japan"),
    (36155118, "United States"),
    (36209930, "United States"),
    (36231910, "Poland"),
    (36233525, "Poland"),
    (36248816, "Spain"),
    (36292985, "Poland"),
    (36293523, "Russia"),
    (36371129, "United States"),
    (36374618, "United States"),
    (36499101, "Poland"),
    (36582080, "Ukraine"),
    (36582081, "Ukraine"),
    (36582086, "Ukraine"),
    (36582092, "Ukraine"),
)

# The loop variables are plain int/str values, so nothing here keeps a DataFrame alive after
# the frames are deleted later in the notebook.
for _pmid, _country_text in _country_fixes_4100000:
    df_parsedX_4100000_base.loc[
        df_parsedX_4100000_base["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country_text)
    df_parsedX_4100000_match_GLOBOCAN.loc[
        df_parsedX_4100000_match_GLOBOCAN["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country_text)

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36582093	, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36582093	, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36582095, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36582095, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36582097, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36582097, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36582102, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36582102, ["Country", "Country_source"]] = fix_country_from_text("Ukraine")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36598557, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36598557, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36612056, ["Country", "Country_source"]] = fix_country_from_text("Spain")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36612056, ["Country", "Country_source"]] = fix_country_from_text("Spain")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36648868, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36648868, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36675073, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36675073, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36683777, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36683777, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36707673, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36707673, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36740642, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36740642, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36249070, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36249070, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36075865, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36075865, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36173560, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36173560, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36600261, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36600261, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36735848, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36735848, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36579755, ["Country", "Country_source"]] = fix_country_from_text("Jamaica")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36579755, ["Country", "Country_source"]] = fix_country_from_text("Jamaica")

df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == 36427594, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")
df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == 36427594, ["Country", "Country_source"]] = fix_country_from_text("Paraguay")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4100000_base.loc[df_parsedX_4100000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4100000_match_GLOBOCAN.loc[df_parsedX_4100000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [434]:
# Write each corrected copy to parsedX_4100000.csv inside its own directory
# (presumably the same file it was read from), without the row index.
df_parsedX_4100000_base.to_csv(
    DF_input_base + "parsedX_4100000.csv",
    index=False,
)
df_parsedX_4100000_match_GLOBOCAN.to_csv(
    DF_input_match_GLOBOCAN + "parsedX_4100000.csv",
    index=False,
)

# Both copies are saved; remove the names so the DataFrames can be garbage-collected.
del df_parsedX_4100000_base, df_parsedX_4100000_match_GLOBOCAN

### Correcting *parsedX_4200000.csv*

In [159]:
# Load both copies of parsedX_4200000.csv: the untouched (base) copy and the
# copy whose names were adapted to match GLOBOCAN.
df_parsedX_4200000_base = pd.read_csv(
    DF_input_base + "parsedX_4200000.csv"
)
df_parsedX_4200000_match_GLOBOCAN = pd.read_csv(
    DF_input_match_GLOBOCAN + "parsedX_4200000.csv"
)

In [160]:
## Replace found errors by true Country values

# Country corrections for the parsedX_4200000 DataFrames, one (PMID, country text)
# pair per article. Every pair is applied to both copies (base first, then
# match-GLOBOCAN) in the order listed, so a correction is written once and the two
# copies cannot drift apart. To add a correction, append one pair to this list.
pmid_country_fixes_4200000 = [
    (36994202, "United Kingdom"),
    (37096110, "United States"),
    (36944555, "Australia"),
    (37053216, "Australia"),
    (37166740, "Australia"),
    (37316777, "South Korea"),
    (37344790, "Netherlands"),
    (36921709, "Slovakia"),
    (36960362, "Slovakia"),
    (37014774, "Argentina"),
    (37033601, "Slovakia"),
    (37089930, "Slovakia"),
    (36816957, "India"),
    (37202298, "India"),
    (36818732, "United States"),
    (36945273, "United States"),
    (37168167, "United States"),
    (37175984, "Italy"),
    (37214073, "St. Vincent and the Grenadines"),
    (37035854, "India"),
    (36947346, "Switzerland"),
    (37367167, "Romania"),
    (36747228, "Japan"),
    (36879276, "Japan"),
    (36941666, "Japan"),
    (36994539, "Japan"),
    (37157037, "Japan"),
    (37170693, "Japan"),
    (37271041, "Japan"),
    (37302899, "Japan"),
    (36778405, "United States"),
    (36812415, "China"),
    (36827840, "United States"),
    (36835201, "Poland"),
    (36848399, "Russia"),
    (36870038, "United States"),
    (36922819, "Iran"),
    (36961315, "United States"),
    (36974151, "Russia"),
    (36982357, "Poland"),
    (36986710, "Poland"),
    (37000004, "United States"),
    (37033209, "United States"),
    (37072467, "United States"),
    (37079983, "Russia"),
    (37149358, "United States"),
    (37162735, "Canada"),
    (37201597, "United States"),
    (37242276, "Poland"),
    (37247411, "United States"),
    (37269794, "Australia"),
    (37362244, "China"),
    (37370680, "United Kingdom"),
    (37373091, "Poland"),
    (36912068, "Australia"),
    (37074322, "Italy"),
    (37154969, "Australia"),
    (37157003, "Australia"),
    (37232052, "Australia"),
    (37310026, "Australia"),
]

for fix_pmid, fix_country_text in pmid_country_fixes_4200000:
    for fix_target_df in (df_parsedX_4200000_base, df_parsedX_4200000_match_GLOBOCAN):
        # fix_country_from_text is called once per DataFrame (as before); its result is
        # written to the Country and Country_source columns of the rows with this PMID.
        fix_target_df.loc[fix_target_df["PMID"] == fix_pmid, ["Country", "Country_source"]] = fix_country_from_text(fix_country_text)

# Drop the loop variables so they do not keep an extra reference to a DataFrame.
del fix_pmid, fix_country_text, fix_target_df

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4200000_base.loc[df_parsedX_4200000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4200000_match_GLOBOCAN.loc[df_parsedX_4200000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [161]:
# Write the corrected chunk back over the CSV it was read from, once per dataset copy.
# os.path.join (instead of string concatenation) keeps the target path correct even if
# the folder typed at the input() prompt does not end with a path separator; with a
# trailing separator the resulting path is the same as before.
df_parsedX_4200000_base.to_csv(os.path.join(DF_input_base, "parsedX_4200000.csv"), index = False)
df_parsedX_4200000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_4200000.csv"), index = False)

# Release both frames before the next chunk is loaded
del df_parsedX_4200000_base
del df_parsedX_4200000_match_GLOBOCAN

### Correcting *parsedX_4300000.csv*

In [435]:
# Read csv: load the same chunk from both dataset copies (base and match-GLOBOCAN).
# os.path.join (instead of string concatenation) builds a valid path whether or not the
# folder typed at the input() prompt ends with a path separator.
df_parsedX_4300000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_4300000.csv"))
df_parsedX_4300000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_4300000.csv"))

In [436]:
## Replace found errors by true Country values

# Manually curated corrections for parsedX_4300000, as (PMID, true country) pairs.
# A single table is applied to both dataset copies, so the base and match-GLOBOCAN
# frames always receive exactly the same set of corrections.
country_corrections_4300000 = [
    (37543453, "France"),
    (37668792, "Netherlands"),
    (37409260, "Slovakia"),
    (37478301, "Slovakia"),
    (37511913, "Argentina"),
    (37702445, "Slovakia"),
    (37789780, "Slovakia"),
    (37869102, "Slovakia"),
    (37442929, "India"),
    (37898832, "India"),
    (37947061, "India"),
    (37453240, "Netherlands"),
    (37700808, "United States"),
    (37933367, "United States"),
    (38021583, "United States"),
    (37970929, "Brazil"),
    (37929739, "India"),
    (37672053, "Germany"),
    (37433283, "Japan"),
    (37555876, "Japan"),
    (37846414, "Japan"),
    (37890269, "Japan"),
    (37899040, "Japan"),
    (37901221, "Japan"),
    (37906295, "Japan"),
    (37935158, "Japan"),
    (37958475, "Japan"),
    (37984347, "Japan"),
    (38001613, "Japan"),
    (37425217, "United States"),
    (37436823, "United States"),
    (37444414, "Netherlands"),
    (37628927, "Poland"),
    (37664414, "United States"),
    (37684264, "Russia"),
    (37685928, "United States"),
    (37686562, "Poland"),
    (37760470, "United States"),
    (37760489, "Spain"),
    (37760734, "United States"),
    (37821747, "Poland"),
    (37835129, "United States"),
    (37842231, "Russia"),
    (37852156, "Poland"),
    (37892173, "Poland"),
    (38001751, "Poland"),
    (38010185, "Russia"),
    (38059526, "Russia"),
    (37490150, "Australia"),
    (37554328, "Italy"),
    (37789409, "Australia"),
    (37875579, "Italy"),
    (37888914, "France"),
    (37957910, "Italy"),
    (37429448, "Paraguay"),
]

# PMID which was found duplicate in PubMed; its rows are dropped from both copies
duplicate_pmids_4300000 = [37451991]


def _apply_corrections_4300000(frames, corrections, duplicate_pmids):
    """Apply country corrections and duplicate removal, in place, to every frame.

    Parameters
    ----------
    frames : list of (str, pandas.DataFrame)
        (label, frame) pairs; each frame is modified in place and must have the
        columns "PMID", "Country" and "Country_source".
    corrections : list of (int, str)
        (PMID, country text) pairs. For every pair, the rows whose "PMID" equals
        the given value get ["Country", "Country_source"] set to
        fix_country_from_text(country text).
        NOTE(review): fix_country_from_text is imported from
        aux_fix_country_utils; it presumably returns the (Country,
        Country_source) pair -- confirm against that module.
    duplicate_pmids : list of int
        PMIDs whose rows are removed from every frame.

    Returns
    -------
    list of (str, int)
        (label, PMID) for every correction whose PMID matched no row in that
        frame, so a mistyped PMID does not go unnoticed.
    """
    unmatched = []
    for pmid, country in corrections:
        for label, frame in frames:
            rows = frame["PMID"] == pmid
            if not rows.any():
                unmatched.append((label, pmid))
            # The assignment is performed even for an empty selection, exactly as the
            # per-PMID statements this loop replaces did.
            frame.loc[rows, ["Country", "Country_source"]] = fix_country_from_text(country)

    # Rows are dropped after the corrections; the dropped PMIDs are not in the
    # corrections table, so the order relative to the corrections does not matter.
    for pmid in duplicate_pmids:
        for _, frame in frames:
            frame.drop(frame.loc[frame["PMID"] == pmid].index, inplace = True)

    return unmatched


unmatched_4300000 = _apply_corrections_4300000(
    [
        ("base", df_parsedX_4300000_base),
        ("match_GLOBOCAN", df_parsedX_4300000_match_GLOBOCAN),
    ],
    country_corrections_4300000,
    duplicate_pmids_4300000,
)

# Surface corrections that did nothing instead of letting them pass silently
if unmatched_4300000:
    print(f"WARNING: corrections that matched no rows (dataset, PMID): {unmatched_4300000}")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4300000_base.loc[df_parsedX_4300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4300000_match_GLOBOCAN.loc[df_parsedX_4300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [437]:
# Write both corrected copies back over their source CSVs (row index omitted).
# os.path.join is used instead of string concatenation so the file path is built
# correctly whether or not the typed directory ends with a path separator.
df_parsedX_4300000_base.to_csv(os.path.join(DF_input_base, "parsedX_4300000.csv"), index = False)
df_parsedX_4300000_match_GLOBOCAN.to_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_4300000.csv"), index = False)

# Drop the references to both frames now that they have been written out.
del df_parsedX_4300000_base
del df_parsedX_4300000_match_GLOBOCAN

### Correcting *parsedX_4400000.csv*

In [438]:
# Read the base and the GLOBOCAN-matched copies of parsedX_4400000.csv.
# os.path.join is used instead of string concatenation so the file path is built
# correctly whether or not the typed directory ends with a path separator.
df_parsedX_4400000_base = pd.read_csv(os.path.join(DF_input_base, "parsedX_4400000.csv"))
df_parsedX_4400000_match_GLOBOCAN = pd.read_csv(os.path.join(DF_input_match_GLOBOCAN, "parsedX_4400000.csv"))

In [439]:
## Replace found errors by true Country values

# Corrections for parsedX_4400000.csv: PMID -> country text passed to
# fix_country_from_text. Entries are kept in the order they were originally applied.
country_corrections_4400000 = {
    38527596: "Brazil",
    38316056: "Australia",
    38706720: "Peru",
    38396679: "Italy",
    38273861: "Slovakia",
    38486718: "Slovakia",
    38471632: "India",
    38611078: "India",
    38174748: "United States",
    38559889: "United States",
    38711696: "United States",
    38384041: "India",
    38498955: "Brazil",
    38526738: "India",
    38663154: "India",
    38719217: "Canada",
    38071159: "Australia",
    38073389: "Canada",
    38075543: "Brazil",
    38068818: "Brazil",
    38069210: "Mexico",
    38069408: "Italy",
    38070058: "China",
    38070139: "Nigeria",
    38071286: "Italy",
    38073240: "France",
    38074100: "China",
    38075004: "China",
    38081888: "China",
    38083933: "China",
    38086438: "Italy",
    38086769: "China",
    38087365: "Switzerland",
    38091101: "China",
    38091199: "China",
    38092572: "France",
    38092697: "Italy",
    38093266: "China",
    38093355: "China",
    38094042: "Nigeria",
    38096312: "Nigeria",
    38097723: "Switzerland",
    38232717: "Japan",
    38266499: "Japan",
    38294618: "Japan",
    38502514: "Japan",
    38619098: "Japan",
    38104918: "United States",
    38106185: "United States",
    38136332: "Poland",
    38139300: "Poland",
    38155125: "Ukraine",
    38155128: "Ukraine",
    38162115: "United States",
    38197800: "United States",
    38267965: "Poland",
    38343990: "Russia",
    38367302: "United States",
    38382150: "United States",
    38466535: "United States",
    38473228: "Poland",
    38495711: "United States",
    38551406: "United States",
    38566760: "Rwanda",
    38591061: "United States",
    38612619: "United States",
    38616222: "Iran",
    38645685: "United States",
    38692686: "United Kingdom",
    38260019: "Australia",
    38265492: "Italy",
    38426070: "Australia",
    38465222: "Australia",
    38526540: "Australia",
    38654605: "Australia",
    38460899: "Italy",
    38278449: "Paraguay",
}


def _apply_country_corrections(frames, corrections):
    """Overwrite Country / Country_source for the listed PMIDs in every frame.

    Parameters
    ----------
    frames : iterable of pandas.DataFrame
        Frames to patch in place; each must have "PMID", "Country" and
        "Country_source" columns.
    corrections : dict
        Maps a PMID to the country text handed to fix_country_from_text.

    Notes
    -----
    fix_country_from_text is called once per (PMID, frame) pair, exactly as the
    original one-statement-per-frame code did, so no assumption is made about
    that helper being side-effect free.
    Its return value is assigned to the two columns at once
    (presumably a country / source pair -- confirm in aux_fix_country_utils).
    """
    for pmid, country_text in corrections.items():
        for frame in frames:
            frame.loc[frame["PMID"] == pmid, ["Country", "Country_source"]] = fix_country_from_text(country_text)


# Apply every correction to the base copy first, then to the GLOBOCAN-matched copy.
_apply_country_corrections(
    (df_parsedX_4400000_base, df_parsedX_4400000_match_GLOBOCAN),
    country_corrections_4400000,
)

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_4400000_base.loc[df_parsedX_4400000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_4400000_match_GLOBOCAN.loc[df_parsedX_4400000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [440]:
# Write the corrected frames back over the CSVs they were read from:
# one file in the base folder, one in the match-GLOBOCAN folder.
df_parsedX_4400000_base.to_csv(DF_input_base + "parsedX_4400000.csv", index=False)
df_parsedX_4400000_match_GLOBOCAN.to_csv(DF_input_match_GLOBOCAN + "parsedX_4400000.csv", index=False)

# Drop both names so the frames can be garbage-collected before the next chunk is loaded.
del df_parsedX_4400000_base, df_parsedX_4400000_match_GLOBOCAN

### Correcting *parsedX_4454000.csv*

In [209]:
# Load the same chunk from both dataset copies: first the base folder,
# then the match-GLOBOCAN folder.
df_parsedX_4454000_base, df_parsedX_4454000_match_GLOBOCAN = (
    pd.read_csv(folder + "parsedX_4454000.csv")
    for folder in (DF_input_base, DF_input_match_GLOBOCAN)
)

In [210]:
## Replace found errors by true Country values

# Manually verified corrections for parsedX_4454000.csv, keyed by PMID.
# To register a new correction, add one `PMID: "Country"` line here; the loop
# below applies it to both dataset copies, so the two can no longer drift apart.
country_fixes_4454000 = {
    38750928: "United Kingdom",
    38903136: "United States",
    38760945: "Italy",
    38766854: "Slovakia",
    38926485: "Slovakia",
    39166520: "Slovakia",
    39435289: "Slovakia",
    38978891: "United States",
    39278673: "India",
    39420726: "India",
    39678637: "India",
    39280576: "United States",
    40336871: "United States",
    38785200: "Japan",
    38915109: "Japan",
    38936093: "Japan",
    38970988: "Japan",
    39483425: "Japan",
    38893249: "United States",
    38893269: "Poland",
    38926517: "Poland",
    38927941: "Poland",
    39139119: "Russia",
    40375971: "United States",
    39154541: "Australia",
    38877575: "Italy",
}

# For every PMID, overwrite the Country / Country_source columns of the matching
# rows with whatever fix_country_from_text returns for the corrected name.
# The helper is called once per assignment (as in the original statements), and
# the loop binds only the PMID and the name - never a frame - so deleting the
# frames later still releases their memory.
for _pmid, _country_name in country_fixes_4454000.items():
    df_parsedX_4454000_base.loc[
        df_parsedX_4454000_base["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country_name)
    df_parsedX_4454000_match_GLOBOCAN.loc[
        df_parsedX_4454000_match_GLOBOCAN["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country_name)

In [211]:
# Write the corrected frames back over the CSVs they were read from:
# one file in the base folder, one in the match-GLOBOCAN folder.
df_parsedX_4454000_base.to_csv(DF_input_base + "parsedX_4454000.csv", index=False)
df_parsedX_4454000_match_GLOBOCAN.to_csv(DF_input_match_GLOBOCAN + "parsedX_4454000.csv", index=False)

# Drop both names so the frames can be garbage-collected before the next chunk is loaded.
del df_parsedX_4454000_base, df_parsedX_4454000_match_GLOBOCAN

### Correcting *parsedX_500000.csv*

In [133]:
# Load the same chunk from both dataset copies: first the base folder,
# then the match-GLOBOCAN folder.
df_parsedX_500000_base, df_parsedX_500000_match_GLOBOCAN = (
    pd.read_csv(folder + "parsedX_500000.csv")
    for folder in (DF_input_base, DF_input_match_GLOBOCAN)
)

In [134]:
## Replace found errors by true Country values

# Manually verified corrections for parsedX_500000.csv, keyed by PMID.
# To register a new correction, add one `PMID: "Country"` line here; the loop
# below applies it to both dataset copies, so the two can no longer drift apart.
country_fixes_500000 = {
    7716605: "United States",
    7629293: "Spain",
    7694997: "France",
    7724273: "France",
    7742592: "France",
    7805514: "France",
    7842068: "France",
    7899810: "France",
    8116445: "Belgium",
    8119094: "Germany",
    7565036: "Argentina",
    7619765: "United Kingdom",
    7619770: "United Kingdom",
    8086356: "United Kingdom",
    8094841: "United Kingdom",
    7493376: "Italy",
    7892582: "Denmark",
    7976858: "Belgium",
    8015619: "Netherlands",
    8032828: "Netherlands",
    8044326: "Belgium",
    7625992: "United Kingdom",
    7636527: "Italy",
    7697503: "France",
    7783824: "Netherlands",
    7928414: "France",
    8133248: "Italy",
}

# For every PMID, overwrite the Country / Country_source columns of the matching
# rows with whatever fix_country_from_text returns for the corrected name.
# The helper is called once per assignment (as in the original statements), and
# the loop binds only the PMID and the name - never a frame - so deleting the
# frames later still releases their memory.
for _pmid, _country_name in country_fixes_500000.items():
    df_parsedX_500000_base.loc[
        df_parsedX_500000_base["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country_name)
    df_parsedX_500000_match_GLOBOCAN.loc[
        df_parsedX_500000_match_GLOBOCAN["PMID"] == _pmid, ["Country", "Country_source"]
    ] = fix_country_from_text(_country_name)

In [135]:
# Write the corrected 500000 chunk to "parsedX_500000.csv" in the base and
# GLOBOCAN-matched folders (row index is not written)
df_parsedX_500000_base.to_csv(f"{DF_input_base}parsedX_500000.csv", index=False)
df_parsedX_500000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}parsedX_500000.csv", index=False)

# Drop both names so the frames are no longer referenced from the notebook namespace
del df_parsedX_500000_base, df_parsedX_500000_match_GLOBOCAN

### Correcting *parsedX_600000.csv*

In [376]:
# Load the 600000 chunk from both dataset copies: base and GLOBOCAN-matched
df_parsedX_600000_base = pd.read_csv(f"{DF_input_base}parsedX_600000.csv")
df_parsedX_600000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_600000.csv")

In [377]:
## Replace found errors by true Country values

# Corrections found for the 600000 chunk, as PMID -> true country name.
# Every PMID is listed exactly once and applied to both dataset copies below,
# so the base and GLOBOCAN-matched frames cannot drift apart through a
# copy/paste slip. Add further corrections as new `PMID: "Country"` entries.
country_fixes_600000 = {
    8671916: "Austria",
    8196470: "Germany",
    8330586: "France",
    8419750: "Australia",
    8571181: "Germany",
    8577568: "France",
    8643973: "Germany",
    8657483: "France",
    8692551: "Germany",
    8693078: "Germany",
    8761744: "France",
    8763667: "France",
    8785560: "France",
    8812871: "Italy",
    8572636: "Belgium",
    8746467: "Argentina",
    8806698: "Spain",
    8180673: "Argentina",
    8334678: "United Kingdom",
    8580060: "United Kingdom",
    8656198: "United States",
    8744756: "United States",
    8217793: "Cuba",
    8664485: "Cuba",
    8355888: "Italy",
    8277621: "Japan",
    8320912: "Japan",
    8370982: "Japan",
    8454257: "Japan",
    8488825: "Japan",
    8579123: "Japan",
    8604699: "Japan",
    8605013: "Japan",
    8742914: "Japan",
    8797307: "Japan",
    8318372: "Sweden",
    8395101: "France",
    8401175: "Sweden",
    8490657: "United States",
    8508841: "Netherlands",
    8564351: "Netherlands",
    8674071: "Canada",
    8710034: "Netherlands",
    8751131: "Sweden",
    8775747: "Netherlands",
    8803694: "United States",
    8448081: "Italy",
}

# fix_country_from_text (project helper imported from aux_fix_country_utils) supplies
# the values written to the "Country" and "Country_source" columns of every row whose
# PMID matches; a PMID that is absent from a frame leaves that frame unchanged.
# NOTE(review): presumably the helper returns a (Country, Country_source) pair - confirm.
# The helper is called separately for each frame: base first, then GLOBOCAN-matched.
for pmid_to_fix, true_country in country_fixes_600000.items():
    df_parsedX_600000_base.loc[df_parsedX_600000_base["PMID"] == pmid_to_fix, ["Country", "Country_source"]] = fix_country_from_text(true_country)
    df_parsedX_600000_match_GLOBOCAN.loc[df_parsedX_600000_match_GLOBOCAN["PMID"] == pmid_to_fix, ["Country", "Country_source"]] = fix_country_from_text(true_country)

In [378]:
# Write the corrected 600000 chunk to "parsedX_600000.csv" in the base and
# GLOBOCAN-matched folders (row index is not written)
df_parsedX_600000_base.to_csv(f"{DF_input_base}parsedX_600000.csv", index=False)
df_parsedX_600000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}parsedX_600000.csv", index=False)

# Drop both names so the frames are no longer referenced from the notebook namespace
del df_parsedX_600000_base, df_parsedX_600000_match_GLOBOCAN

### Correcting *parsedX_700000.csv*

In [139]:
# Load the 700000 chunk from both dataset copies: base and GLOBOCAN-matched
df_parsedX_700000_base = pd.read_csv(f"{DF_input_base}parsedX_700000.csv")
df_parsedX_700000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_700000.csv")

In [140]:
## Replace found errors by true Country values

# Corrections found for the 700000 chunk, as PMID -> true country name.
# Every PMID is listed exactly once and applied to both dataset copies below,
# so the base and GLOBOCAN-matched frames cannot drift apart through a
# copy/paste slip. Add further corrections as new `PMID: "Country"` entries.
country_fixes_700000 = {
    9312254: "Italy",
    9413693: "United States",
    8830876: "Belgium",
    8871702: "Australia",
    8929371: "France",
    8929372: "France",
    8936067: "Australia",
    8950615: "France",
    8952911: "France",
    8978585: "Germany",
    9002755: "Italy",
    9036065: "Germany",
    9057598: "Australia",
    9155520: "Germany",
    9239266: "France",
    9303940: "France",
    9340970: "Germany",
    9449448: "Belgium",
    9452361: "France",
    9522674: "Germany",
    9299251: "Japan",
    9538179: "Japan",
    9538180: "Japan",
    9538181: "Japan",
    9143825: "Italy",
    9040183: "United States",
    9133295: "Argentina",
    8934052: "United Kingdom",
    9039737: "United Kingdom",
    9039813: "United Kingdom",
    9128655: "United Kingdom",
    9315394: "United Kingdom",
    9505836: "United Kingdom",
    9421937: "Belgium",
    9000124: "Japan",
    9018102: "Japan",
    9525276: "Japan",
    8909239: "Spain",
    9079612: "United States",
    9346222: "Germany",
    9354229: "Spain",
    9375033: "Portugal",
    9481037: "United States",
}

# fix_country_from_text (project helper imported from aux_fix_country_utils) supplies
# the values written to the "Country" and "Country_source" columns of every row whose
# PMID matches; a PMID that is absent from a frame leaves that frame unchanged.
# NOTE(review): presumably the helper returns a (Country, Country_source) pair - confirm.
# The helper is called separately for each frame: base first, then GLOBOCAN-matched.
for pmid_to_fix, true_country in country_fixes_700000.items():
    df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == pmid_to_fix, ["Country", "Country_source"]] = fix_country_from_text(true_country)
    df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == pmid_to_fix, ["Country", "Country_source"]] = fix_country_from_text(true_country)

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_700000_base.loc[df_parsedX_700000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_700000_match_GLOBOCAN.loc[df_parsedX_700000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [141]:
# Write the corrected frames back over the CSVs they were loaded from
# (row index is not written), one file per dataset copy.
df_parsedX_700000_base.to_csv(f"{DF_input_base}parsedX_700000.csv", index=False)
df_parsedX_700000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}parsedX_700000.csv", index=False)

# Drop both names so the frames can be garbage-collected before the next chunk is loaded.
del df_parsedX_700000_base, df_parsedX_700000_match_GLOBOCAN

### Correcting *parsedX_800000.csv*

In [171]:
# Load the same chunk from both dataset copies: the untouched "base" copy and
# the copy whose names are aligned with GLOBOCAN.
df_parsedX_800000_base = pd.read_csv(f"{DF_input_base}parsedX_800000.csv")
df_parsedX_800000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_800000.csv")

In [172]:
## Replace found errors by true Country values

# Manually verified corrections for this chunk, as (PMID, true country) pairs.
# To register a new correction, append one pair here: the loop below applies
# it to both copies of the dataset, so the PMID and the country are typed once
# instead of being duplicated across two near-identical statements.
country_fixes_800000 = [
    (9589843, "United Kingdom"),
    (9714688, "France"),
    (9720697, "France"),
    (10084537, "France"),
    (10219451, "Germany"),
    (10235026, "Germany"),
    (10369541, "Australia"),
    (10495567, "France"),
    (9639642, "Japan"),
    (10474450, "Italy"),
    (9683822, "Spain"),
    (9919765, "Slovakia"),
    (9929839, "Argentina"),
    (9637203, "Italy"),
    (9810527, "Netherlands"),
    (9875642, "Germany"),
    (10472567, "Germany"),
    (9635577, "Japan"),
    (9855116, "Japan"),
    (9870502, "Japan"),
    (9872567, "Japan"),
    (10030736, "Japan"),
    (10232583, "Japan"),
    (10408901, "Japan"),
    (10426795, "Japan"),
    (10478762, "Japan"),
    (9615806, "Italy"),
    (9676935, "Netherlands"),
    (9683692, "Netherlands"),
    (9815589, "Italy"),
    (10048290, "Netherlands"),
    (10070433, "Italy"),
]

# For each correction, update the base copy first and then the match-GLOBOCAN
# copy so both stay in sync. The value returned by fix_country_from_text is
# written into the "Country" and "Country_source" columns of the rows whose
# PMID equals the corrected one; rows with any other PMID are not selected.
# The helper is called once per frame, exactly as in the unrolled version.
for fix_pmid, fix_country in country_fixes_800000:
    for target_df in (df_parsedX_800000_base, df_parsedX_800000_match_GLOBOCAN):
        target_df.loc[target_df["PMID"] == fix_pmid, ["Country", "Country_source"]] = fix_country_from_text(fix_country)

In [173]:
# Write the corrected frames back over the CSVs they were loaded from
# (row index is not written), one file per dataset copy.
df_parsedX_800000_base.to_csv(f"{DF_input_base}parsedX_800000.csv", index=False)
df_parsedX_800000_match_GLOBOCAN.to_csv(f"{DF_input_match_GLOBOCAN}parsedX_800000.csv", index=False)

# Drop both names so the frames can be garbage-collected before the next chunk is loaded.
del df_parsedX_800000_base, df_parsedX_800000_match_GLOBOCAN

### Correcting *parsedX_900000.csv*

In [379]:
# Load the same chunk from both dataset copies: the untouched "base" copy and
# the copy whose names are aligned with GLOBOCAN.
df_parsedX_900000_base = pd.read_csv(f"{DF_input_base}parsedX_900000.csv")
df_parsedX_900000_match_GLOBOCAN = pd.read_csv(f"{DF_input_match_GLOBOCAN}parsedX_900000.csv")

In [380]:
## Replace found errors by true Country values

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10791581, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10791581, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10609170, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10609170, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10624626, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10624626, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10632200, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10632200, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10807503, ["Country", "Country_source"]] = fix_country_from_text("Ireland")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10807503, ["Country", "Country_source"]] = fix_country_from_text("Ireland")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11026949, ["Country", "Country_source"]] = fix_country_from_text("Australia")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11026949, ["Country", "Country_source"]] = fix_country_from_text("Australia")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11084400, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11084400, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11173747, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11173747, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11176230, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11176230, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11129732, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11129732, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10795080, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10795080, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11092613, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11092613, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10528278, ["Country", "Country_source"]] = fix_country_from_text("Germany")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10528278, ["Country", "Country_source"]] = fix_country_from_text("Germany")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10584812, ["Country", "Country_source"]] = fix_country_from_text("Argentina")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10584812, ["Country", "Country_source"]] = fix_country_from_text("Argentina")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11148324, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11148324, ["Country", "Country_source"]] = fix_country_from_text("France")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10595174, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10595174, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10605546, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10605546, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10749016, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10749016, ["Country", "Country_source"]] = fix_country_from_text("United Kingdom")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10571665, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10571665, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10597375, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10597375, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10767359, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10767359, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10771283, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10771283, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10837008, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10837008, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10905515, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10905515, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10948340, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10948340, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10962453, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10962453, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10975289, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10975289, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11159759, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11159759, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11163294, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11163294, ["Country", "Country_source"]] = fix_country_from_text("Japan")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11237184, ["Country", "Country_source"]] = fix_country_from_text("Japan")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11237184, ["Country", "Country_source"]] = fix_country_from_text("Japan")

# PMID 10529011: both dataset copies must receive the same country text ("Russia")
df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10529011, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10529011, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10671616, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10671616, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10746981, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10746981, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10835727, ["Country", "Country_source"]] = fix_country_from_text("Chile")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10835727, ["Country", "Country_source"]] = fix_country_from_text("Chile")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10954466, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10954466, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10971773, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10971773, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11016945, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11016945, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11046180, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11046180, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11050322, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11050322, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11136251, ["Country", "Country_source"]] = fix_country_from_text("Russia")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11136251, ["Country", "Country_source"]] = fix_country_from_text("Russia")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11159084, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11159084, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11166890, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11166890, ["Country", "Country_source"]] = fix_country_from_text("Netherlands")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11222188, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11222188, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11230668, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11230668, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11120332, ["Country", "Country_source"]] = fix_country_from_text("Uruguay")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11120332, ["Country", "Country_source"]] = fix_country_from_text("Uruguay")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11154886, ["Country", "Country_source"]] = fix_country_from_text("Belgium")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11154886, ["Country", "Country_source"]] = fix_country_from_text("Belgium")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 10785411, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 10785411, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == 11093805, ["Country", "Country_source"]] = fix_country_from_text("France")
df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == 11093805, ["Country", "Country_source"]] = fix_country_from_text("France")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_900000_base.loc[df_parsedX_900000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_900000_match_GLOBOCAN.loc[df_parsedX_900000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [381]:
# Write the corrected parsedX_900000 frames back to their csv in each dataset folder
df_parsedX_900000_base.to_csv(
    DF_input_base + "parsedX_900000.csv",
    index=False,
)
df_parsedX_900000_match_GLOBOCAN.to_csv(
    DF_input_match_GLOBOCAN + "parsedX_900000.csv",
    index=False,
)

# Release both frames once they have been saved
del df_parsedX_900000_base, df_parsedX_900000_match_GLOBOCAN

# Fixing articles first update

In [12]:
# Folder with the base copy of the first-update csvs, typed in interactively.
# The path must end with a path separator: file names are appended to it with `+`.
DF_input_first_update_base = input().strip()

  C:\Users\svalb\OneDrive\Escritorio\Data_40_years_cancer_studies\parsedXMLs_first_update_base\


In [13]:
# Folder with the match-GLOBOCAN copy of the first-update csvs, typed in interactively.
# The path must end with a path separator: file names are appended to it with `+`.
DF_input_first_update_match_GLOBOCAN = input().strip()

  C:\Users\svalb\OneDrive\Escritorio\Data_40_years_cancer_studies\parsedXMLs_first_update_match_GLOBOCAN\


In [14]:
# Gather the names of the .csv files found in the first-update base folder
list_csvs_first_update_base = [
    entry
    for entry in os.listdir(DF_input_first_update_base)
    if entry.endswith(".csv")
]

# Number of csvs, used for the progress counters
n_csvs_first_update_base = len(list_csvs_first_update_base)

In [15]:
# Gather the names of the .csv files found in the first-update match-GLOBOCAN folder
list_csvs_first_update_match_GLOBOCAN = [
    entry
    for entry in os.listdir(DF_input_first_update_match_GLOBOCAN)
    if entry.endswith(".csv")
]

# Number of csvs, used for the progress counters
n_csvs_first_update_match_GLOBOCAN = len(list_csvs_first_update_match_GLOBOCAN)

In [10]:
# Global correction: replace "Pitcairn" by "Italy" in Country. All assignments to Pitcairn correspond to Italian research centers.
# Every csv of both dataset copies is read, corrected and written back in place (base first, then match Globocan).
for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    print(f"--Fixing {dataset_label} dataset")

    n_dataset_csvs = len(dataset_csvs)

    # enumerate yields the progress position directly; list.index() would rescan the list on every iteration
    for position, csv_name in enumerate(dataset_csvs, start=1):
        print(f"Parsing csv: {csv_name} ({position}/{n_dataset_csvs})")
        df = pd.read_csv(dataset_dir + csv_name)
        df.loc[df["Country"] == "Pitcairn", "Country"] = "Italy"
        df.to_csv(dataset_dir + csv_name, index = False)
        # Drop the frame before loading the next csv
        del df

    print(f"--Fixing done for {dataset_label} dataset")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [127]:
## Systematic fix in Most_common_list_countries: records with Uchinada were assigned to China instead of Japan
for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries(dataset_label, dataset_dir, dataset_csvs, "China", "Uchinada", "Japan")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [13]:
## Systematic fix in Most_common_list_countries: records with Perugia were assigned to Peru instead of Italy
for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries(dataset_label, dataset_dir, dataset_csvs, "Peru", "Perugia", "Italy")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [132]:
## Systematic fix in Most_common_list_countries: records with PE were assigned to Peru instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "PE"
# Important: some records should be assigned to other countries. These are done manually below

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Peru", "PE", "Brazil")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [184]:
## Systematic fix in Most_common_list_countries: records with Sweden were assigned to United Kingdom instead of Sweden

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Sweden"

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "United Kingdom", "Sweden", "Sweden")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [186]:
## Systematic fix in Most_common_list_countries: records with Denmark Hill were assigned to Denmark instead of United Kingdom

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Denmark Hill"

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Denmark", "Denmark Hill", "United Kingdom")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [222]:
## Fix wrong Country assignment in Most_common_list_countries: Madagascar instead of Brazil in records with MG

# Important: SUBSTRINGS not taken into consideration, corrected only if el["text"] == "MG"
# The arguments follow the order used by the sibling cells: wrongly assigned country, matched text, correct country.
# They are written once and applied to both dataset copies so the two calls cannot drift apart.

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Madagascar", "MG", "Brazil")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [305]:
## Systematic fix in Most_common_list_countries: records with Col were assigned to Colombia instead of Mexico

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Col"

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Colombia", "Col", "Mexico")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [389]:
## Systematic fix in Most_common_list_countries: records with Jamaica were assigned to Jamaica instead of United States

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Jamaica"
# Important: some records should be assigned to other countries. These are done manually below

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Jamaica", "Jamaica", "United States")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [400]:
## Systematic fix in Most_common_list_countries: records with ES were assigned to Spain instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "ES"
# Important: some records should be assigned to other countries. These are done manually below

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Spain", "ES", "Brazil")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [411]:
## Systematic fix in Most_common_list_countries: records with RS were assigned to Serbia instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "RS"

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Serbia", "RS", "Brazil")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [413]:
## Systematic fix in Most_common_list_countries: records with Paraguay were assigned to Paraguay instead of Argentina

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "Paraguay"
# Important: some records should be assigned to other countries. These are done manually below

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Paraguay", "Paraguay", "Argentina")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [456]:
## Systematic fix in Most_common_list_countries: records with SK were assigned to Slovakia instead of Canada

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "SK"
# Important: some records should be assigned to other countries. These are done manually below

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Slovakia", "SK", "Canada")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


In [16]:
## Systematic fix in Most_common_list_countries: records with BA were assigned to Bosnia and Herzegovina instead of Brazil

# Important: substrings are NOT considered; a record is corrected only if el["text"] == "BA"
# Important: some records should be assigned to other countries. These are done manually below

for dataset_label, dataset_dir, dataset_csvs in (
    ("base", DF_input_first_update_base, list_csvs_first_update_base),
    ("match GLobocan", DF_input_first_update_match_GLOBOCAN, list_csvs_first_update_match_GLOBOCAN),
):
    fixSystematicMost_common_list_countries_FULL_MATCH(dataset_label, dataset_dir, dataset_csvs, "Bosnia and Herzegovina", "BA", "Brazil")

--Fixing base dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for base dataset
--Fixing match GLobocan dataset
Parsing csv: parsedXMLs_first_upd_100000.csv (1/4)
Parsing csv: parsedXMLs_first_upd_200000.csv (2/4)
Parsing csv: parsedXMLs_first_upd_300000.csv (3/4)
Parsing csv: parsedXMLs_first_upd_340800.csv (4/4)
--Fixing done for match GLobocan dataset


### Correcting *parsedXMLs_first_upd_100000.csv*

In [458]:
# Load parsedXMLs_first_upd_100000.csv from the base folder and from the match-GLOBOCAN folder
df_parsedX_first_upd_100000_base = pd.read_csv(
    DF_input_first_update_base + "parsedXMLs_first_upd_100000.csv"
)
df_parsedX_first_upd_100000_match_GLOBOCAN = pd.read_csv(
    DF_input_first_update_match_GLOBOCAN + "parsedXMLs_first_upd_100000.csv"
)

In [459]:
## Replace found errors by true Country values

# Manually curated corrections for parsedXMLs_first_upd_100000.csv: PMID -> true country.
# Every entry is written to BOTH the base and the match GLOBOCAN copies.
# To add a correction, append one `PMID: "Country",` line to this table.
country_fixes_first_upd_100000 = {
    38903136: "United States",
    39208927: "United States",
    38706720: "Peru",
    38978891: "United States",
    39255534: "United States",
    39034920: "India",
    38760945: "Italy",
    39278673: "India",
    38573078: "France",
    38926485: "Slovakia",
    39123429: "France",
    39134785: "Germany",
    39282046: "Argentina",
    38750928: "United Kingdom",
    39057114: "Aruba",
    38316056: "Australia",
    38950950: "Australia",
    38865548: "Brazil",
    36870038: "United States",
    38367302: "United States",
    38893249: "United States",
    38893269: "Poland",
    38927941: "Poland",
    38959852: "Australia",
    39096575: "Poland",
    39136900: "Poland",
    39160361: "Switzerland",
    39216338: "Russia",
    39254913: "Russia",
    38700502: "Australia",
    39117800: "Australia",
    39141184: "Australia",
    38877575: "Italy",
    38460899: "Italy",
    38996544: "Italy",
    38549535: "Argentina",
    39227791: "India",
}


def _apply_country_fixes(frames, fixes):
    """Overwrite Country / Country_source for each listed PMID, in place, in every frame.

    Parameters
    ----------
    frames : dict
        Maps a dataset label (used only in the warning message) to the DataFrame to patch.
        Each frame must have "PMID", "Country" and "Country_source" columns.
    fixes : dict
        Maps a PMID to the country text handed to ``fix_country_from_text``; whatever
        that function returns is assigned to the ["Country", "Country_source"] columns.

    Returns
    -------
    None
        The frames are modified in place. A warning is printed for every dataset in
        which one or more PMIDs matched no row.
    """
    unmatched = {label: [] for label in frames}

    for pmid, country_text in fixes.items():
        for label, frame in frames.items():
            rows = frame["PMID"] == pmid
            # An empty mask makes the assignment below a silent no-op (mistyped PMID,
            # or a PMID column that was not parsed as integers), so keep track of it.
            if not rows.any():
                unmatched[label].append(pmid)
            frame.loc[rows, ["Country", "Country_source"]] = fix_country_from_text(country_text)

    for label, missing_pmids in unmatched.items():
        if missing_pmids:
            print(f"WARNING ({label}): no row found for PMID(s) {missing_pmids}")


_apply_country_fixes(
    {
        "base": df_parsedX_first_upd_100000_base,
        "match GLOBOCAN": df_parsedX_first_upd_100000_match_GLOBOCAN,
    },
    country_fixes_first_upd_100000,
)

In [460]:
# Write each corrected copy back over the csv it was read from (row index not stored).
df_parsedX_first_upd_100000_base.to_csv(
    f"{DF_input_first_update_base}parsedXMLs_first_upd_100000.csv",
    index=False,
)
df_parsedX_first_upd_100000_match_GLOBOCAN.to_csv(
    f"{DF_input_first_update_match_GLOBOCAN}parsedXMLs_first_upd_100000.csv",
    index=False,
)

# Drop both names so the frames can be garbage-collected before the next file is loaded.
del df_parsedX_first_upd_100000_base, df_parsedX_first_upd_100000_match_GLOBOCAN

### Correcting *parsedXMLs_first_upd_200000.csv*

In [385]:
# Load both copies (base first, then match GLOBOCAN) of the same csv chunk.
# The generator's loop variable stays local to the expression, so no extra
# name is left behind in the notebook namespace.
df_parsedX_first_upd_200000_base, df_parsedX_first_upd_200000_match_GLOBOCAN = (
    pd.read_csv(dataset_dir + "parsedXMLs_first_upd_200000.csv")
    for dataset_dir in (DF_input_first_update_base, DF_input_first_update_match_GLOBOCAN)
)

In [386]:
## Replace found errors by true Country values

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39898787, ["Country", "Country_source"]] = fix_country_from_text("The Netherlands")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39898787, ["Country", "Country_source"]] = fix_country_from_text("The Netherlands")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39524161, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39524161, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39458985, ["Country", "Country_source"]] = fix_country_from_text("Puerto Rico")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39458985, ["Country", "Country_source"]] = fix_country_from_text("Puerto Rico")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39602041, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39602041, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39772235, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39772235, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39753058, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39753058, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39898506, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39898506, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39327927, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39327927, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39529216, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39529216, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39847205, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39847205, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39856614, ["Country", "Country_source"]] = fix_country_from_text("India")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39856614, ["Country", "Country_source"]] = fix_country_from_text("India")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39341758, ["Country", "Country_source"]] = fix_country_from_text("Argentina")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39341758, ["Country", "Country_source"]] = fix_country_from_text("Argentina")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39427504, ["Country", "Country_source"]] = fix_country_from_text("Peru")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39427504, ["Country", "Country_source"]] = fix_country_from_text("Peru")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39435289, ["Country", "Country_source"]] = fix_country_from_text("Slovakia")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39435289, ["Country", "Country_source"]] = fix_country_from_text("Slovakia")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39477439, ["Country", "Country_source"]] = fix_country_from_text("Slovakia")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39477439, ["Country", "Country_source"]] = fix_country_from_text("Slovakia")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39747500, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39747500, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39964812, ["Country", "Country_source"]] = fix_country_from_text("Italy")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39964812, ["Country", "Country_source"]] = fix_country_from_text("Italy")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39330043, ["Country", "Country_source"]] = fix_country_from_text("Canada")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39330043, ["Country", "Country_source"]] = fix_country_from_text("Canada")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39337432, ["Country", "Country_source"]] = fix_country_from_text("Poland")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39337432, ["Country", "Country_source"]] = fix_country_from_text("Poland")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39362046, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39362046, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39376718, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39376718, ["Country", "Country_source"]] = fix_country_from_text("United States")

df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39456534, ["Country", "Country_source"]] = fix_country_from_text("United States")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39456534, ["Country", "Country_source"]] = fix_country_from_text("United States")

# Taiwan in the base, China in the match GLOBOCAN
df_parsedX_first_upd_200000_base.loc[df_parsedX_first_upd_200000_base["PMID"] == 39482661, ["Country", "Country_source"]] = fix_country_from_text("Taiwan")
df_parsedX_first_upd_200000_match_GLOBOCAN.loc[df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == 39482661, ["Country", "Country_source"]] = fix_country_from_text("China")

# Manual Country corrections, kept as data instead of one hand-copied pair of
# statements per article.
# Key: PMID of the article. Value: the text handed to fix_country_from_text(),
# identical for the base copy and the match-GLOBOCAN copy.
# To register a new correction, add a `PMID: "Country"` entry to this table.
country_fixes_first_upd_200000 = {
    39495097: "Canada",
    39516395: "Colombia",
    39586872: "United States",
    39598488: "United States",
    39659903: "United States",
    39675307: "Poland",
    39694930: "United States",
    39769179: "Poland",
    39769315: "Poland",
    39809982: "Poland",
    39931407: "Vietnam",
    39958565: "United States",
    39607898: "Canada",
    39448861: "Australia",
    39728071: "Italy",
    39901923: "Australia",
    39922814: "Australia",
    39961598: "Australia",
    39826875: "The Netherlands",
}

# (PMID, copy) pairs for which no row was selected. A mask that selects nothing
# turns the .loc assignment into a silent no-op, so a mistyped PMID would
# otherwise go unnoticed.
unmatched_first_upd_200000 = []

for fix_pmid, fix_country in country_fixes_first_upd_200000.items():
    # Base copy first, then the match-GLOBOCAN copy (same order as the
    # statement pairs this loop replaces).
    base_rows = df_parsedX_first_upd_200000_base["PMID"] == fix_pmid
    if not base_rows.any():
        unmatched_first_upd_200000.append((fix_pmid, "base"))
    df_parsedX_first_upd_200000_base.loc[base_rows, ["Country", "Country_source"]] = fix_country_from_text(fix_country)

    match_rows = df_parsedX_first_upd_200000_match_GLOBOCAN["PMID"] == fix_pmid
    if not match_rows.any():
        unmatched_first_upd_200000.append((fix_pmid, "match_GLOBOCAN"))
    df_parsedX_first_upd_200000_match_GLOBOCAN.loc[match_rows, ["Country", "Country_source"]] = fix_country_from_text(fix_country)

if unmatched_first_upd_200000:
    print(f"WARNING: no row found for these (PMID, dataset) corrections: {unmatched_first_upd_200000}")

In [387]:
# Save both corrected copies of parsedXMLs_first_upd_200000.csv (without the
# index column) and then remove the dataframe names from the namespace.
# os.path.join gives the same path as plain concatenation when the folder string
# already ends with a separator, and still builds a valid path when it does not.
df_parsedX_first_upd_200000_base.to_csv(
    os.path.join(DF_input_first_update_base, "parsedXMLs_first_upd_200000.csv"),
    index=False,
)
df_parsedX_first_upd_200000_match_GLOBOCAN.to_csv(
    os.path.join(DF_input_first_update_match_GLOBOCAN, "parsedXMLs_first_upd_200000.csv"),
    index=False,
)

del df_parsedX_first_upd_200000_base
del df_parsedX_first_upd_200000_match_GLOBOCAN

### Correcting *parsedXMLs_first_upd_300000.csv*

In [461]:
# Read both copies of parsedXMLs_first_upd_300000.csv: the base copy and the
# copy whose names are matched to GLOBOCAN.
# os.path.join gives the same path as plain concatenation when the folder string
# already ends with a separator, and still builds a valid path when it does not.
df_parsedX_first_upd_300000_base = pd.read_csv(
    os.path.join(DF_input_first_update_base, "parsedXMLs_first_upd_300000.csv")
)
df_parsedX_first_upd_300000_match_GLOBOCAN = pd.read_csv(
    os.path.join(DF_input_first_update_match_GLOBOCAN, "parsedXMLs_first_upd_300000.csv")
)

In [462]:
## Replace found errors by true Country values

# Manual Country corrections, kept as data instead of one hand-copied pair of
# statements per article.
# Key: PMID of the article. Value: the text handed to fix_country_from_text()
# for the base copy (and for the match-GLOBOCAN copy unless overridden below).
# To register a new correction, add a `PMID: "Country"` entry to this table.
country_fixes_first_upd_300000 = {
    40036840: "United States",
    40220908: "United States",
    40159468: "Ecuador",
    40209461: "Argentina",
    40225770: "India",
    40270185: "India",
    40481983: "India",
    40198273: "Italy",
    40625142: "Italy",
    40639449: "Italy",
    40544104: "India",
    40155249: "Argentina",
    40167786: "Slovakia",
    40182053: "Slovakia",
    40304827: "Slovakia",
    40591208: "China",
    40370481: "United States",
    40385535: "United States",
    40479529: "United States",
    40002220: "United States",
    40076625: "Russia",
    40121148: "Poland",
    40141790: "Poland",
    40163700: "United States",
    40205400: "Iran",
    40218878: "Poland",
    40357025: "United States",
    40403699: "Russia",
    40430856: "Russia",
    40443068: "Poland",
    40529157: "Puerto Rico",
    40537682: "Egypt",
    40561495: "Australia",
    40600509: "United States",
    40603713: "The Netherlands",
    40629205: "Poland",
    40647492: "United States",
    40651100: "United States",
    40637905: "Taiwan",
    40266025: "Australia",
    40275878: "Australia",
    40412491: "Italy",
    40156762: "Paraguay",
    40157548: "India",
}

# PMIDs whose match-GLOBOCAN copy gets a different text than the base copy.
# Taiwan in base, China in match GLOBOCAN
match_GLOBOCAN_overrides_first_upd_300000 = {
    40637905: "China",
}

# (PMID, copy) pairs for which no row was selected. A mask that selects nothing
# turns the .loc assignment into a silent no-op, so a mistyped PMID would
# otherwise go unnoticed.
unmatched_first_upd_300000 = []

for fix_pmid, fix_country_base in country_fixes_first_upd_300000.items():
    fix_country_match = match_GLOBOCAN_overrides_first_upd_300000.get(fix_pmid, fix_country_base)

    # Base copy first, then the match-GLOBOCAN copy (same order as the
    # statement pairs this loop replaces).
    base_rows = df_parsedX_first_upd_300000_base["PMID"] == fix_pmid
    if not base_rows.any():
        unmatched_first_upd_300000.append((fix_pmid, "base"))
    df_parsedX_first_upd_300000_base.loc[base_rows, ["Country", "Country_source"]] = fix_country_from_text(fix_country_base)

    match_rows = df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == fix_pmid
    if not match_rows.any():
        unmatched_first_upd_300000.append((fix_pmid, "match_GLOBOCAN"))
    df_parsedX_first_upd_300000_match_GLOBOCAN.loc[match_rows, ["Country", "Country_source"]] = fix_country_from_text(fix_country_match)

if unmatched_first_upd_300000:
    print(f"WARNING: no row found for these (PMID, dataset) corrections: {unmatched_first_upd_300000}")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

#df_parsedX_first_upd_300000_base.loc[df_parsedX_first_upd_300000_base["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")
#df_parsedX_first_upd_300000_match_GLOBOCAN.loc[df_parsedX_first_upd_300000_match_GLOBOCAN["PMID"] == , ["Country", "Country_source"]] = fix_country_from_text("")

In [463]:
# Write both corrected copies of the 300000 batch to their respective folders
# (without the DataFrame index), then drop the references to the frames.
output_name_300000 = "parsedXMLs_first_upd_300000.csv"

df_parsedX_first_upd_300000_base.to_csv(
    DF_input_first_update_base + output_name_300000,
    index=False,
)
df_parsedX_first_upd_300000_match_GLOBOCAN.to_csv(
    DF_input_first_update_match_GLOBOCAN + output_name_300000,
    index=False,
)

del df_parsedX_first_upd_300000_base, df_parsedX_first_upd_300000_match_GLOBOCAN

### Correcting *parsedXMLs_first_upd_340800.csv*

In [447]:
# Load the 340800 batch from both dataset copies: the base one and the one
# whose names were adapted to match GLOBOCAN.
input_name_340800 = "parsedXMLs_first_upd_340800.csv"

df_parsedX_first_upd_340800_base = pd.read_csv(
    DF_input_first_update_base + input_name_340800
)
df_parsedX_first_upd_340800_match_GLOBOCAN = pd.read_csv(
    DF_input_first_update_match_GLOBOCAN + input_name_340800
)

In [448]:
## Replace found errors by true Country values

# Manual Country corrections for this batch, as PMID -> true country.
# To register another correction, add an entry here instead of copy-pasting
# a new pair of assignment lines.
country_fixes_340800 = {
    40828257: "Argentina",
    40866457: "Peru",
    40747175: "United States",
    40771078: "India",
    40755327: "Slovakia",
    40826981: "Slovakia",
    40833671: "Spain",
    40949483: "Argentina",
    40717186: "The Netherlands",
    40764035: "United States",
    40789232: "Poland",
    40862778: "Poland",
    40867554: "United States",
    40940186: "Spain",
    40834990: "Argentina",
}

for pmid_to_fix, true_country in country_fixes_340800.items():
    # The same correction is applied to both copies of the dataset
    # (base and GLOBOCAN-matched).
    base_rows = df_parsedX_first_upd_340800_base["PMID"] == pmid_to_fix
    match_rows = df_parsedX_first_upd_340800_match_GLOBOCAN["PMID"] == pmid_to_fix

    # With no matching row the .loc assignment below is a silent no-op, so a
    # mistyped PMID (or a PMID column read with an unexpected dtype) would go
    # unnoticed. Report it instead.
    if not base_rows.any():
        print(f"WARNING: PMID {pmid_to_fix} not found in base dataset; no correction applied")
    if not match_rows.any():
        print(f"WARNING: PMID {pmid_to_fix} not found in match_GLOBOCAN dataset; no correction applied")

    # fix_country_from_text comes from aux_fix_country_utils; presumably it returns
    # the (Country, Country_source) pair for the given name -- TODO confirm.
    df_parsedX_first_upd_340800_base.loc[base_rows, ["Country", "Country_source"]] = fix_country_from_text(true_country)
    df_parsedX_first_upd_340800_match_GLOBOCAN.loc[match_rows, ["Country", "Country_source"]] = fix_country_from_text(true_country)

In [449]:
# Write both corrected copies of the 340800 batch back to the CSV paths they
# were read from (without the DataFrame index), then drop the references to the frames.
output_name_340800 = "parsedXMLs_first_upd_340800.csv"

df_parsedX_first_upd_340800_base.to_csv(
    DF_input_first_update_base + output_name_340800,
    index=False,
)
df_parsedX_first_upd_340800_match_GLOBOCAN.to_csv(
    DF_input_first_update_match_GLOBOCAN + output_name_340800,
    index=False,
)

del df_parsedX_first_upd_340800_base, df_parsedX_first_upd_340800_match_GLOBOCAN