In [3]:
!pip install -q geopy tqdm
!pip install -q pycountry_convert



In [4]:
import pandas as pd

In [5]:
# Read the breach records. Besides the real fields, the file carries two
# leftover index columns ("Unnamed: 0.1" and "Unnamed: 0").
df = pd.read_csv(filepath_or_buffer='df_1.csv')
df

Unnamed: 0.1,Unnamed: 0,Entity,Year,Records,Organization type,Method,Sources
0,0,21st Century Oncology,2016,2200000,healthcare,hacked,[5][6]
1,1,500px,2020,14870304,social networking,hacked,[7]
2,2,Accendo Insurance Co.,2020,175350,healthcare,poor security,[8][9]
3,3,Adobe Systems Incorporated,2013,152000000,tech,hacked,[10]
4,4,Adobe Inc.,2019,7500000,tech,poor security,[11][12]
...,...,...,...,...,...,...,...
347,347,Zynga,2019,173000000,social network,hacked,[406][407]
348,348,Unknown agency(believed to be tied to United S...,2020,200000000,financial,accidentally published,[408]
349,349,National Health Information Center (NCZI) of S...,2020,391250,healthcare,poor security,[409]
350,350,50 companies and government institutions,2022,6400000,various,poor security,[410] [411]


# Create a Country feature from the Entity feature
- Use the free Nominatim API (through `geopy`) in a function that takes an organization/company name and returns its country.

In [6]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import time

# Nominatim client identified by a descriptive user agent.
# geopy's default request timeout is 1 second, which the public Nominatim
# server often exceeds; allow each request more time before it counts as failed.
geolocator = Nominatim(user_agent="data_breach_locator", timeout=10)
# Wrap the lookup so consecutive calls are spaced at least one second apart.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)  # API polite usage

def entity_to_country(entity, geocoder=None):
    """Look up the country of an organization by geocoding its name.

    Two queries are tried in order: ``"<entity> headquarters"`` and then the
    bare entity name. The first query that yields a usable country wins.

    Parameters
    ----------
    entity : str
        Organization or company name. ``None``, NaN and blank strings are
        never sent to the geocoder.
    geocoder : callable, optional
        Called as ``geocoder(query, addressdetails=True, language="en")`` and
        expected to return a geopy ``Location``-like object (``.address`` and
        ``.raw``) or ``None``. Defaults to the notebook-level rate-limited
        ``geocode``.

    Returns
    -------
    str or None
        The country name, or ``None`` when the entity is missing, nothing was
        found, or every lookup failed.
    """
    import logging

    if geocoder is None:
        geocoder = geocode  # resolved at call time, so the cell order does not matter

    # Missing values (None / NaN) and blank names cannot be geocoded.
    if entity is None or entity != entity:
        return None
    name = str(entity).strip()
    if not name:
        return None

    log = logging.getLogger(__name__)

    # "headquarters" is appended first to steer the search towards the company seat;
    # the bare name is the fallback.
    for query in (f"{name} headquarters", name):
        try:
            # addressdetails gives a structured address; language="en" asks for
            # English names instead of the local-language display string.
            location = geocoder(query, addressdetails=True, language="en")
        except Exception as exc:
            # Best-effort lookup: report the failure and move on to the next query.
            log.warning("Geocoding %r failed: %s", query, exc)
            continue

        if not location:
            continue

        # Prefer the structured country field when the geocoder supplies one.
        details = getattr(location, "raw", None) or {}
        country = (details.get("address") or {}).get("country")

        if not country:
            # Otherwise fall back to the last comma-separated part of the address.
            country = (location.address or "").split(",")[-1].strip()

        if country:
            return country

    return None


In [8]:
from tqdm import tqdm

# Geocode every distinct entity once; rows that share an entity reuse the result.
entities = df['Entity'].unique().tolist()

# entity name -> country (None when the lookup found nothing)
entity_country_map = {name: entity_to_country(name) for name in tqdm(entities)}


  3%|▎         | 10/331 [00:20<08:15,  1.54s/it]RateLimiter caught an error, retrying (0/2 tries). Called with (*('Amazon Japan G.K. headquarters',), **{}).
Traceback (most recent call last):
  File "c:\Users\mat\anaconda3\Lib\site-packages\urllib3\connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "c:\Users\mat\anaconda3\Lib\site-packages\urllib3\connection.py", line 464, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\mat\anaconda3\Lib\http\client.py", line 1428, in getresponse
    response.begin()
  File "c:\Users\mat\anaconda3\Lib\http\client.py", line 331, in begin
    version, status, reason = self._read_status()
                              ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\mat\anaconda3\Lib\http\client.py", line 292, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
               ^^^^^^^^^^^^^^^^^^^^^^^^

In [9]:
# Turn the entity -> country lookup into a two-column frame, one row per entity.
loc_df = pd.DataFrame(
    data=[(name, place) for name, place in entity_country_map.items()],
    columns=['Entity', 'Country'],
)
loc_df.head()


Unnamed: 0,Entity,Country
0,21st Century Oncology,United States of America
1,500px,
2,Accendo Insurance Co.,
3,Adobe Systems Incorporated,
4,Adobe Inc.,United States of America


In [10]:
# Attach the geocoded country to every breach record.
# Dropping an existing 'Country' column first keeps this cell re-runnable: merging a
# second time would otherwise produce 'Country_x' / 'Country_y'.
# `validate` makes pandas raise if loc_df ever holds more than one row per Entity,
# which would silently duplicate breach records.
df = (
    df.drop(columns=['Country'], errors='ignore')
      .merge(loc_df, on="Entity", how="left", validate="many_to_one")
)


# Add Continent

In [11]:
import pycountry_convert as pc

def country_to_continent(country):
    """Translate a country name into a continent name.

    Parameters
    ----------
    country : str
        Country name as understood by ``pycountry_convert``. Surrounding
        whitespace is ignored.

    Returns
    -------
    str
        The continent name, or ``"Unknown"`` when ``country`` is missing,
        not a string, blank, or cannot be resolved.
    """
    # Missing values (None / NaN) and blank strings cannot be looked up.
    if not isinstance(country, str) or not country.strip():
        return "Unknown"

    try:
        code = pc.country_name_to_country_alpha2(country.strip())
        cont_code = pc.country_alpha2_to_continent_code(code)
        return pc.convert_continent_code_to_continent_name(cont_code)
    except Exception:
        # Unresolvable names/codes surface as exceptions from pycountry_convert
        # (presumably KeyError -- TODO confirm). Catching Exception instead of a bare
        # `except:` lets KeyboardInterrupt and SystemExit propagate.
        return "Unknown"

# Derive the continent for every row; rows without a country are labelled "Unknown".
df['Continent'] = df['Country'].map(
    lambda name: "Unknown" if pd.isnull(name) else country_to_continent(name)
)


In [12]:
# Entities the automatic lookup could not place: either no country was found,
# or the country did not resolve to a continent.
unresolved_rows = df['Country'].isna() | df['Continent'].eq('Unknown')
unknowns = df.loc[unresolved_rows, 'Entity'].unique()
unknowns


array(['500px', 'Accendo Insurance Co.', 'Adobe Systems Incorporated',
       'AerServ (subsidiary of InMobi)', 'Affinity Health Plan, Inc.',
       'Amazon Japan G.K.', 'Ancestry.com', 'Animal Jam',
       'Ankle & Foot Center of Tampa Bay, Inc.', 'Apple, Inc./BlueToad',
       'Apple Health Medicaid', 'Atraf', 'Auction.co.kr',
       'Australian Immigration Department', 'Automatic Data Processing',
       'AvMed, Inc.', 'Benesse', 'Betfair', 'Betsson Group',
       'Blank Media Games', 'Blizzard Entertainment', 'BMO and Simplii',
       '2018 British Airways cyberattack',
       '2019 Bulgarian revenue agency hack',
       'California Department of Child Support Services', 'Canva',
       'CardSystems Solutions Inc. (MasterCard, Visa, Discover Financial Services and American Express)',
       'Cathay Pacific Airways',
       'CareFirst BlueCross Blue Shield - Maryland',
       'CheckFree Corporation', 'CheckPeople',
       'China Software Developer Network',
       'Chinese gaming we

## Batch A

In [13]:
# Start the manual mapping from a clean slate: both columns are blanked for every row,
# which discards the values filled in so far. Execute this only once, before the batch
# assignments -- running it again wipes whatever they have already written.
for column in ('Country', 'Continent'):
    df[column] = None

# Hand-curated (country, continent) assignments for the entities from
# "21st Century Oncology" through "Dropbox".
# Every place is spelled out once and shared by all entities located there.
UNITED_STATES = ("United States", "North America")
CANADA = ("Canada", "North America")
INDIA = ("India", "Asia")
JAPAN = ("Japan", "Asia")
ISRAEL = ("Israel", "Asia")
SOUTH_KOREA = ("South Korea", "Asia")
AUSTRALIA = ("Australia", "Oceania")
UNITED_KINGDOM = ("United Kingdom", "Europe")
MALTA = ("Malta", "Europe")
BULGARIA = ("Bulgaria", "Europe")
HONG_KONG = ("Hong Kong", "Asia")
CHINA = ("China", "Asia")
ITALY = ("Italy", "Europe")
FRANCE = ("France", "Europe")

# entity name -> (country, continent)
batch_a_mapping = {
    "21st Century Oncology": UNITED_STATES,
    "500px": CANADA,
    "Accendo Insurance Co.": UNITED_STATES,
    "Adobe Systems Incorporated": UNITED_STATES,
    "Adobe Inc.": UNITED_STATES,
    "Advocate Medical Group": UNITED_STATES,
    "AerServ (subsidiary of InMobi)": UNITED_STATES,
    "Affinity Health Plan, Inc.": UNITED_STATES,
    "Airtel": INDIA,
    "Air Canada": CANADA,
    "Amazon Japan G.K.": JAPAN,
    "TD Ameritrade": UNITED_STATES,
    "Ancestry.com": UNITED_STATES,
    "Animal Jam": UNITED_STATES,
    "Ankle & Foot Center of Tampa Bay, Inc.": UNITED_STATES,
    "Anthem Inc.": UNITED_STATES,
    "AOL": UNITED_STATES,
    "Apple, Inc./BlueToad": UNITED_STATES,
    "Apple": UNITED_STATES,
    "Apple Health Medicaid": UNITED_STATES,
    "Ashley Madison": CANADA,
    "AT&T": UNITED_STATES,
    "Atraf": ISRAEL,
    "Auction.co.kr": SOUTH_KOREA,
    "Australian Immigration Department": AUSTRALIA,
    "Australian National University": AUSTRALIA,
    "Automatic Data Processing": UNITED_STATES,
    "AvMed, Inc.": UNITED_STATES,
    "Bailey's Inc.": UNITED_STATES,
    "The Bank of New York Mellon": UNITED_STATES,
    "Bank of America": UNITED_STATES,
    "Barnes & Noble": UNITED_STATES,
    "Bell Canada": CANADA,
    "Benesse": JAPAN,
    "Betfair": UNITED_KINGDOM,
    "Bethesda Game Studios": UNITED_STATES,
    "Betsson Group": MALTA,
    "Blank Media Games": UNITED_STATES,
    "Blizzard Entertainment": UNITED_STATES,
    "BlueCross BlueShield of Tennessee": UNITED_STATES,
    "BMO and Simplii": CANADA,
    "2018 British Airways cyberattack": UNITED_KINGDOM,
    "British Airways": UNITED_KINGDOM,
    "2019 Bulgarian revenue agency hack": BULGARIA,
    "California Department of Child Support Services": UNITED_STATES,
    "Canva": AUSTRALIA,
    "Capcom": JAPAN,
    "Capital One": UNITED_STATES,
    "CardSystems Solutions Inc. (MasterCard, Visa, Discover Financial Services and American Express)": UNITED_STATES,
    "Cathay Pacific Airways": HONG_KONG,
    "CareFirst BlueCross Blue Shield - Maryland": UNITED_STATES,
    # NOTE(review): possibly the California credit union rather than an Australian one -- confirm
    "Central Coast Credit Union": AUSTRALIA,
    "Central Hudson Gas & Electric": UNITED_STATES,
    "CheckFree Corporation": UNITED_STATES,
    "CheckPeople": UNITED_STATES,
    "China Software Developer Network": CHINA,
    "Chinese gaming websites (three: Duowan, 7K7K, 178.com)": CHINA,
    "Citigroup": UNITED_STATES,
    "City and Hackney Teaching Primary Care Trust": UNITED_KINGDOM,
    "Clearview AI": UNITED_STATES,
    "Colorado government": UNITED_STATES,
    "Community Health Systems": UNITED_STATES,
    "Compass Bank": UNITED_STATES,
    "Countrywide Financial Corp": UNITED_STATES,
    "Centers for Medicare & Medicaid Services": UNITED_STATES,
    "Cox Communications": UNITED_STATES,
    "Crescent Health Inc., Walgreens": UNITED_STATES,
    "CVS": UNITED_STATES,
    # NOTE(review): CyberServe may be an Israeli hosting provider -- confirm the country
    "CyberServe": UNITED_KINGDOM,
    "Dai Nippon Printing": JAPAN,
    "Data Processors International (MasterCard, Visa, Discover Financial Services and American Express)": UNITED_STATES,
    "Defense Integrated Data Center (South Korea)": SOUTH_KOREA,
    "Dedalus": ITALY,
    "Deloitte": UNITED_KINGDOM,
    "US Department of Homeland Security": UNITED_STATES,
    "Desjardins": CANADA,
    "Domino's Pizza (France)": FRANCE,
    "DoorDash": UNITED_STATES,
    "UK Driving Standards Agency": UNITED_KINGDOM,
    "Dropbox": UNITED_STATES,
}

# Write the manual assignments into df. Keys are compared to df['Entity'] with exact
# equality, so a key that matches no row (typo, stray whitespace) is collected and
# reported instead of being skipped silently.
unmatched_keys = []
for entity, (country, continent) in batch_a_mapping.items():
    rows = df['Entity'] == entity
    if not rows.any():
        unmatched_keys.append(entity)
        continue
    df.loc[rows, ['Country', 'Continent']] = [country, continent]

if unmatched_keys:
    print(f"batch_a_mapping: {len(unmatched_keys)} key(s) match no Entity: {unmatched_keys}")

# Preview the rows this batch touched.
df[df['Entity'].isin(batch_a_mapping.keys())][['Entity', 'Country', 'Continent']].head(20)




Unnamed: 0,Entity,Country,Continent
0,21st Century Oncology,United States,North America
1,500px,Canada,North America
2,Accendo Insurance Co.,United States,North America
3,Adobe Systems Incorporated,United States,North America
4,Adobe Inc.,United States,North America
5,Advocate Medical Group,United States,North America
6,AerServ (subsidiary of InMobi),United States,North America
7,"Affinity Health Plan, Inc.",United States,North America
8,Airtel,India,Asia
9,Air Canada,Canada,North America


## Batch B

In [15]:
# Hand-curated (country, continent) assignments for the entities from
# "Drupal" through "Massachusetts Government".
# Every place is spelled out once and shared by all entities located there.
UNITED_STATES = ("United States", "North America")
CANADA = ("Canada", "North America")
UNITED_KINGDOM = ("United Kingdom", "Europe")
GERMANY = ("Germany", "Europe")
GREECE = ("Greece", "Europe")
LITHUANIA = ("Lithuania", "Europe")
IRELAND = ("Ireland", "Europe")
SPAIN = ("Spain", "Europe")
SWITZERLAND = ("Switzerland", "Europe")
RUSSIA = ("Russia", "Europe")
JAPAN = ("Japan", "Asia")
SOUTH_KOREA = ("South Korea", "Asia")
SINGAPORE = ("Singapore", "Asia")
IRAN = ("Iran", "Asia")
INDIA = ("India", "Asia")
HONG_KONG = ("Hong Kong", "Asia")
MULTIPLE_COUNTRIES = ("Multiple Countries", "Global")
UNKNOWN_PLACE = ("Unknown", "Unknown")

# entity name -> (country, continent)
batch_b_mapping = {
    "Drupal": UNITED_STATES,
    "DSW Inc.": UNITED_STATES,
    "Dubsmash": UNITED_STATES,
    "Dun & Bradstreet": UNITED_STATES,
    "EasyJet": UNITED_KINGDOM,
    "eBay": UNITED_STATES,
    "Earl Enterprises(Buca di Beppo, Earl of Sandwich, Planet Hollywood,Chicken Guy, Mixology, Tequila Taqueria)": UNITED_STATES,
    "Educational Credit Management Corporation": UNITED_STATES,
    "Eisenhower Medical Center": UNITED_STATES,
    "ElasticSearch": UNITED_STATES,
    "Embassy Cables": MULTIPLE_COUNTRIES,
    "Emergency Healthcare Physicians, Ltd.": UNITED_STATES,
    "Emory Healthcare": UNITED_STATES,
    "Equifax": UNITED_STATES,
    "European Central Bank": GERMANY,
    "Evernote": UNITED_STATES,
    "Exactis": UNITED_STATES,
    "Excellus BlueCross BlueShield": UNITED_STATES,
    "Experian - T-Mobile US": UNITED_STATES,
    "EyeWire": UNITED_STATES,
    "Facebook": UNITED_STATES,
    "Fast Retailing": JAPAN,
    "Federal Reserve Bank of Cleveland": UNITED_STATES,
    "Fidelity National Information Services": UNITED_STATES,
    "First American Corporation": UNITED_STATES,
    "FireEye": UNITED_STATES,
    "Florida Department of Juvenile Justice": UNITED_STATES,
    "Friend Finder Networks": UNITED_STATES,
    "Funimation": UNITED_STATES,
    "Formspring": UNITED_STATES,
    "Unknown": UNKNOWN_PLACE,
    "Gamigo": GERMANY,
    "Gap Inc.": UNITED_STATES,
    "Gawker": UNITED_STATES,
    "Global Payments": UNITED_STATES,
    "Gmail": UNITED_STATES,
    "Google Plus": UNITED_STATES,
    "Greek government": GREECE,
    "Grozio Chirurgija": LITHUANIA,
    "GS Caltex": SOUTH_KOREA,
    "Gyft": UNITED_STATES,
    "Hannaford Brothers Supermarket Chain": UNITED_STATES,
    "HauteLook": UNITED_STATES,
    "Health Net": UNITED_STATES,
    "Health Net — IBM": UNITED_STATES,
    "Health Sciences Authority (Singapore)": SINGAPORE,
    "Health Service Executive": IRELAND,
    "Heartland": UNITED_STATES,
    "Heathrow Airport": UNITED_KINGDOM,
    "Hewlett Packard": UNITED_STATES,
    "Hilton Hotels": UNITED_STATES,
    "Home Depot": UNITED_STATES,
    "Honda Canada": CANADA,
    "Hyatt Hotels": UNITED_STATES,
    "Iberdrola": SPAIN,
    "Instagram": UNITED_STATES,
    "Internal Revenue Service": UNITED_STATES,
    "International Committee of the Red Cross": SWITZERLAND,
    "Inuvik hospital": CANADA,
    "Iranian banks (three: Saderat, Eghtesad Novin, and Saman)": IRAN,
    "Japan Pension Service": JAPAN,
    "Japanet Takata": JAPAN,
    "Jefferson County, West Virginia": UNITED_STATES,
    "JP Morgan Chase": UNITED_STATES,
    "Justdial": INDIA,
    "KDDI": JAPAN,
    "Kirkwood Community College": UNITED_STATES,
    "KM.RU": RUSSIA,
    "Koodo Mobile": CANADA,
    "Korea Credit Bureau": SOUTH_KOREA,
    "Kroll Background America": UNITED_STATES,
    "KT Corporation": SOUTH_KOREA,
    "LexisNexis": UNITED_STATES,
    "Landry's, Inc.": UNITED_STATES,
    "Les Éditions Protégez-vous": CANADA,
    "LifeLabs": CANADA,
    "Lincoln Medical & Mental Health Center": UNITED_STATES,
    "LinkedIn, eHarmony, Last.fm": MULTIPLE_COUNTRIES,
    "Living Social": UNITED_STATES,
    "MacRumors.com": UNITED_STATES,
    "Mandarin Oriental Hotels": HONG_KONG,
    "Marriott International": UNITED_STATES,
    "Massachusetts Government": UNITED_STATES,
}

# Write the manual assignments into df. Keys are compared to df['Entity'] with exact
# equality, so a key that matches no row (typo, stray whitespace) is collected and
# reported instead of being skipped silently.
unmatched_keys = []
for entity, (country, continent) in batch_b_mapping.items():
    rows = df['Entity'] == entity
    if not rows.any():
        unmatched_keys.append(entity)
        continue
    df.loc[rows, ['Country', 'Continent']] = [country, continent]

if unmatched_keys:
    print(f"batch_b_mapping: {len(unmatched_keys)} key(s) match no Entity: {unmatched_keys}")

# Preview the rows this batch touched.
df[df['Entity'].isin(batch_b_mapping.keys())][['Entity', 'Country', 'Continent']].head(20)


Unnamed: 0,Entity,Country,Continent
90,Drupal,United States,North America
91,DSW Inc.,United States,North America
92,Dubsmash,United States,North America
93,Dun & Bradstreet,United States,North America
94,EasyJet,United Kingdom,Europe
95,eBay,United States,North America
96,"Earl Enterprises(Buca di Beppo, Earl of Sandwi...",United States,North America
97,Educational Credit Management Corporation,United States,North America
98,Eisenhower Medical Center,United States,North America
99,ElasticSearch,United States,North America


## Batch C

In [20]:
# Hand-curated (country, continent) assignments for the entities from
# "Massive American business hack including 7-Eleven and Nasdaq" through
# "Syrian government (Syria Files)".
# Every place is spelled out once and shared by all entities located there.
UNITED_STATES = ("United States", "North America")
CHILE = ("Chile", "South America")
UNITED_KINGDOM = ("United Kingdom", "Europe")
RUSSIA = ("Russia", "Europe")
NORWAY = ("Norway", "Europe")
FRANCE = ("France", "Europe")
JAPAN = ("Japan", "Asia")
SINGAPORE = ("Singapore", "Asia")
ISRAEL = ("Israel", "Asia")
SOUTH_KOREA = ("South Korea", "Asia")
SYRIA = ("Syria", "Asia")
SOUTH_AFRICA = ("South Africa", "Africa")
MULTIPLE_COUNTRIES = ("Multiple Countries", "Global")

# entity name -> (country, continent)
batch_c_mapping = {
    "Massive American business hack including 7-Eleven and Nasdaq": MULTIPLE_COUNTRIES,
    "US Medicaid": UNITED_STATES,
    "Medical Informatics Engineering": UNITED_STATES,
    "Memorial Healthcare System": UNITED_STATES,
    "Michaels": UNITED_STATES,
    "Microsoft": UNITED_STATES,
    "Microsoft Exchange servers": UNITED_STATES,
    "Militarysingles.com": UNITED_STATES,
    "Ministry of Education (Chile)": CHILE,
    "Ministry of Health (Singapore)": SINGAPORE,
    "Mitsubishi Tokyo UFJ Bank": JAPAN,
    "MongoDB": UNITED_STATES,
    "Monster.com": UNITED_STATES,
    "Morgan Stanley Smith Barney": UNITED_STATES,
    "Morinaga Confectionery": JAPAN,
    "Mozilla": UNITED_STATES,
    "MyHeritage": ISRAEL,
    "NASDAQ": UNITED_STATES,
    "Natural Grocers": UNITED_STATES,
    "NEC Networks, LLC": UNITED_STATES,
    "Neiman Marcus": UNITED_STATES,
    "Nemours Foundation": UNITED_STATES,
    "Network Solutions": UNITED_STATES,
    "New York City Health & Hospitals Corp.": UNITED_STATES,
    "New York State Electric & Gas": UNITED_STATES,
    "New York Taxis": UNITED_STATES,
    "Nexon Korea Corp": SOUTH_KOREA,
    "NHS": UNITED_KINGDOM,
    "Nintendo (Club Nintendo)": JAPAN,
    "Nintendo (Nintendo Account)": JAPAN,
    "Nippon Television": JAPAN,
    "Nival Networks": RUSSIA,
    "Norwegian Tax Administration": NORWAY,
    "Now:Pensions": UNITED_KINGDOM,
    "Ofcom": UNITED_KINGDOM,
    "US Office of Personnel Management": UNITED_STATES,
    "Office of the Texas Attorney General": UNITED_STATES,
    "Ohio State University": UNITED_STATES,
    "Orbitz": UNITED_STATES,
    "Oregon Department of Transportation": UNITED_STATES,
    "OVH": FRANCE,
    "Patreon": UNITED_STATES,
    "PayPay": JAPAN,
    "Popsugar": UNITED_STATES,
    "Premera": UNITED_STATES,
    "Puerto Rico Department of Health": UNITED_STATES,
    "Quest Diagnostics": UNITED_STATES,
    "Quora": UNITED_STATES,
    "Rakuten": JAPAN,
    "Rambler.ru": RUSSIA,
    "RBS Worldpay": UNITED_KINGDOM,
    "Reddit": UNITED_STATES,
    "Restaurant Depot": UNITED_STATES,
    "RockYou!": UNITED_STATES,
    "Rosen Hotels": UNITED_STATES,
    "Sakai City, Japan": JAPAN,
    "San Francisco Public Utilities Commission": UNITED_STATES,
    "Scottrade": UNITED_STATES,
    "Scribd": UNITED_STATES,
    "Seacoast Radiology, PA": UNITED_STATES,
    "Sega": JAPAN,
    "Service Personnel and Veterans Agency (UK)": UNITED_KINGDOM,
    "ShopBack": SINGAPORE,
    "SingHealth": SINGAPORE,
    "Slack": UNITED_STATES,
    "SlickWraps": UNITED_STATES,
    "SnapChat": UNITED_STATES,
    "SolarWinds": UNITED_STATES,
    "Sony Online Entertainment": UNITED_STATES,
    "Sony Pictures": UNITED_STATES,
    "Sony PlayStation Network": JAPAN,
    "South Africa police": SOUTH_AFRICA,
    "South Carolina Government": UNITED_STATES,
    "South Shore Hospital, Massachusetts": UNITED_STATES,
    "Southern California Medical-Legal Consultants": UNITED_STATES,
    "Spartanburg Regional Healthcare System": UNITED_STATES,
    "Stanford University": UNITED_STATES,
    "Starbucks": UNITED_STATES,
    "Starwoodincluding Westin Hotels & Resorts and Sheraton Hotels and Resorts": UNITED_STATES,
    "State of Texas": UNITED_STATES,
    "Steam": UNITED_STATES,
    "StockX": UNITED_STATES,
    "Stratfor": UNITED_STATES,
    "Supervalu": UNITED_STATES,
    "Sutter Medical Center": UNITED_STATES,
    "Syrian government (Syria Files)": SYRIA,
}

# Write the manual assignments into df. Keys are compared to df['Entity'] with exact
# equality, so a key that matches no row (typo, stray whitespace) is collected and
# reported instead of being skipped silently.
unmatched_keys = []
for entity, (country, continent) in batch_c_mapping.items():
    rows = df['Entity'] == entity
    if not rows.any():
        unmatched_keys.append(entity)
        continue
    df.loc[rows, ['Country', 'Continent']] = [country, continent]

if unmatched_keys:
    print(f"batch_c_mapping: {len(unmatched_keys)} key(s) match no Entity: {unmatched_keys}")

# Preview the rows this batch touched.
df[df['Entity'].isin(batch_c_mapping.keys())][['Entity', 'Country', 'Continent']].head(20)

Unnamed: 0,Entity,Country,Continent
179,Massive American business hack including 7-Ele...,Multiple Countries,Global
180,US Medicaid,United States,North America
181,Medical Informatics Engineering,United States,North America
182,Memorial Healthcare System,United States,North America
183,Michaels,United States,North America
184,Microsoft,United States,North America
185,Microsoft Exchange servers,United States,North America
186,Militarysingles.com,United States,North America
187,Ministry of Education (Chile),Chile,South America
188,Ministry of Health (Singapore),Singapore,Asia


## Batch D

In [21]:
# Hand-curated assignments for the entities from "Taobao" through "IKEA".
# entity name -> (country, continent)
batch_d_mapping = {
    "Taobao": ("China", "Asia"),
    "Taringa!": ("Argentina", "South America"),
    "Target Corporation": ("United States", "North America"),
    "TaxSlayer.com": ("United States", "North America"),
    "TD Bank": ("United States", "North America"),
    "TerraCom & YourTel": ("United States", "North America"),
    "Tetrad": ("United States", "North America"),
    "Texas Lottery": ("United States", "North America"),
    "Ticketfly (subsidiary of Eventbrite)": ("United States", "North America"),
    "Tianya Club": ("China", "Asia"),
    "TikTok": ("China", "Asia"),
    "TK / TJ Maxx": ("United States", "North America"),
    "T-Mobile, Deutsche Telekom": ("Germany", "Europe"),
    "T-Mobile": ("United States", "North America"),
    "Tricare": ("United States", "North America"),
    "Triple-S Salud, Inc.": ("United States", "North America"),
    "Truecaller": ("Sweden", "Europe"),
    "Trump Hotels": ("United States", "North America"),
    "Tumblr": ("United States", "North America"),
    "Twitch": ("United States", "North America"),
    "Twitter": ("United States", "North America"),
    "Typeform": ("Spain", "Europe"),
    "Uber": ("United States", "North America"),
    "Ubisoft": ("France", "Europe"),
    "Ubuntu": ("United Kingdom", "Europe"),
    "UCLA Medical Center, Santa Monica": ("United States", "North America"),
    "UK Home Office": ("United Kingdom", "Europe"),
    "UK Ministry of Defence": ("United Kingdom", "Europe"),
    "UK Revenue & Customs": ("United Kingdom", "Europe"),
    "Universiti Teknologi MARA": ("Malaysia", "Asia"),
    "Under Armour": ("United States", "North America"),
    "University of California, Berkeley": ("United States", "North America"),
    "University of Maryland, College Park": ("United States", "North America"),
    "University of Central Florida": ("United States", "North America"),
    "University of Miami": ("United States", "North America"),
    "University of Utah Hospital & Clinics": ("United States", "North America"),
    "University of Wisconsin–Milwaukee": ("United States", "North America"),
    "United States Postal Service": ("United States", "North America"),
    "UPS": ("United States", "North America"),
    "U.S. Army": ("United States", "North America"),
    "U.S. Army(classified Iraq War documents)": ("United States", "North America"),
    "U.S. Department of Defense": ("United States", "North America"),
    "U.S. Department of Veteran Affairs": ("United States", "North America"),
    "U.S. federal government (2020 United States federal government data breach)": ("United States", "North America"),
    # The agencies are U.S. ones by the entity's own name, so this row belongs with the
    # other "U.S. ..." entries rather than under "Multiple Countries" / "Global".
    "U.S. law enforcement (70 different agencies)": ("United States", "North America"),
    "National Archives and Records Administration (U.S. military veterans records)": ("United States", "North America"),
    "U.S. government (United States diplomatic cables leak)": ("United States", "North America"),
    "National Guard of the United States": ("United States", "North America"),
    "Vastaamo": ("Finland", "Europe"),
    "Verizon Communications": ("United States", "North America"),
    "View Media": ("United States", "North America"),
    "Virgin Media": ("United Kingdom", "Europe"),
    "Virginia Department of Health": ("United States", "North America"),
    "Virginia Prescription Monitoring Program": ("United States", "North America"),
    "Vodafone": ("United Kingdom", "Europe"),
    "VTech": ("Hong Kong", "Asia"),
    "Walmart": ("United States", "North America"),
    "Washington Post": ("United States", "North America"),
    "Washington State court system": ("United States", "North America"),
    "Wattpad": ("Canada", "North America"),
    "Wawa (company)": ("United States", "North America"),
    "Weebly": ("United States", "North America"),
    "Wendy's": ("United States", "North America"),
    "Westpac": ("Australia", "Oceania"),
    "Woodruff Arts Center": ("United States", "North America"),
    "Writerspace.com": ("United States", "North America"),
    "Xat.com": ("United States", "North America"),
    "Yahoo": ("United States", "North America"),
    "Yahoo Japan": ("Japan", "Asia"),
    "Yahoo! Voices": ("United States", "North America"),
    "Yale University": ("United States", "North America"),
    "YouTube": ("United States", "North America"),
    "Zappos": ("United States", "North America"),
    "Zynga": ("United States", "North America"),
    "Unknown agency(believed to be tied to United States Census Bureau)": ("United States", "North America"),
    "National Health Information Center (NCZI) of Slovakia": ("Slovakia", "Europe"),
    "50 companies and government institutions": ("Multiple Countries", "Global"),
    "IKEA": ("Netherlands", "Europe")
}

# Write the manual assignments into df. Keys are compared to df['Entity'] with exact
# equality, so a key that matches no row (typo, stray whitespace) is collected and
# reported instead of being skipped silently.
unmatched_keys = []
for entity, (country, continent) in batch_d_mapping.items():
    rows = df['Entity'] == entity
    if not rows.any():
        unmatched_keys.append(entity)
        continue
    df.loc[rows, ['Country', 'Continent']] = [country, continent]

if unmatched_keys:
    print(f"batch_d_mapping: {len(unmatched_keys)} key(s) match no Entity: {unmatched_keys}")

# Preview the rows this batch touched.
df[df['Entity'].isin(batch_d_mapping.keys())][['Entity', 'Country', 'Continent']].head(20)


Unnamed: 0,Entity,Country,Continent
268,Taobao,China,Asia
269,Taringa!,Argentina,South America
270,Target Corporation,United States,North America
271,TaxSlayer.com,United States,North America
273,TD Bank,United States,North America
274,TerraCom & YourTel,United States,North America
275,Tetrad,United States,North America
276,Texas Lottery,United States,North America
277,Ticketfly (subsidiary of Eventbrite),United States,North America
278,Tianya Club,China,Asia
