In [2]:
# Cell 0: Synthetic PII Dataset Generator
# This notebook generates fictional personal data for training PII detection models
# IMPORTANT: All data is synthetic and fictional - do not use for real applications
import pandas as pd
import random
import os

# --- Configuration Section ---
# Number of records to generate per country, by country category
NUM_RECORDS_PER_MAJOR_COUNTRY = 1_000
# Kept low on purpose: raising this (e.g. to 500) might make the file very large
NUM_RECORDS_PER_OTHER_COUNTRY = 100
# Name of the output file
OUTPUT_FILENAME = "final_dataset.csv"

# --- Data Source Configuration ---
# country_data maps each country name to the fictional building blocks used for its records:
#   "first_names" / "last_names": candidate name strings
#   "phone_prefixes": phone-number prefixes (country code plus leading digits)
#   "address_templates": address patterns containing {placeholder} fields
#   "cities": candidate city names
# Note: Names are simplified English transliterations for demonstration purposes
# In production, you would use more extensive and accurate name databases
country_data = {
    "China": {
        "first_names": ["Wei", "Jing", "Lei", "Min", "Li", "Chen", "Wang", "Zhang", "Liu", "Zhao", "Zhou", "Yang", "Huang", "Wu", "Xu", "Sun", "Hu", "Guo", "He", "Gao"],
        "last_names": ["Li", "Wang", "Zhang", "Liu", "Chen", "Yang", "Huang", "Zhao", "Wu", "Zhou", "Xu", "Sun", "Ma", "Zhu", "Hu", "Gao", "Lin", "Deng"],
        "phone_prefixes": ["+86-13", "+86-15", "+86-18"], # Chinese mobile numbers often start with 13, 15, or 18
        "address_templates": [
            "{number} {street_name} Road, {district}, {city}",
            "{building} {number}, {street_name} Street, {city}",
            "Unit {unit}, {building} Building, {street_name} Avenue, {city}"
        ],
        "cities": ["Shanghai", "Beijing", "Guangzhou", "Shenzhen", "Chengdu", "Chongqing", "Tianjin", "Hangzhou", "Nanjing", "Wuhan"]
    },
    "India": {
        "first_names": [
    "Abhishek", "Aman", "Harsh", "Ayush", "Aditi", "Anjali", "Shubham", "Anushka", "Rohit", "Saurabh",
    "Muskan", "Rahul", "Utkarsh", "Vaibhav", "Amit", "Saumya", "Rishabh", "Shruti", "Himanshu", "Kajal",
    "Ankit", "Gaurav", "Nikhil", "Siddharth", "Prashant", "Priya", "Harshit", "Shashank", "Akash", "Varun",
    "Yash", "Shreya", "Harshita", "Anurag", "Vivek", "Swati", "Vishal", "Aditya", "Nidhi", "Ayushi",
    "Krishna", "Anshika", "Sakshi", "Shivani", "Prakhar", "Mansi", "Tushar", "Abhinav", "Shivangi", "Ashutosh",
    "Adarsh", "Divya", "Piyush", "Pragya", "Ajay", "Akanksha", "Neeraj", "Ritika", "Tanya", "Nisha",
    "Arun", "Pallavi", "Aniket", "Nikita", "Vijay", "Ananya", "Priyanshi", "Suraj", "Akshat", "Ishika",
    "Mohit", "Palak", "Ankur", "Richa", "Ravi", "Arpit", "Ankita", "Shraddha", "Deepak", "Priyanka",
    "Khushi", "Shweta", "Kavya", "Kunal", "Dheeraj", "Akshita", "Riya", "Sneha", "Pranjal", "Isha",
    "Sumit", "Kishan", "Pawan", "Soumya", "Neelesh", "Sarthak", "Alok", "Raghav", "Rishi", "Pragati",
    "Shivam", "Lakshya", "Ashish", "Sandeep", "Ishita", "Shaurya", "Kashish", "Vineet", "Mayank", "Jyoti",
    "Parul", "Shambhavi", "Anshu", "Keshav", "Prince", "Prakash", "Pratibha", "Praveen", "Priyanshu", "Kshitij",
    "Arushi", "Ishan", "Garima", "Manish", "Vaishnavi", "Shreyansh", "Atul", "Sarvesh", "Vidya", "Shubhangi",
    "Mohsin", "Saran", "Vikash", "Aayush", "Akhilesh", "Nitin", "Himani", "Sushil", "Vartika", "Aviral",
    "Abdul", "Sumeet", "Prerna", "Nupur", "Neha", "Vikas", "Vanshika", "Rohan", "Shivansh", "Sunil",
    "Juhi", "Nitesh", "Ganesh", "Naveen", "Shikhar", "Jitendra", "Chirag", "Abhay", "Lucky", "Smriti",
    "Manu", "Deependra", "Anoop", "Devendra", "Diksha", "Muskaan", "Divyansh", "Kushal", "Nitish", "Preeti",
    "Anand", "Hemant", "Amol", "Sonal", "Rishab", "Shashwat", "Shatakshi", "Supriya", "Anchal", "Simran",
    "Srijan", "Ashwani", "Stuti", "Abhijeet", "Harshvardhan", "Ajit", "Poornima", "Anil", "Mahendra", "Ahmad",
    "Shalini", "Abhijit", "Suyash", "Sanskriti", "Danish", "Tamanna", "Unnati", "Sachin", "Anupam", "Shailesh",
    "Shoaib", "Vishnu", "Surya", "Pankaj", "Kaif", "Pooja", "Gayathri", "Pradeep", "Ritu", "Dinesh",
    "Shriya", "Tharun", "Karan", "Kiran", "Tarun", "Samarth", "Ruchi", "Udit", "Rashi", "Shailendra",
    "Sameer", "Shubhi", "Siddhant", "Mahi", "Kanika", "Aishwarya", "Rajat", "Rituraj", "Naman", "Mukesh",
    "Roshan", "Nandini", "Rashmi", "Vigneshwaran", "Kriti", "Arpita", "Divyanshi", "Yogesh", "Deepika", "Astha",
    "Sanjay", "Mahek", "Narendra", "Ekta", "Adil", "Ritesh", "Bhumika", "Samriddhi", "Anmol", "Aayushi",
    "Akshay", "Shekhar", "Amisha", "Amrita", "Sekhar", "Kundalik", "Durga", "Apurva", "Alka", "Indora",
    "Aqsa", "Anwar", "Apratim", "Hasni", "Dalia", "Aadharsh", "Amarnath", "Bramoni", "Nethaji", "Chetna",
    "Aastha", "Aakriti", "Abhyuday", "Akarsh", "Navneet", "Om", "Swapnil", "Akhil", "Madhav", "Divyam",
    "Kirti", "Annanya", "Arunima", "Niteesh", "Nithisha", "Sandhya", "Uttam", "Vatsalya", "Aparna", "Disha",
    "Seema", "Suryansh", "Yashaswi", "Avinash", "Smita", "Subhra", "Tuhin", "Arti", "Aakash", "Ayan",
    ],
        "last_names": [
    "Singh", "Gupta", "Kumar", "Yadav", "Pandey", "Mishra", "Srivastava", "Agarwal", "Sharma", "Verma",
    "Jaiswal", "Tiwari", "Jain", "Rai", "Tripathi", "Khan", "Shukla", "Agrawal", "Dubey", "Rastogi",
    "Patel", "Maurya", "Reddy", "Saxena", "Kumari", "Chaudhary", "Rathore", "Gautam", "Pal", "Soni",
    "Dixit", "Pathak", "Meena", "Upadhyay", "Sinha", "Mehrotra", "Dwivedi", "Kushwaha", "Raj", "Aggarwal",
    "Ali", "Arora", "Saini", "Vishwakarma", "Chaurasia", "Tandon", "Alam", "Sahu", "Bisht", "Chaturvedi",
    "Bhardwaj", "Goyal", "Tomar", "Ahmad", "Chandra", "Siddiqui", "Chauhan", "Patil", "Ojha", "Kashyap",
    "Garg", "Choudhary", "Mehra", "Kaur", "Nigam", "Bhatia", "Rana", "Rawat", "Mehta", "Roy",
    "Bhatt", "Trivedi", "Shah", "Rizvi", "Negi", "Pant", "Gurnani", "Khanna", "Gurubhaiye", "Singhal",
    "Ansari", "Vaish", "Bajpai", "Anand", "Prasad", "Prakash", "Baranwal", "Kapoor", "Dutta", "Panghal",
    "Chaudhari", "Mustafa", "Barnwal", "Mathur", "Kesarwani", "Rajput", "Pattnaik", "Prajapati", "Deshmukh", "Azmi",
    "Chavan", "Misra", "Kaushik", "Khandelwal", "Mittal", "Giri", "Gite", "Goswami", "Narayan", "Krishna",
    "Nayak", "Maheshwari", "Chaubey", "Rao", "Raza", "Sachan", "Varma", "Quraishi", "Bhadauriya", "Arya",
    "Thakur", "Porwal", "Chaurasiya", "Awasthi", "Agrahari", "Bansal", "Anwar", "Gurjar", "Rathour", "Jha",
    "Varshney", "Hasni", "Kulal", "Zope", "Huque", "Madala", "Sonwani", "Bhatnagar", "Kanchapu", "Shinde",
    "Aenugu", "Jay", "Gudepu", "Sitaram", "Vilas", "Bhaskar", "Wagh", "Waykos", "Wathore", "Tyagi",
    "Bharadwaj", "Hanif", "Tibrewal", "Vikram", "Yamala", "Suman", "Sura", "Khandekar", "Macharla", "Sugathan",
    "Narala", "Tayde", "Swamy", "Nadhe", "Suresh", "Banothu", "Agarawal", "Akhtar", "Asthana", "Chary",
    "Shahi", "Gaur", "Shrivastava", "Chopra", "Dasila", "Yaddanapudi", "Teja", "Bano", "Behuria", "Ahuja",
    "Chawla", "Tewary", "Seth", "Ranjan", "Keshari", "Sandilya", "Balot", "Balmiki", "Deo", "Kannaujia",
    "Bhat", "Bhasin", "Ahmed", "Manchanda", "Nanwani", "Dahiya", "Dhiman", "Hanul", "Singam", "Grover",
    "Bhukya", "Shoaib", "Pawar", "Nag", "Kanojia", "Sagar", "Dutt", "Priyadarshini", "Khare", "Khatoon",
    "Kukreja", "Mukherjee", "Baghel", "Keshri", "Shivastava", "Dohrey", "Dhindhwal", "Dhamane", "Malwan", "Garud",
    "Gangwar", "Gaikwad", "Chandran", "Chakma", "Bugalia", "Nadaf", "Desabathula", "Dagade", "Dadarwal", "Khobragade",
    "Mourya", "Katta", "Kochar", "Mani", "Kori", "Koppula", "Jajoo", "Haokip", "Gutti", "Jivani",
    "Katiyar", "Karkhele", "Kamble", "Souza", "Vashisth", "Goel", "Hansdah", "Khokhar", "Sikarwar", "Indora",
    "Bistagond", "Bapna", "Zaidi", "Chahal", "Gill", "Chouhan", "Sutariya", "Singhania", "Solanki", "Ray",
    "Quadiri", "Bhushan", "Akolkar", "Putta", "Tewari", "Sekhar", "Kothari", "Panesar", "Sonkar", "Pradhan",
    "Rehman", "Kancharla", "Randhawa", "Raman", "Parashar", "Kasaudhan", "Palata", "Mohandas", "Munir", "Dwevedi",
    "Mhaske", "Jauhari", "Prabhu", "Priyadarshi", "Malviya", "Pasupuleti", "Malekar", "Mandadi", "Mahawar", "Pareek",
    "Rakhecha", "Rajendra", "Meghwal", "Nalla", "Shahid", "Shiromani", "Chellani", "Saran", "Sarma", "Sen",
    "Nandanwar", "Sengar", "Magdum", "Raykar", "Oswal", "Sah", "Jafri", "Rausa", "Sain", "Hasnain",
    "Wasif", "Afreen", "Barman", "Ataher", "Gond", "Barai", "Athar", "Bais", "Ahamad", "Shrivastav",
    "Baiswar", "Shivagauri", "Aggrawal", "Chirag", "Chhateja", "Barnawal", "Vaishnavi", "Kalra", "Gakhar", "Chitranshi",
    "Chowdhury", "Aftab", "Wadhawan", "Vasistha", "Waseem", "Balkoty", "Azam", "Arif", "Sarawagi", "Purwar",
    "Bhalla", "Sarraf", "Tanu", "Dev", "Sasidharan", "Uppal", "Lakhmani", "Thawani", "Kansal", "Lamba",
    "Talwar", "Nigan", "Maddeshiya", "Bhriguvanshi", "Malik", "Priya", "Poddar", "Pokhriyal", "Rajesh", "Rauniyar",
    "Pundir", "Rajak", "Majeed", "Parween", "Nath", "Asija", "Motwani", "Parihar", "Patra", "Anjum",
    "Pallav", "Sahai", "Aleem", "Aishwary", "Azhar", "Upreti", "Zehra", "Warsi", "Afsar", "Virani",
    "Srivastav", "Shekhar", "Shingh", "Sareen", "Sawlani", "Sirohi", "Sonker", "Anam", "Siraj", "Jawed",
    "Johari", "Shandilya", "Israni", "Kalsi", "Kanchhal", "Juned", "Kairati", "Gujrati", "Choubey", "Shankdhar",
    "Chandel", "Chetan", "Dikshit", "Fatima", "Jaitley", "Juneja", "Kasera", "Lockwani", "Mahalka", "Kulsum",
    "Kwatra", "Malhotra", "Masood", "Rizwan", "Mahmood", "Rungta", "Saluja", "Kewlani", "Sanwal", "Kavi",
    "Khatri", "Kidwai", "Ruqaiya", "Keshwani", "Chakrabortty", "Durgavanshi", "Bindal", "Bhattacharjee", "Bhattacharya", "Devi",
    "Dhaka", "Das", "Chhipa", "Darshan", "Belide", "Ganapathy", "Ashraf", "Alwadhi", "Adhaulia", "Adhya",
    "Bapat", "Behura", "Baliga", "Atri", "Attraa", "Inavolu", "Islam", "Hasija", "Gomashe", "Guduri",
    "Jushantan", "Kar", "Joseph", "Jagaragallu", "Jose", "Ghosh", "Fathima", "Francis", "Dinu", "Dhaulakhandi",
    "Dhinoja", "Gayathri", "Gehlot", "Durgavansh", "Gadge", "Gangwal", "Jagtap", "Jeykumaran", "Hosur", "Kushalappa",
    "Heblikar", "Kulkarni", "Kisshan", "Kasniya", "Machhan", "Sarath", "Jhindal", "Chechani", "Subbapati", "Murlidhar",
    "Ritikesh", "Iqbal", "Sonawane", "Sankary", "Jodha", "Rishiraj", "Wanve", "Waybhase", "Venkateshwaran", "Theratipally",
    "Tulshidas", "Parul", "Kishor", "Meharda", "Mahajan", "Lavudya", "Machra", "Nischal", "Omkar", "Nagoria",
    "Rasheed", "Rawlani", "Pravesh", "Navas", "Praveenchand", "Shokeen", "Shrivas", "Shankar", "Saraf", "Selvaraj",
    "Mann", "Jadhav", "Jamloki", "Guha", "Ganguly", "Ganore", "Lalchandani", "Maddipatla", "Kotturu", "Jassal",
    "Khaliq", "Jindal", "Kanchan", "Govil", "Dahlan", "Dudeja", "Tekchandani", "Umar", "Sood", "Naqvi",
    "Samdharshni", "Bhadauria", "Bhadouriya", "Taneja", "Swain", "Surabhi", "Suri", "Veerepalli", "Vishal", "Unnikrishnan",
    "Tangirala", "Tidke", "Nagraj", "Nair", "Naaz", "Mitra", "Mukhopadhyay", "Palisetti", "Paul", "Paighan",
    "Nandakumar", "Nasreen", "Middinti", "Mahapatra", "Manchikanti", "Mahankali", "Madaan", "Madina", "Mehendale", "Menon",
    "Manna", "Mangla", "Manju", "Sundar", "Tejaswini", "Suchi", "Shenoy", "Sivan", "Bhal", "Bhorkhade",
    "Baddur", "Varghese", "Anupama", "Shaji", "Puri", "Purkayastha", "Preetham", "Pereddy", "Prasath", "Sasanka",
    "Sehgal", "Sarnobat", "Samaddar", "Saraswat", "Pahade"
],
        "phone_prefixes": ["+91-9", "+91-8", "+91-7"], # Indian mobile numbers often start with 9, 8, 7
        "address_templates": [
            "{number} {area_name}, {street_name} Road, {city}",
            "Flat {flat_num}, {building_name}, {locality}, {city}",
            "{sector} {block}, {street_name} Marg, {city}"
        ],
        "cities": ["New Delhi", "Mumbai", "Bangalore", "Hyderabad", "Chennai", "Kolkata", "Ahmedabad", "Pune", "Jaipur", "Lucknow"]
    },
    "Japan": {
        "first_names": ["Hiroshi", "Sakura", "Kenji", "Yuki", "Daiki", "Akari", "Takeshi", "Mio", "Kaito", "Rina", "Ryota", "Saki", "Yuta", "Hana", "Haruto", "Aoi", "Sota", "Yui", "Ren", "Koharu"],
        "last_names": ["Tanaka", "Suzuki", "Sato", "Takahashi", "Watanabe", "Ito", "Nakamura", "Kobayashi", "Kato", "Yoshida", "Yamada", "Sasaki", "Yamamoto", "Matsumoto", "Inoue", "Kimura", "Hayashi", "Shimizu", "Mori", "Abe"],
        "phone_prefixes": ["+81-90", "+81-80", "+81-70"], # Japanese mobile numbers often start with 90, 80, 70
        "address_templates": [
            "{block}-{street}-{number}, {district}, {city}",
            "{number} {chome}, {area}, {city}",
            "Apt {apt_num}, {building_name}, {area}, {city}"
        ],
        "cities": ["Tokyo", "Osaka", "Kyoto", "Nagoya", "Sapporo", "Fukuoka", "Kobe", "Yokohama", "Sendai", "Hiroshima"]
    },
    "South Korea": {
        # NOTE(review): "Ye-Jun" is listed twice below, so it carries double weight if names are sampled uniformly - confirm intentional
        "first_names": ["Min-Jae", "Ji-Hye", "Seo-Jun", "Ha-Eun", "Do-Yun", "Ji-Woo", "Si-Woo", "Seo-Yeon", "Ye-Jun", "Su-Bin", "Jun-Ho", "Chae-Won", "Woo-Jin", "Da-Eun", "Hyun-Woo", "Ye-Jun", "Jae-Won", "Soo-Jin", "Sung-Min", "Hye-Jin"],
        "last_names": ["Kim", "Lee", "Park", "Choi", "Jung", "Kang", "Jo", "Yoon", "Jang", "Lim", "Han", "Oh", "Shin", "Seo", "Kwon", "Hwang", "Ahn", "Song", "Ryu", "Hong"],
        "phone_prefixes": ["+82-10"], # Korean mobile numbers often start with 10
        "address_templates": [
            "{number} {street_name}, {district}, {city}",
            "{building_name}, {number} {road_name}, {city}",
            "{apt_num}, {building_name}, {street_name}-ro, {city}"
        ],
        "cities": ["Seoul", "Busan", "Incheon", "Daegu", "Daejeon", "Gwangju", "Ulsan", "Suwon", "Changwon", "Goyang"]
    },
    "Indonesia": {
        "first_names": ["Budi", "Siti", "Joko", "Dewi", "Agus", "Ani", "Rudi", "Nur", "Eko", "Putri", "Bayu", "Sri", "Fajar", "Indah", "Adi", "Ratna", "Dian", "Wati", "Yoga", "Lestari"],
        "last_names": ["Santoso", "Wijaya", "Putra", "Dewi", "Nugroho", "Sari", "Pratama", "Hidayat", "Susanto", "Lestari", "Setiawan", "Aminah", "Rahman", "Kartika", "Handayani", "Wibowo", "Gunawan", "Puspita", "Utami", "Saputra"],
        "phone_prefixes": ["+62-8"], # Indonesian mobile numbers often start with 8
        "address_templates": [
            "Jalan {street_name} No. {number}, {subdistrict}, {city}",
            "{number} {street_name} Street, {district}, {city}",
            "Komplek {complex_name} Blok {block_num} No. {number}, {city}"
        ],
        "cities": ["Jakarta", "Surabaya", "Bandung", "Medan", "Semarang", "Makassar", "Palembang", "Denpasar", "Yogyakarta", "Malang"]
    },
    "Saudi Arabia": {
        "first_names": ["Abdullah", "Fatima", "Khalid", "Sara", "Mohammed", "Noura", "Fahad", "Reem", "Ahmed", "Laila", "Sultan", "Maha", "Omar", "Huda", "Ali", "Amira", "Nasser", "Mona", "Yousef", "Salma"],
        # NOTE(review): "Al-Qahtani" is listed twice below, so it carries double weight if names are sampled uniformly - confirm intentional
        "last_names": ["Al-Fahad", "Al-Zahrani", "Al-Qahtani", "Al-Ghamdi", "Al-Harbi", "Al-Shammary", "Al-Otaibi", "Al-Dossary", "Al-Mutairi", "Al-Subaie", "Al-Anzi", "Al-Juhani", "Al-Shehri", "Al-Malki", "Al-Hajri", "Al-Rashidi", "Al-Qahtani", "Al-Shammari", "Al-Amri", "Al-Sulaimani"],
        "phone_prefixes": ["+966-5"], # Saudi mobile numbers often start with 5
        "address_templates": [
            "{number} {street_name} Street, {district}, {city}",
            "Building {building_num}, {road_name} Road, {city}",
            "Villa {villa_num}, {area_name}, {city}"
        ],
        "cities": ["Riyadh", "Jeddah", "Mecca", "Medina", "Dammam", "Khobar", "Abha", "Tabuk", "Buraydah", "Hail"]
    },
    # Other Asian countries
    # NOTE(review): several entries below use a literal "XX" (or "X") in phone_prefixes;
    # generate_phone_number appends digits to the chosen prefix as-is, so the placeholder
    # would appear in the output unless it is substituted elsewhere - confirm.
    "Afghanistan": {
        "first_names": ["Ahmad", "Fatima", "Mohammad", "Zahra", "Ali", "Aisha"],
        "last_names": ["Khan", "Mohammadi", "Ahmadi", "Noori", "Hussaini", "Sultani"],
        "phone_prefixes": ["+93-7"],
        "address_templates": ["{number} {street}, {district}, {city}"],
        "cities": ["Kabul", "Herat", "Kandahar"]
    },
    "Armenia": {
        "first_names": ["Armen", "Ani", "David", "Mariam", "Tigran", "Nare"],
        "last_names": ["Sargsyan", "Khachatryan", "Harutyunyan", "Grigoryan", "Hovhannisyan", "Petrosyan"],
        "phone_prefixes": ["+374-XX"], # Specific prefix varies, using XX for generic
        "address_templates": ["Apt {apt}, {street} St, {city}"],
        "cities": ["Yerevan", "Gyumri", "Vanadzor"]
    },
    "Azerbaijan": {
        "first_names": ["Elvin", "Aynur", "Ruslan", "Leyla", "Farid", "Nigar"],
        "last_names": ["Aliyev", "Mammadov", "Hasanov", "Guliyev", "Ismayilov", "Safarov"],
        "phone_prefixes": ["+994-XX"],
        "address_templates": ["{number} {street} Prospekt, {city}"],
        "cities": ["Baku", "Ganja", "Sumqayit"]
    },
    "Bahrain": {
        "first_names": ["Ali", "Fatima", "Ahmed", "Noor", "Hassan", "Mariam"],
        "last_names": ["Al-Khalifa", "Al-Ansari", "Al-Dhaen", "Al-Dosari", "Al-Sayegh", "Al-Jowder"],
        "phone_prefixes": ["+973-XX"],
        "address_templates": ["Road {road}, Block {block}, {area}, {city}"],
        "cities": ["Manama", "Riffa", "Muharraq"]
    },
    "Bangladesh": {
        "first_names": ["Rahman", "Fatima", "Mohammed", "Aisha", "Kamal", "Nazma"],
        "last_names": ["Khan", "Ahmed", "Hossain", "Begum", "Chowdhury", "Islam"],
        "phone_prefixes": ["+880-1"],
        "address_templates": ["House {house}, Road {road}, {area}, {city}"],
        "cities": ["Dhaka", "Chittagong", "Khulna"]
    },
    "Bhutan": {
        "first_names": ["Karma", "Dechen", "Jigme", "Sonam", "Tashi", "Pema"],
        "last_names": ["Dorji", "Wangchuk", "Phuntsho", "Namgyel", "Choden", "Penjor"],
        "phone_prefixes": ["+975-17"],
        "address_templates": ["{number} {street}, {thromde}, {city}"],
        "cities": ["Thimphu", "Phuntsholing", "Paro"]
    },
    "Brunei": {
        "first_names": ["Haji", "Siti", "Awang", "Dayang", "Mohd", "Nurul"],
        "last_names": ["Abdullah", "Ali", "Hassan", "Omar", "Salleh", "Yusof"],
        "phone_prefixes": ["+673-XX"],
        "address_templates": ["No. {number}, Jalan {street}, {mukim}, {city}"],
        "cities": ["Bandar Seri Begawan", "Kuala Belait", "Seria"]
    },
    "Cambodia": {
        "first_names": ["Sok", "Srey", "Vann", "Dara", "Chan", "Mao"],
        "last_names": ["Lim", "Chhay", "Sok", "Chea", "Kim", "Phan"],
        "phone_prefixes": ["+855-XX"],
        "address_templates": ["# {number}, Street {street}, {sangkat}, {city}"],
        "cities": ["Phnom Penh", "Siem Reap", "Battambang"]
    },
    "Cyprus": {
        "first_names": ["Andreas", "Maria", "George", "Eleni", "Constantinos", "Andri"],
        "last_names": ["Georgiou", "Hadjigeorgiou", "Constantinou", "Nikolaou", "Kyriacou", "Ioannou"],
        "phone_prefixes": ["+357-XX"],
        "address_templates": ["{number} {street} Ave, {area}, {city}"],
        "cities": ["Nicosia", "Limassol", "Larnaca"]
    },
    "Georgia": {
        "first_names": ["Giorgi", "Nino", "David", "Tamar", "Levan", "Mariam"],
        "last_names": ["Baramidze", "Kapanadze", "Gorgiladze", "Dolidze", "Japaridze", "Gelashvili"],
        "phone_prefixes": ["+995-XX"],
        "address_templates": ["{number} {street} St, {district}, {city}"],
        "cities": ["Tbilisi", "Batumi", "Kutaisi"]
    },
    "Iran": {
        "first_names": ["Mohammad", "Fatemeh", "Ali", "Zahra", "Reza", "Maryam"],
        "last_names": ["Mohammadi", "Hosseini", "Karimi", "Ahmadi", "Davoodi", "Akbari"],
        "phone_prefixes": ["+98-9"],
        "address_templates": ["No. {number}, {street} Ave, {district}, {city}"],
        "cities": ["Tehran", "Mashhad", "Isfahan"]
    },
    "Iraq": {
        "first_names": ["Ahmed", "Zainab", "Mustafa", "Noor", "Hassan", "Sara"],
        "last_names": ["Al-Baghdadi", "Al-Dulaimi", "Al-Jubouri", "Al-Maliki", "Al-Tamimi", "Al-Shammari"],
        "phone_prefixes": ["+964-7"],
        "address_templates": ["House {number}, {street} St, {area}, {city}"],
        "cities": ["Baghdad", "Basra", "Mosul"]
    },
    "Israel": {
        "first_names": ["David", "Noa", "Omer", "Tali", "Daniel", "Maya"],
        "last_names": ["Cohen", "Levi", "Mizrahi", "Peretz", "Dahan", "Biton"],
        "phone_prefixes": ["+972-5"],
        "address_templates": ["{number} {street} St, {city}"],
        "cities": ["Jerusalem", "Tel Aviv", "Haifa"]
    },
    "Jordan": {
        "first_names": ["Omar", "Laila", "Yousef", "Huda", "Sami", "Rana"],
        "last_names": ["Al-Abdullah", "Al-Hassan", "Al-Khalil", "Al-Masri", "Al-Qudah", "Al-Zoubi"],
        "phone_prefixes": ["+962-7"],
        "address_templates": ["Building {num}, {street} St, {district}, {city}"],
        "cities": ["Amman", "Zarqa", "Irbid"]
    },
    "Kazakhstan": {
        "first_names": ["Alisher", "Amina", "Daniyar", "Zarina", "Nurlan", "Gulnara"],
        "last_names": ["Akhmetov", "Suleimenov", "Kazakhbayev", "Nurmagambetov", "Ismailov", "Serikova"],
        "phone_prefixes": ["+7-7"],
        "address_templates": ["Apt {apt}, {street} St, {city}"],
        "cities": ["Nur-Sultan", "Almaty", "Shymkent"]
    },
    "Kuwait": {
        "first_names": ["Abdullah", "Fatima", "Khalid", "Sara", "Mohammed", "Noura"],
        "last_names": ["Al-Sabah", "Al-Adwani", "Al-Mutairi", "Al-Ajmi", "Al-Rashidi", "Al-Enezi"],
        "phone_prefixes": ["+965-XX"],
        "address_templates": ["Block {block}, Street {street}, {area}, {city}"],
        "cities": ["Kuwait City", "Hawalli", "Salmiya"]
    },
    "Kyrgyzstan": {
        "first_names": ["Azamat", "Aigul", "Ruslan", "Aidana", "Erlan", "Jyldyz"],
        "last_names": ["Abdullaev", "Asanov", "Jumabekov", "Osmonov", "Sydykov", "Toktogulov"],
        "phone_prefixes": ["+996-XX"],
        "address_templates": ["{number} {street} St, {district}, {city}"],
        "cities": ["Bishkek", "Osh", "Jalal-Abad"]
    },
    "Laos": {
        "first_names": ["Boun", "Phou", "Kham", "Noy", "Seng", "Souk"],
        "last_names": ["Phommasone", "Vongphachanh", "Keomany", "Sisavath", "Chanthavong", "Sayavong"],
        "phone_prefixes": ["+856-XX"],
        "address_templates": ["Unit {unit}, {village}, {district}, {city}"],
        "cities": ["Vientiane", "Luang Prabang", "Savannakhet"]
    },
    "Lebanon": {
        "first_names": ["Georges", "Maria", "Charbel", "Nour", "Rami", "Lina"],
        "last_names": ["Khoury", "Haddad", "Sleiman", "Nassar", "Fakhoury", "Saad"],
        "phone_prefixes": ["+961-XX"],
        "address_templates": ["{number} {street} St, {area}, {city}"],
        "cities": ["Beirut", "Tripoli", "Sidon"]
    },
    "Malaysia": {
        "first_names": ["Ahmad", "Siti", "Mohd", "Nur", "Lim", "Tan", "Lee", "Chong", "Wong", "Ong"],
        "last_names": ["Abdullah", "Ali", "Hassan", "Omar", "Tan", "Lim", "Lee", "Chong", "Wong", "Ong"],
        "phone_prefixes": ["+60-1"],
        "address_templates": [
            "No. {number}, Jalan {street_name}, {postcode} {city}",
            "Unit {unit}, {building_name}, {area}, {city}"
        ],
        "cities": ["Kuala Lumpur", "Penang", "Johor Bahru", "Ipoh", "Malacca"]
    },
    "Maldives": {
        "first_names": ["Ahmed", "Fathimath", "Mohamed", "Mariyam", "Ali", "Aisha"],
        "last_names": ["Didi", "Manik", "Rasheed", "Shareef", "Hussain", "Abdulla"],
        "phone_prefixes": ["+960-XX"],
        "address_templates": ["G. {house}, {street}, {atoll}, {city}"],
        "cities": ["Malé", "Addu City", "Fuvahmulah"]
    },
    "Mongolia": {
        "first_names": ["Bat", "Nomin", "Gan", "Tsetseg", "Khulan", "Enkh"],
        "last_names": ["Bold", "Ganbaatar", "Erdene", "Munkhbat", "Purevdorj", "Battulga"],
        "phone_prefixes": ["+976-XX"],
        "address_templates": ["Apt {apt}, {khoroo}, {district}, {city}"],
        "cities": ["Ulaanbaatar", "Erdenet", "Darkhan"]
    },
    "Myanmar": {
        "first_names": ["Aung", "Aye", "Kyaw", "Mya", "Nyein", "Thida"],
        "last_names": ["Zaw", "Win", "Oo", "Tun", "Myint", "Htwe"],
        "phone_prefixes": ["+95-9"],
        "address_templates": ["No. {number}, {street} St, {township}, {city}"],
        "cities": ["Yangon", "Mandalay", "Naypyidaw"]
    },
    "Nepal": {
        "first_names": ["Bishnu", "Pooja", "Sagar", "Laxmi", "Nabin", "Sharmila"],
        "last_names": ["Thapa", "Sharma", "Rai", "Gurung", "Tamang", "Magar"],
        "phone_prefixes": ["+977-98"],
        "address_templates": ["House No. {number}, {area}, {ward}, {city}"],
        "cities": ["Kathmandu", "Pokhara", "Lalitpur"]
    },
    "North Korea": {
        "first_names": ["Kim", "Ri", "Pak", "Choi", "Jong", "Un"],
        "last_names": ["Kim", "Ri", "Pak", "Choi"],
        "phone_prefixes": ["+850-X"],
        "address_templates": ["{district}, {street}, {city}"],
        "cities": ["Pyongyang", "Hamhung", "Chongjin"]
    },
    "Oman": {
        "first_names": ["Sultan", "Fatima", "Khalid", "Aisha", "Mohammed", "Salma"],
        "last_names": ["Al-Busaidi", "Al-Hinai", "Al-Balushi", "Al-Hasani", "Al-Maamari", "Al-Shukaili"],
        "phone_prefixes": ["+968-XX"],
        "address_templates": ["Villa {num}, {area}, {city}"],
        "cities": ["Muscat", "Salalah", "Sohar"]
    },
    "Pakistan": {
        "first_names": ["Ahmed", "Aisha", "Usman", "Fatima", "Ali", "Zainab"],
        "last_names": ["Khan", "Butt", "Malik", "Rana", "Chaudhry", "Siddiqui"],
        "phone_prefixes": ["+92-3"],
        "address_templates": ["House {num}, Street {street}, {block}, {city}"],
        "cities": ["Karachi", "Lahore", "Islamabad"]
    },
    "Philippines": {
        "first_names": ["Maria", "Jose", "Ana", "Juan", "Cristina", "Michael"],
        "last_names": ["Santos", "Reyes", "Cruz", "Garcia", "Del Rosario", "Gonzales"],
        "phone_prefixes": ["+63-9"],
        "address_templates": ["{number} {street} St, {barangay}, {city}"],
        "cities": ["Manila", "Quezon City", "Cebu City"]
    },
    "Qatar": {
        "first_names": ["Mohammed", "Fatima", "Ahmed", "Noura", "Khalifa", "Mariam"],
        "last_names": ["Al-Thani", "Al-Marri", "Al-Nuaimi", "Al-Kaabi", "Al-Kubaisi", "Al-Mohannadi"],
        "phone_prefixes": ["+974-XX"],
        "address_templates": ["Zone {zone}, Street {street}, Building {building}, {city}"],
        "cities": ["Doha", "Al Rayyan", "Al Wakrah"]
    },
    "Singapore": {
        "first_names": ["Wei", "Mei", "Swee", "Jian", "Hui", "Li", "John", "Mary", "David", "Sarah"],
        "last_names": ["Tan", "Lim", "Lee", "Ng", "Wong", "Chong", "Koh", "Goh", "Chua", "Teo"],
        "phone_prefixes": ["+65-8", "+65-9"], # Singapore mobile numbers often start with 8 or 9
        "address_templates": [
            "Blk {block} {number} {street_name}, {postcode}",
            "{number} {road_name} Road, {city}"
        ],
        "cities": ["Singapore"]
    },
    "Sri Lanka": {
        "first_names": ["Nimal", "Kamala", "Ravi", "Dilini", "Chamara", "Tharani"],
        "last_names": ["Perera", "Silva", "Bandara", "Fernando", "Jayawardena", "De Silva"],
        "phone_prefixes": ["+94-7"],
        "address_templates": ["No. {number}, {street} Rd, {city}"],
        "cities": ["Colombo", "Kandy", "Galle"]
    },
    "Syria": {
        "first_names": ["Ahmed", "Fatima", "Omar", "Lina", "Ali", "Zahra"],
        "last_names": ["Al-Hassan", "Al-Ali", "Al-Mohammad", "Al-Ahmad", "Al-Khaled", "Al-Saleh"],
        "phone_prefixes": ["+963-9"],
        "address_templates": ["{number} {street} St, {district}, {city}"],
        "cities": ["Damascus", "Aleppo", "Homs"]
    },
    "Taiwan": {
        "first_names": ["Ming", "Hui", "Wei", "Yi", "Cheng", "Yu"],
        "last_names": ["Chen", "Lin", "Huang", "Chang", "Li", "Wang"],
        "phone_prefixes": ["+886-9"],
        "address_templates": ["No. {number}, {section}, {street} Rd, {district}, {city}"],
        "cities": ["Taipei", "Kaohsiung", "Taichung"]
    },
    "Tajikistan": {
        "first_names": ["Alisher", "Madina", "Rustam", "Zarina", "Faridun", "Gulnora"],
        "last_names": ["Rahmonov", "Sharipov", "Ismailov", "Nabiev", "Mirzoev", "Saidova"],
        "phone_prefixes": ["+992-XX"],
        "address_templates": ["{number} {street} Ave, {district}, {city}"],
        "cities": ["Dushanbe", "Khujand", "Kulob"]
    },
    "Thailand": {
        "first_names": ["Ploy", "Somchai", "Supaporn", "Wichai", "Nattaya", "Chai"],
        "last_names": ["Charoen", "Suk", "Somsri", "Wong", "Phrom", "Thong"],
        "phone_prefixes": ["+66-8", "+66-9"],
        "address_templates": ["{number} {moo}, {soi}, {road}, {district}, {city}"],
        "cities": ["Bangkok", "Chiang Mai", "Pattaya"]
    },
    "Timor-Leste": {
        "first_names": ["Jose", "Maria", "Joao", "Ana", "Francisco", "Rosa"],
        "last_names": ["Guterres", "Ximenes", "Carvalho", "Da Silva", "Martins", "Soares"],
        "phone_prefixes": ["+670-7"],
        "address_templates": ["{number} {street}, {suco}, {city}"],
        "cities": ["Dili", "Baucau", "Maliana"]
    },
    "Turkey": {
        "first_names": ["Ahmet", "Ayşe", "Mehmet", "Fatma", "Mustafa", "Zeynep"],
        "last_names": ["Yılmaz", "Demir", "Çelik", "Şahin", "Can", "Kaya"],
        "phone_prefixes": ["+90-5"],
        # NOTE(review): the {ilçe} placeholder contains a non-ASCII character; whatever fills
        # the template must use the exact same spelling or it will be left unreplaced - confirm
        "address_templates": ["No. {number}, {street} Sk., {mahalle}, {ilçe}, {city}"],
        "cities": ["Istanbul", "Ankara", "Izmir"]
    },
    "Turkmenistan": {
        "first_names": ["Dovlet", "Gulnara", "Merdan", "Aynur", "Serdar", "Jeren"],
        "last_names": ["Berdimuhamedov", "Atayev", "Geldyev", "Orazov", "Saparmuradov", "Rahmanov"],
        "phone_prefixes": ["+993-XX"],
        "address_templates": ["{number} {street} Ave, {district}, {city}"],
        "cities": ["Ashgabat", "Turkmenabat", "Dashoguz"]
    },
    "United Arab Emirates": {
        "first_names": ["Mohammed", "Fatima", "Ahmed", "Noura", "Khalid", "Sara"],
        "last_names": ["Al-Nahyan", "Al-Maktoum", "Al-Qassimi", "Al-Falahi", "Al-Suwaidi", "Al-Hammadi"],
        "phone_prefixes": ["+971-5"],
        "address_templates": ["Villa {num}, {community}, {emirate}, {city}"],
        "cities": ["Dubai", "Abu Dhabi", "Sharjah"]
    },
    "Uzbekistan": {
        "first_names": ["Alisher", "Dilnoza", "Rustam", "Gulnora", "Aziz", "Shahnoza"],
        "last_names": ["Karimov", "Abdullaev", "Rasulov", "Saidov", "Mirzaev", "Usmonov"],
        "phone_prefixes": ["+998-9"],
        "address_templates": ["Block {block}, {street} St, {district}, {city}"],
        "cities": ["Tashkent", "Samarkand", "Bukhara"]
    },
    "Vietnam": {
        # NOTE(review): the two lists look swapped relative to the other countries (per the
        # comment below, last_names holds given names) - presumably to mirror Vietnamese
        # name order; confirm this is what downstream code expects
        "first_names": ["Nguyen", "Tran", "Le", "Pham", "Hoang", "Huynh", "Phan", "Vu", "Dang", "Bui"],
        "last_names": ["An", "Anh", "Bao", "Chi", "Dung", "Giang", "Huy", "Khanh", "Linh", "Minh"], # Common given names as last names
        "phone_prefixes": ["+84-9"],
        "address_templates": [
            "No. {number}, {street_name} Street, {ward}, {district}, {city}",
            "{number} {road_name} Road, {city}"
        ],
        "cities": ["Ho Chi Minh City", "Hanoi", "Da Nang", "Can Tho", "Hai Phong"]
    },
    "Yemen": {
        "first_names": ["Ahmed", "Fatima", "Mohammed", "Zahra", "Ali", "Aisha"],
        "last_names": ["Al-Hamdani", "Al-Hajri", "Al-Zahrani", "Al-Yemani", "Al-Amri", "Al-Houthi"],
        "phone_prefixes": ["+967-7"],
        "address_templates": ["House {num}, {street} St, {district}, {city}"],
        "cities": ["Sana'a", "Aden", "Taiz"]
    }
}

# Helper: build one synthetic phone number from a pool of candidate prefixes
def generate_phone_number(prefixes, total_length):
    """
    Return a random phone number padded with digits to a target string length.

    Args:
        prefixes (list[str]): Candidate prefixes (e.g., "+86-13"). One is picked
            at random and used verbatim, including the "+", country code and hyphen.
        total_length (int): Length of the whole returned string, with the
            prefix characters counted.

    Returns:
        str: The chosen prefix followed by random decimal digits
            (e.g., "+86-13XXXXXXXXX"). If total_length is not greater than the
            prefix length, no digits are appended and the prefix is returned alone.

    Raises:
        IndexError: If prefixes is empty.
    """
    chosen_prefix = random.choice(prefixes)
    # The prefix counts toward total_length, so only the remainder is filled in
    padding_count = total_length - len(chosen_prefix)
    random_tail = ''.join(random.choices('0123456789', k=padding_count))
    return chosen_prefix + random_tail

# Helper function to generate a random address
def generate_address(country_name):
    """
    Generate a random address using templates specific to the given country.

    A template and a city are picked at random from
    country_data[country_name]; every known "{placeholder}" in the template
    is then substituted with a randomly chosen generic value. Placeholders
    that are not listed in the replacement table are left in the result
    unchanged.

    Args:
        country_name (str): Key into country_data selecting the templates and cities

    Returns:
        str: Formatted address string with surrounding whitespace stripped

    Raises:
        KeyError: If country_name is not present in country_data
    """
    data = country_data[country_name]
    template = random.choice(data["address_templates"])
    city = random.choice(data["cities"])

    # Generic components for addresses
    street_names = ["Main", "Central", "Garden", "Park", "Green", "High", "Market", "River", "Lake", "Hill"]
    districts = ["North", "South", "East", "West", "Central", "Downtown", "Uptown", "New Town", "Old Town"]
    buildings = ["Tower", "Plaza", "Center", "Residence", "Apartments", "Complex", "Building"]
    areas = ["Block", "Sector", "Zone", "Area", "District"]
    road_types = ["Road", "Street", "Avenue", "Boulevard", "Lane", "Drive"]
    subdistricts = ["Subdistrict A", "Subdistrict B", "Subdistrict C"]
    mukims = ["Mukim A", "Mukim B", "Mukim C"]
    sangkats = ["Sangkat A", "Sangkat B", "Sangkat C"]
    thromdes = ["Thromde A", "Thromde B", "Thromde C"]
    wards = ["Ward 1", "Ward 2", "Ward 3"]
    sections = ["Sec 1", "Sec 2", "Sec 3"]
    moos = ["Moo 1", "Moo 2", "Moo 3"]
    sois = ["Soi 1", "Soi 2", "Soi 3"]
    mahalles = ["Mahalle A", "Mahalle B", "Mahalle C"]
    ilces = ["Ilce A", "Ilce B", "Ilce C"]
    communities = ["Community A", "Community B", "Community C"]
    emirates = ["Dubai", "Abu Dhabi", "Sharjah"]
    villages = ["Village A", "Village B", "Village C"]
    atolls = ["Atoll A", "Atoll B", "Atoll C"]
    blocks_generic = ["Block A", "Block B", "Block C"]

    # Dictionary mapping template placeholders to random values.
    # Keys include the braces, so "{block}" never matches inside "{block_num}".
    replacements = {
        "{number}": str(random.randint(1, 999)),
        "{street_name}": random.choice(street_names),
        "{district}": random.choice(districts),
        "{city}": city,
        "{building}": random.choice(buildings),
        "{unit}": str(random.randint(1, 50)),
        "{flat_num}": str(random.randint(101, 999)),
        "{building_name}": f"{random.choice(['Grand', 'Royal', 'City'])} {random.choice(buildings)}",
        "{locality}": f"{random.choice(areas)} {random.choice(street_names)}",
        "{sector}": random.choice(areas),
        "{block}": random.choice(blocks_generic),
        "{block_num}": str(random.randint(1, 20)),
        "{area_name}": random.choice(areas),
        "{chome}": str(random.randint(1, 10)),
        "{apt_num}": str(random.randint(101, 999)),
        "{road_name}": random.choice(road_types),
        "{postcode}": str(random.randint(10000, 99999)),
        "{subdistrict}": random.choice(subdistricts),
        "{mukim}": random.choice(mukims),
        "{sangkat}": random.choice(sangkats),
        "{thromde}": random.choice(thromdes),
        "{ward}": random.choice(wards),
        "{section}": random.choice(sections),
        "{moo}": random.choice(moos),
        "{soi}": random.choice(sois),
        "{mahalle}": random.choice(mahalles),
        "{ilçe}": random.choice(ilces),
        "{community}": random.choice(communities),
        "{emirate}": random.choice(emirates),
        "{villa_num}": str(random.randint(1, 200)),
        "{zone}": str(random.randint(1, 50)),
        "{village}": random.choice(villages),
        "{atoll}": random.choice(atolls),
        "{house}": str(random.randint(1, 500)),
        # Short-form placeholders used by templates such as
        # "House {num}, {street} St, {district}, {city}"; without these
        # entries the raw "{num}" / "{street}" text leaked into the output.
        "{street}": random.choice(street_names),
        "{num}": str(random.randint(1, 999)),
    }

    # Replace all placeholders in the template with random values
    for key, value in replacements.items():
        template = template.replace(key, value)

    return template.strip()

# Main Data Generation Loop
# List to store all generated records
all_records = []

# Countries that receive NUM_RECORDS_PER_MAJOR_COUNTRY records; every other
# country receives NUM_RECORDS_PER_OTHER_COUNTRY.
major_countries = {"China", "India", "Japan", "South Korea", "Indonesia", "Saudi Arabia"}

# Phone number length per country, passed to generate_phone_number as
# total_length. Defined once here because it does not change between countries.
phone_length_map = {
    "China": 11, "India": 10, "Japan": 10, "South Korea": 10, "Indonesia": 10, "Saudi Arabia": 9,
    "Afghanistan": 9, "Armenia": 8, "Azerbaijan": 9, "Bahrain": 8, "Bangladesh": 10, "Bhutan": 8,
    "Brunei": 7, "Cambodia": 9, "Cyprus": 8, "Georgia": 9, "Iran": 10, "Iraq": 10,
    "Israel": 9, "Jordan": 9, "Kazakhstan": 10, "Kuwait": 8, "Kyrgyzstan": 9, "Laos": 8,
    "Lebanon": 8, "Malaysia": 9, "Maldives": 7, "Mongolia": 8, "Myanmar": 9, "Nepal": 10,
    "North Korea": 7, "Oman": 8, "Pakistan": 10, "Philippines": 10, "Qatar": 8, "Singapore": 8,
    "Sri Lanka": 9, "Syria": 9, "Taiwan": 9, "Tajikistan": 9, "Thailand": 9, "Timor-Leste": 7,
    "Turkey": 10, "Turkmenistan": 8, "United Arab Emirates": 9, "Uzbekistan": 9, "Vietnam": 9,
    "Yemen": 9
}

# Domains used for the generated email addresses
email_domains = ["example.com", "mail.net", "test.org", "sample.co"]

# Generate data for each country
for country, data in country_data.items():
    num_records = NUM_RECORDS_PER_MAJOR_COUNTRY if country in major_countries else NUM_RECORDS_PER_OTHER_COUNTRY

    print(f"Generating {num_records} records for {country}...")

    # Countries missing from the map fall back to a length of 10
    total_length = phone_length_map.get(country, 10)

    for _ in range(num_records):
        first_name = random.choice(data["first_names"])
        last_name = random.choice(data["last_names"])
        full_name = f"{first_name} {last_name}"

        # Generate email
        email = f"{first_name.lower()}.{last_name.lower()}@{random.choice(email_domains)}"
        email = email.replace(" ", "") # Remove spaces if any in names

        # Generate phone number from one of the country's prefixes
        phone_number = generate_phone_number(data["phone_prefixes"], total_length)

        # Generate address
        address = generate_address(country)

        all_records.append({
            "Name": full_name,
            "Email": email,
            "Phone": phone_number,
            "Address": address,
            "Country": country # Added for easy filtering/analysis
        })

# Create a Pandas DataFrame
df = pd.DataFrame(all_records)

# Save the generated data to a CSV file
try:
    df.to_csv(OUTPUT_FILENAME, index=False, encoding='utf-8')
    print(f"\nSuccessfully generated '{OUTPUT_FILENAME}' with {len(df)} records.")
    print("This file contains synthetic, fictional data for PII detection training.")
    print("⚠️  IMPORTANT: All data is fictional and should only be used for training purposes.")
except Exception as e:
    # Best-effort save: report the failure and continue so the summary below still prints
    print(f"\nError saving CSV file: {e}")
    print("Please check file permissions and disk space.")

# --- Summary Statistics ---
# An empty record list yields a DataFrame without a 'Country' column, so
# guard the lookup instead of raising KeyError.
countries_represented = df['Country'].nunique() if 'Country' in df.columns else 0

print("\nDataset Summary:")
print(f"Total records: {len(df)}")
print(f"Countries represented: {countries_represented}")
print(f"Records per country (major): {NUM_RECORDS_PER_MAJOR_COUNTRY}")
print(f"Records per country (other): {NUM_RECORDS_PER_OTHER_COUNTRY}")

Generating 1000 records for China...
Generating 1000 records for India...
Generating 1000 records for Japan...
Generating 1000 records for South Korea...
Generating 1000 records for Indonesia...
Generating 1000 records for Saudi Arabia...
Generating 100 records for Afghanistan...
Generating 100 records for Armenia...
Generating 100 records for Azerbaijan...
Generating 100 records for Bahrain...
Generating 100 records for Bangladesh...
Generating 100 records for Bhutan...
Generating 100 records for Brunei...
Generating 100 records for Cambodia...
Generating 100 records for Cyprus...
Generating 100 records for Georgia...
Generating 100 records for Iran...
Generating 100 records for Iraq...
Generating 100 records for Israel...
Generating 100 records for Jordan...
Generating 100 records for Kazakhstan...
Generating 100 records for Kuwait...
Generating 100 records for Kyrgyzstan...
Generating 100 records for Laos...
Generating 100 records for Lebanon...
Generating 100 records for Malaysia..