* scraps data from Paank


In [3]:
import os
import requests
from bs4 import BeautifulSoup
import csv
from collections import defaultdict

base_url = "https://enforced-disappearances.paank.org/?page={}"
output_file = "disappearances.csv"

# Track name occurrences for unique filenames
name_counts = defaultdict(int)

# Open CSV to write data
with open(output_file, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Name", "Abducted on", "Abducted by", "Abducted from", "Released on", "Status", "Profession"])

    for page in range(1, 39):  # Loop from page 1 to 38
        url = base_url.format(page)
        response = requests.get(url)

        if response.status_code != 200:
            print(f"Failed to fetch page {page}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")

        # Find all people on the page
        people = soup.find_all("h2", class_="text-2xl font-bold mb-5")

        for index, person in enumerate(people, start=1):
            name = person.text.strip().replace("/", "-")  # Avoid illegal filename characters
            if not name:
                name = f"Unknown_{page}_{index}"  # Fallback for missing names

            # Ensure unique names
            name_counts[name] += 1
            if name_counts[name] > 1:
                name = f"{name}_{name_counts[name]}"  # Append count for duplicates

            # Format filename convention (Firstname_Secondname_Thirdname)
            formatted_name = "_".join(name.split())

            table = person.find_next("table", class_="text-sm text-left font-medium")

            # Default values
            abducted_on = abducted_by = abducted_from = released_on = status = profession = "N/A"

            # Extract abduction details
            if table:
                rows = table.find_all("tr")

                for row in rows:
                    th = row.find("th").text.strip()
                    td = row.find("td").text.strip()

                    if "Abducted on:" in th:
                        abducted_on = td
                    elif "Abducted by:" in th:
                        abducted_by = td
                    elif "Abducted from:" in th:
                        abducted_from = td
                    elif "Released on:" in th:
                        released_on = td

            # Extract status (checking multiple possible colors)
            status_div = person.find_previous(lambda tag: tag.name == "div" and 
                                              "absolute" in tag.get("class", []) and 
                                              "top-0" in tag.get("class", []) and 
                                              "right-0" in tag.get("class", []))

            if status_div:
                status = status_div.text.strip()
            else:
                status = "N/A"

            # Extract profession
            personal_info_section = person.find_next("p", class_="text-xs uppercase font-bold mb-2 mt-5")
            if personal_info_section and "Personal Information" in personal_info_section.text:
                personal_table = personal_info_section.find_next("table", class_="text-sm text-left font-medium")
                if personal_table:
                    for row in personal_table.find_all("tr"):
                        th = row.find("th").text.strip()
                        td = row.find("td").text.strip()
                        if "Profession:" in th:
                            profession = td
                            break  # Stop after finding profession

            # Save to CSV
            writer.writerow([name, abducted_on, abducted_by, abducted_from, released_on, status, profession])

print("Scraping complete. Data saved.")


Scraping complete. Data saved.


In [4]:
import pandas as pd


In [6]:
df = pd.read_csv("disappearances.csv")
df

Unnamed: 0,Name,Abducted on,Abducted by,Abducted from,Released on,Status,Profession
0,Abdul Basith Baloch,13th Jun 2024,Death Squad,"Panjgur, Balochistan",,Killed,Labor
1,Diljan Baloch,12th Jun 2024,Pakistani Forces,"Awaran, Balochistan",,Still Missing,
2,Essa Baloch,12th Jun 2024,Pakistani Forces,"Kech, Balochistan",14th Jun 2024,Released,Driver
3,Jaffar Bugti,12th Jun 2024,Pakistani Forces,"Dera Bugti, Balochistan",,Still Missing,
4,Changeiz Baloch,12th Jun 2024,Pakistani Forces,"Gwadar, Balochistan",,Still Missing,Student
...,...,...,...,...,...,...,...
960,Amira Bibi,,Pakistan Paramilitary Forces,"Barkhan, Balochistan",,Killed,
961,Statistics_37,,,,,Killed,
962,Sohail Naz,,Pakistani Forces,"Kech, Balochistan",6th Feb 2024,Released,
963,Abdul Ghaffar_2,,Pakistani Forces,"Chagai, Balochistan",,Killed,Driver


In [10]:
df = df[~df['Name'].str.contains('Statistics_', na=False)]
df

Unnamed: 0,Name,Abducted on,Abducted by,Abducted from,Released on,Status,Profession
0,Abdul Basith Baloch,13th Jun 2024,Death Squad,"Panjgur, Balochistan",,Killed,Labor
1,Diljan Baloch,12th Jun 2024,Pakistani Forces,"Awaran, Balochistan",,Still Missing,
2,Essa Baloch,12th Jun 2024,Pakistani Forces,"Kech, Balochistan",14th Jun 2024,Released,Driver
3,Jaffar Bugti,12th Jun 2024,Pakistani Forces,"Dera Bugti, Balochistan",,Still Missing,
4,Changeiz Baloch,12th Jun 2024,Pakistani Forces,"Gwadar, Balochistan",,Still Missing,Student
...,...,...,...,...,...,...,...
958,Abdul Qadir,,Pakistan Paramilitary Forces,"Barkhan, Balochistan",,Killed,
959,Muhammad Anwar,,Pakistan Paramilitary Forces,"Barkhan, Balochistan",,Killed,
960,Amira Bibi,,Pakistan Paramilitary Forces,"Barkhan, Balochistan",,Killed,
962,Sohail Naz,,Pakistani Forces,"Kech, Balochistan",6th Feb 2024,Released,


In [11]:
df['Status'].value_counts()

Status
Still Missing    497
Released         378
Killed            53
Name: count, dtype: int64

In [12]:
df.to_csv("disappearances.csv")

In [13]:
df['Profession'].value_counts()

Profession
Student                   246
Labor                     188
Driver                     16
Shopkeeper                 16
Teacher                    12
Gov. Servant               10
Doctor                      9
Shepherd                    8
Political Worker            6
Footballer                  5
Private Security Guard      5
Social Worker               5
Farmer                      4
Dispenser                   3
Tailor                      3
Shaper                      3
Businessman                 3
(Disabled)                  3
Tribal Leader               3
Engineer                    2
Muezzin                     2
Landholder                  2
Poet                        2
House wife                  2
Nurse                       1
Fisherman                   1
Levies                      1
Journalist                  1
Data operator               1
Writer                      1
Hotel man                   1
boxer                       1
Mechanic                    1