# Retrieving the JSON data from Ransomware.live and saving it as a CSV file

In [None]:
import requests
import pandas as pd

# Country codes of the ASEAN countries we query.
asean_countries = ['MY', 'SG', 'ID', 'TH', 'PH', 'VN', 'BN', 'MM', 'KH', 'LA']
base_url = "https://api.ransomware.live/v2/countryvictims/"
# Upper bound (in seconds) on each HTTP call so a stalled connection cannot
# hang the cell indefinitely.
request_timeout = 30

# Accumulates the records of every country into one flat list.
all_data = []

# Query the per-country endpoint once for each code.
for code in asean_countries:
    url = base_url + code
    print(f"Fetching data for: {code}")

    try:
        # The request itself sits inside the try block so that a network
        # failure for one country is reported and skipped instead of
        # aborting the whole loop.
        response = requests.get(url, timeout=request_timeout)
        country_data = response.json()
        # A list payload means records were returned for this country.
        if isinstance(country_data, list):
            for entry in country_data:
                # Tag each record with the code it was requested under.
                entry["country_code"] = code
            all_data.extend(country_data)
        else:
            # Anything else (e.g. an error object) is logged and skipped.
            print(f"No data list returned for {code}, response: {country_data}")

    except Exception as e:
        # Deliberately broad: this is a best-effort collection loop, so any
        # failure for one country is printed and the loop moves on.
        print(f"Error fetching data for {code}: {e}")

# Build a single DataFrame from the collected records and preview it.
df = pd.DataFrame(all_data)
df.head()


Fetching data for: MY
Fetching data for: SG
Fetching data for: ID
Fetching data for: TH
Fetching data for: PH
Fetching data for: VN
Fetching data for: BN
⚠️ No data list returned for BN, response: {'error': "No victims found for country code 'BN'."}
Fetching data for: MM
Fetching data for: KH
Fetching data for: LA


Unnamed: 0,activity,country,description,discovered,duplicates,extrainfos,group_name,post_title,post_url,published,website,country_code,modifications
0,Transportation/Logistics,MY,All data will be published online on Apr.20.\n...,2025-04-12 10:44:15.851365,[],[],qilin,MS SUPPLY CHAIN SOLUTIONS (MALAYSIA) SDN. BHD,http://ijzn3sicrcy7guixkzjkib4ukbiilwc3xhnmby4...,2025-04-12 00:00:00.000000,mscsm.com.my,MY,
1,Construction,MY,The company established its foothold in the in...,2025-04-02 15:19:27.983980,[],[],akira,Naza TTDI Sdn Bhd,,2025-04-02 00:00:00.000000,,MY,
2,Public Sector,MY,Mpaj.gov.my,2025-03-21 07:22:36.881712,[],[],babuk2,Mpaj.gov.my,http:/bxwu33iefqfc3rxigynn3ghvq4gdw3gxgxna5m4a...,2025-03-21 07:21:30.502051,Mpaj.gov.my,MY,
3,Public Sector,MY,rac.gov.my,2025-03-19 18:26:46.777294,[],[],babuk2,rac.gov.my,http:/bxwu33iefqfc3rxigynn3ghvq4gdw3gxgxna5m4a...,2025-03-19 18:25:41.107486,rac.gov.my,MY,
4,Transportation/Logistics,MY,Swift Haulage Berhad is Malaysia’s fastest gro...,2025-03-07 15:12:22.430856,[],[],akira,Swift Haulage Berhad,,2025-03-07 00:00:00.000000,,MY,


In [None]:
# Number of rows collected across all queried countries.
len(df)

367

In [None]:
# Save the raw frame to CSV.
# NOTE(review): the last cell of the notebook writes clean_df to this same
# path, so on a full run this raw snapshot is overwritten.
df.to_csv('asean_victim_ransomware.csv')

In [None]:
# Inspect the column names of the raw frame.
df.columns

Index(['activity', 'country', 'description', 'discovered', 'duplicates',
       'extrainfos', 'group_name', 'post_title', 'post_url', 'published',
       'website', 'country_code', 'modifications'],
      dtype='object')

In [None]:
# Show the first row of the raw frame.
df.head(1)

Unnamed: 0,activity,country,description,discovered,duplicates,extrainfos,group_name,post_title,post_url,published,website,country_code,modifications
0,Transportation/Logistics,MY,All data will be published online on Apr.20.\n...,2025-04-12 10:44:15.851365,[],[],qilin,MS SUPPLY CHAIN SOLUTIONS (MALAYSIA) SDN. BHD,http://ijzn3sicrcy7guixkzjkib4ukbiilwc3xhnmby4...,2025-04-12 00:00:00.000000,mscsm.com.my,MY,


# Clean the data (date format & removing unnecessary columns)



In [None]:
# The raw frame carries columns we do not need and date strings that must be
# normalised before upload.
def clean_ransomware_data(raw_df):
    """Return a trimmed copy of ``raw_df`` with normalised date strings.

    Only the ten columns listed in ``wanted_columns`` are kept, in that
    order. The ``published`` and ``discovered`` columns are parsed as
    datetimes and re-rendered as ``YYYY-MM-DDTHH:MM:SS`` strings; values
    that cannot be parsed are coerced to missing. ``raw_df`` itself is not
    modified.
    """
    wanted_columns = [
        'post_title', 'activity', 'country', 'country_code',
        'group_name', 'description', 'website', 'post_url',
        'published', 'discovered',
    ]
    timestamp_layout = '%Y-%m-%dT%H:%M:%S'

    # Work on a copy so the caller's frame stays untouched.
    cleaned = raw_df.loc[:, wanted_columns].copy()

    # Same parse-then-format treatment for both date columns.
    for date_column in ('published', 'discovered'):
        parsed = pd.to_datetime(cleaned[date_column], errors='coerce')
        cleaned[date_column] = parsed.dt.strftime(timestamp_layout)

    return cleaned


In [None]:
# Apply the cleaning step to the raw frame; df itself is left unchanged.
clean_df = clean_ransomware_data(df)

In [None]:
# clean_df.head()

Unnamed: 0,post_title,activity,country,country_code,group_name,description,website,post_url,published,discovered
0,MS SUPPLY CHAIN SOLUTIONS (MALAYSIA) SDN. BHD,Transportation/Logistics,MY,MY,qilin,All data will be published online on Apr.20.\n...,mscsm.com.my,http://ijzn3sicrcy7guixkzjkib4ukbiilwc3xhnmby4...,2025-04-12T00:00:00,2025-04-12T10:44:15


# Connecting to supabase


In [None]:
pip install supabase --quiet


In [None]:

import os

from supabase import create_client

# Project URL and API key are read from the environment so that no
# credentials are stored in the notebook itself.
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
if not SUPABASE_URL or not SUPABASE_KEY:
    raise RuntimeError(
        "Set the SUPABASE_URL and SUPABASE_KEY environment variables "
        "before running this cell."
    )

# Client used by the upload cell below.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)




In [None]:
## Optional: drop duplicate post_urls before uploading
# clean_df = clean_df.drop_duplicates(subset=["post_url"])


In [None]:
# Supabase expects a list of dictionaries (one key/value mapping per row).
# Missing values (NaN) are replaced with None first so that they serialise
# as JSON null instead of the non-standard NaN token.
data_to_upload = (
    clean_df.astype(object)
    .where(clean_df.notna(), None)
    .to_dict(orient="records")
)

# Upload in chunks of 50 rows, upserting on post_url to avoid duplicates.
for i in range(0, len(data_to_upload), 50):
    chunk = data_to_upload[i:i+50]
    # on_conflict takes the conflict column name(s) as a single string.
    res = supabase.table("asean_ransomware").upsert(chunk, on_conflict="post_url").execute()
    print(f"Uploaded {len(chunk)} records")

In [None]:
# Save the cleaned frame to CSV. This is the same path the raw frame was
# written to earlier, so that file is replaced. With the default index=True,
# the row index is written as an unnamed first column.
clean_df.to_csv('asean_victim_ransomware.csv')