In [1]:
# This notebook draws geographic heat maps (attacks).

import pandas as pd

# Path of the input table. low_memory=False tells pandas not to parse the file
# in chunks, so each column's dtype is inferred from the whole file at once.
VCDB_CSV_PATH = "vcdb.csv"
df = pd.read_csv(VCDB_CSV_PATH, low_memory=False)

In [2]:

# Pick out every column whose name begins with the external-actor country
# prefix, then write that slice of the table to its own CSV file.
COUNTRY_COLUMN_PREFIX = 'actor.external.country'
country_columns = list(
    filter(lambda name: name.startswith(COUNTRY_COLUMN_PREFIX), df.columns)
)
attacks_from_external_country = df[country_columns]
attacks_from_external_country.to_csv("attacks_from_external_country.csv")


In [3]:
import pandas as pd
import plotly.express as px
import pycountry
import requests


# Reload the per-country columns from disk; the first CSV column is used as the
# row index.
df = pd.read_csv("attacks_from_external_country.csv", index_col=0)


# Column-wise totals: one row per input column, holding that column's sum.
attack_counts = df.sum().reset_index()
attack_counts.columns = ['country_code', 'attack_count']
# Strip the column-name prefix so only the country code is left. regex=False
# keeps the dots literal on every pandas version; older releases interpreted
# the pattern as a regular expression by default, where '.' matches any
# character.
attack_counts['country_code'] = attack_counts['country_code'].str.replace(
    'actor.external.country.', '', regex=False
)


def get_population_data(timeout=30):
    """Download country populations from the REST Countries API.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the server before the request is abandoned.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    dict
        Maps each returned entry's ``cca2`` value to its ``population`` value.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or the server answers with an HTTP
        error status.
    """
    url = "https://restcountries.com/v3.1/all?fields=cca2,population"
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors here instead of failing later while indexing into an
    # error payload that does not have the expected list-of-records shape.
    response.raise_for_status()
    return {country['cca2']: country['population'] for country in response.json()}

population_data = get_population_data()


# Attach a population to every row; codes absent from the lookup become NaN.
attack_counts['population'] = attack_counts['country_code'].map(population_data)

# Attacks per million inhabitants. Rows without a positive population are left
# as NaN: dividing a non-zero count by a population of 0 would otherwise yield
# an infinite density, and a single infinity makes the column's mean and
# standard deviation unusable.
has_population = attack_counts['population'] > 0
attack_counts['attack_density'] = (
    (attack_counts['attack_count'] / attack_counts['population']) * 1_000_000
).where(has_population)


def code_to_iso3(code):
    """Translate a two-letter country code into its three-letter code.

    Parameters
    ----------
    code : str
        Value looked up through ``pycountry.countries.get(alpha_2=code)``.

    Returns
    -------
    str or None
        The ``alpha_3`` attribute of the matching pycountry record, or None
        when the lookup does not produce a record.
    """
    try:
        return pycountry.countries.get(alpha_2=code).alpha_3
    except (AttributeError, LookupError, TypeError):
        # AttributeError: get() returned None because nothing matched.
        # LookupError / TypeError: the lookup itself rejected the value
        # (presumably for unknown or non-string codes, depending on the
        # pycountry version). Anything else, e.g. KeyboardInterrupt, is no
        # longer swallowed.
        return None

# Look up the three-letter code for every row's country code.
iso3_codes = attack_counts['country_code'].apply(code_to_iso3)
attack_counts['iso3'] = iso3_codes


# Keep only the rows that have both a three-letter code and a population,
# then store the result on disk.
required_columns = ['iso3', 'population']
valid_data = attack_counts.dropna(subset=required_columns)

valid_data.to_csv("valid_data_a.csv")

In [4]:

from scipy.stats import zscore


valid_data = valid_data.copy()
# Fill missing densities with 0. The result is assigned back to the column
# instead of calling fillna(..., inplace=True) on the selected Series: that
# chained in-place form triggers a pandas FutureWarning and will stop updating
# the frame in pandas 3.0.
valid_data['attack_density'] = valid_data['attack_density'].fillna(0)

# Standardise the densities, then cap the scores to the range [-3, 3].
valid_data['attack_density_zscore'] = zscore(valid_data['attack_density'])
valid_data['attack_density_zscore_clipped'] = valid_data['attack_density_zscore'].clip(-3, 3)
valid_data.attack_density_zscore.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  valid_data['attack_density'].fillna(0, inplace=True)  # 填充缺失值


0   -0.143900
1   -0.143900
2   -0.115615
3   -0.143900
4   -0.143900
Name: attack_density_zscore, dtype: float64

In [25]:
# Title markup shared by the figure constructor and the layout update.
zscore_title_html = "<b>Global Cyber Attack Density</b><br><sup>Normalized by Z-Score</sup>"

# Hover tooltip fields, listed in the order they are passed to plotly.
zscore_hover_fields = dict(
    attack_count=True,
    population=True,
    attack_density=':.2f',
    iso3=False,
)

# World map coloured by the clipped z-score column.
fig = px.choropleth(
    data_frame=valid_data,
    locations="iso3",
    color="attack_density_zscore_clipped",
    hover_name="country_code",
    hover_data=zscore_hover_fields,
    labels=dict(attack_density_zscore_clipped='Attack Density Z-Score'),
    title=zscore_title_html,
    projection="natural earth",
    color_continuous_scale=px.colors.diverging.RdYlGn_r,
    color_continuous_midpoint=0,
    range_color=[-0.1, 3.1],
)

# Colour bar: fixed pixel length, vertically centred, ticks suffixed with sigma.
zscore_colorbar = {
    "title": "Z-Score",
    "thickness": 15,
    "lenmode": "pixels",
    "len": 300,
    "yanchor": "middle",
    "y": 0.5,
    "ticksuffix": " σ",
}

# Base map appearance: frame, coastlines, land/water colours and projection.
zscore_geo_layout = {
    "showframe": True,
    "showcoastlines": True,
    "coastlinecolor": "rgba(0,0,0,0.5)",
    "coastlinewidth": 0.8,
    "landcolor": "rgba(240,240,240,1)",
    "projection": {"type": "natural earth", "rotation_lon": 10},
    "lakecolor": "white",
    "oceancolor": "white",
}

fig.update_layout(
    margin=dict(r=20, t=80, l=20, b=20),
    coloraxis_colorbar=zscore_colorbar,
    geo=zscore_geo_layout,
    # Centre the title horizontally near the top of the figure.
    title=dict(
        text=zscore_title_html,
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
        font=dict(size=20),
    ),
    font={"family": "Arial", "size": 12, "color": "#404040"},
    plot_bgcolor="white",
    paper_bgcolor="white",
    hoverlabel={"bgcolor": "white", "font_size": 12, "font_family": "Arial"},
)

# Water/background colours and country borders for the map.
zscore_geo_overrides = dict(
    showocean=True,
    oceancolor="white",
    lakecolor="white",
    bgcolor="white",
    showcountries=True,
    countrycolor="rgba(0,0,0,0.2)",
    countrywidth=0.5,
)
fig.update_geos(**zscore_geo_overrides)

# Footer note placed in paper coordinates just below the plotting area.
zscore_footer = dict(
    text="Source: Your Data Source | Visualization by Your Team",
    x=0.5,
    y=-0.1,
    xref="paper",
    yref="paper",
    showarrow=False,
    font={"size": 10, "color": "#666666"},
)
fig.add_annotation(**zscore_footer)

fig.show()

In [30]:
import plotly.express as px


# Rebind valid_data to a fresh deep copy before a new column is added to it.
valid_data = valid_data.copy(deep=True)

# Quantile normalization helper
def quantile_normalize(series):
    """Return each value's rank divided by the length of ``series``.

    Tied values share the average of the ranks they occupy.
    """
    return series.rank(method='average') / len(series)

# Add the rank-based normalized density as a new column.
normalized_density = quantile_normalize(valid_data['attack_density'])
valid_data.loc[:, 'attack_density_normalized'] = normalized_density

# Hover tooltip fields, listed in the order they are passed to plotly.
quantile_hover_fields = dict(
    attack_count=True,
    population=True,
    attack_density=':.2f',
    iso3=False,
)

# Create the heat map.
# NOTE(review): the title given here differs from the one applied by
# update_layout further down, which is set last - confirm which wording is
# intended.
fig = px.choropleth(
    data_frame=valid_data,
    locations="iso3",
    color="attack_density_normalized",
    hover_name="country_code",
    hover_data=quantile_hover_fields,
    title="<b>Cyber Attack Density by Country</b><br><sup>Quantile Normalization</sup>",
    projection="natural earth",
    color_continuous_scale=px.colors.diverging.RdYlGn_r,
    color_continuous_midpoint=0.5,
    range_color=[0.3, 1],  # normalized range
)

# Colour bar: fixed pixel length, vertically centred, explicit tick positions.
quantile_colorbar = {
    "title": "Percentage",
    "thickness": 15,
    "lenmode": "pixels",
    "len": 300,
    "yanchor": "middle",
    "y": 0.5,
    "tickvals": [0, 0.25, 0.5, 0.75, 1],
}

# Base map appearance: frame, coastlines, land/water colours and projection.
quantile_geo_layout = {
    "showframe": True,
    "showcoastlines": True,
    "coastlinecolor": "rgba(0,0,0,0.5)",
    "coastlinewidth": 0.8,
    "landcolor": "rgba(240,240,240,1)",
    "projection": {"type": "natural earth", "rotation_lon": 10},
    "lakecolor": "white",
    "oceancolor": "white",
}

fig.update_layout(
    margin=dict(r=20, t=80, l=20, b=20),
    coloraxis_colorbar=quantile_colorbar,
    geo=quantile_geo_layout,
    # Centre the title horizontally near the top of the figure.
    title=dict(
        text="<b>Global Cyber Attack Density</b><br><sup>Quantile Normalization</sup>",
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
        font=dict(size=20),
    ),
    font={"family": "Arial", "size": 12, "color": "#404040"},
    plot_bgcolor="white",
    paper_bgcolor="white",
    hoverlabel={"bgcolor": "white", "font_size": 12, "font_family": "Arial"},
)

# Water/background colours and country borders for the map.
quantile_geo_overrides = dict(
    showocean=True,
    oceancolor="white",
    lakecolor="white",
    bgcolor="white",
    showcountries=True,
    countrycolor="rgba(0,0,0,0.2)",
    countrywidth=0.5,
)
fig.update_geos(**quantile_geo_overrides)

# Footer note placed in paper coordinates just below the plotting area.
quantile_footer = dict(
    text="Source: Your Data Source | Visualization by Your Team",
    x=0.5,
    y=-0.1,
    xref="paper",
    yref="paper",
    showarrow=False,
    font={"size": 10, "color": "#666666"},
)
fig.add_annotation(**quantile_footer)

fig.show()