In [None]:
!pip install networkx
!pip install pandas scikit-learn

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from itertools import combinations

# Load the dataset
file_path = 'D:/encoded_dataset.csv'  # Set this to the location of your dataset
df = pd.read_csv(file_path)

# Work on a random subset to keep the pairwise edge construction tractable.
# The draw is seeded so that re-running the notebook rebuilds the same graph,
# and the sample size is capped so a dataset with fewer rows does not raise.
SAMPLE_SIZE = 2000
RANDOM_SEED = 42
df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=RANDOM_SEED)

# Create the (initially empty) undirected graph
G = nx.Graph()

# Columns copied onto every node as attributes, under the same names
NODE_ATTRIBUTES = [
    'traffic_control_device',
    'weather_condition',
    'lighting_condition',
    'first_crash_type',
    'trafficway_type',
    'alignment',
    'roadway_surface_cond',
    'road_defect',
    'crash_type',
    'intersection_related_i',
    'damage',
    'prim_contributory_cause',
    'num_units',
    'most_severe_injury',
    'injuries_total',
    'injuries_fatal',
    'injuries_incapacitating',
    'injuries_non_incapacitating',
    'injuries_reported_not_evident',
    'injuries_no_indication',
    'crash_hour',
    'crash_day_of_week',
    'crash_month',
]

# Add one node per accident, keyed by the DataFrame index of its row
for index, row in df.iterrows():
    G.add_node(index, **{column: row[column] for column in NODE_ATTRIBUTES})

# Function that decides whether two accidents should be connected
def is_similar(accident1, accident2):
    """Return True when two accidents match on at least one similarity criterion.

    Both arguments are mappings (e.g. node attribute dicts) that must provide
    the keys 'crash_hour', 'crash_month', 'crash_day_of_week',
    'trafficway_type', 'first_crash_type' and 'injuries_no_indication'.

    The criteria are:
      * crash hours at most one hour apart, measured on a 24-hour cycle so
        that hour 23 and hour 0 count as adjacent;
      * same crash month;
      * same day of week;
      * same trafficway type;
      * same first crash type;
      * same 'injuries_no_indication' value.

    A single satisfied criterion is enough.

    Raises:
        KeyError: if either mapping lacks one of the keys listed above.
    """
    # Hours are cyclic: without the wrap-around, 23 and 0 would be treated as
    # 23 hours apart instead of 1.
    # NOTE(review): assumes crash_hour is an hour of day in 0-23 - confirm
    # against how the dataset was encoded.
    hour_gap = abs(accident1['crash_hour'] - accident2['crash_hour']) % 24
    time_diff = min(hour_gap, 24 - hour_gap) <= 1

    same_month = accident1['crash_month'] == accident2['crash_month']
    same_day_of_week = accident1['crash_day_of_week'] == accident2['crash_day_of_week']
    same_trafficway = accident1['trafficway_type'] == accident2['trafficway_type']
    same_crash_type = accident1['first_crash_type'] == accident2['first_crash_type']
    same_injury_no_indication = (
        accident1['injuries_no_indication'] == accident2['injuries_no_indication']
    )

    # Connect the two accidents if at least one criterion holds
    return bool(time_diff or same_month or same_day_of_week or same_trafficway or
                same_crash_type or same_injury_no_indication)

# Add edges between accidents that are similar
# Compare every unordered pair of nodes once and connect the similar ones.
# This is quadratic in the number of nodes, which is why the data was sampled.
for (node_a, attrs_a), (node_b, attrs_b) in combinations(G.nodes(data=True), 2):
    if is_similar(attrs_a, attrs_b):
        G.add_edge(node_a, node_b)

# Draw the graph
plt.figure(figsize=(14, 14))
# k tunes the preferred distance between nodes; the seed keeps the layout
# (and therefore the figure) identical from one run to the next.
pos = nx.spring_layout(G, k=0.3, seed=42)
# Labels are disabled, so no font settings are needed.
nx.draw(G, pos, with_labels=False, node_color='lightblue', edge_color='gray', node_size=50)
plt.title("Traffic Accident Graph (Nodes as Accidents)")
plt.show()

In [None]:
import networkx as nx
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Assumes the graph G has already been built by an earlier cell
print("üîé S·ªë l∆∞·ª£ng n√∫t ban ƒë·∫ßu:", G.number_of_nodes())

# Node identifiers to partition
nodes = list(G.nodes)

# Hold out 30% of the nodes; the remaining 70% form the training set
train_nodes, remaining_nodes = train_test_split(
    nodes, test_size=0.3, random_state=42
)

# Split the held-out 30% again. train_test_split returns the complement first
# and the `test_size` share second, so the first result (about two thirds of
# the held-out nodes, ~20% overall) is the test set and the second result
# (about one third, ~10% overall) is the validation set.
test_nodes, val_nodes = train_test_split(
    remaining_nodes, test_size=0.333, random_state=42
)  # 0.333 * 30% = 10%

print(f"‚úÖ S·ªë l∆∞·ª£ng n√∫t - Train: {len(train_nodes)}, Test: {len(test_nodes)}, Val: {len(val_nodes)}")

# Build an independent subgraph per split. Each one keeps only the edges whose
# two endpoints both belong to that split.
G_train = G.subgraph(train_nodes).copy()
G_test = G.subgraph(test_nodes).copy()
G_val = G.subgraph(val_nodes).copy()


def _draw_split_graph(graph, title, node_color, edge_color, seed=42):
    """Draw one split's subgraph on a new 12x12 figure and show it.

    Args:
        graph: networkx graph to draw.
        title: text used as the figure title.
        node_color: matplotlib colour for the nodes.
        edge_color: matplotlib colour for the edges.
        seed: seed for the spring layout, so the figure is reproducible.

    Returns:
        The node-position mapping computed by the spring layout.
    """
    plt.figure(figsize=(12, 12))
    layout = nx.spring_layout(graph, seed=seed)
    nx.draw(graph, layout, with_labels=False, node_color=node_color,
            edge_color=edge_color, width=2, node_size=50)
    plt.title(title)
    plt.show()
    return layout


# Plot the training, test and validation subgraphs in that order.
# `pos` ends up holding the layout of the last graph drawn (validation).
for split_graph, split_title, split_node_color, split_edge_color in (
    (G_train, "ƒê·ªì th·ªã - T·∫≠p Hu·∫•n Luy·ªán", 'lightgreen', 'blue'),
    (G_test, "ƒê·ªì th·ªã - T·∫≠p Ki·ªÉm Tra", 'lightcoral', 'purple'),
    (G_val, "ƒê·ªì th·ªã - T·∫≠p X√°c Th·ª±c", 'lightblue', 'green'),
):
    pos = _draw_split_graph(split_graph, split_title, split_node_color, split_edge_color)