In [23]:
import os
from os import path as osp

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

# from nxviz.plots import CircosPlot

# curr_path = osp.dirname(osp.abspath('__file__'))
# Directory holding the Moreno crime dataset files. The MORENO_CRIME_DIR
# environment variable, when set, overrides the machine-specific default so the
# notebook can run on another checkout without editing this cell.
curr_path = os.environ.get(
    "MORENO_CRIME_DIR",
    "/home/sofia/lase/repo2/LASE/data/dataset/moreno_crime",
)

def load_crime_network(data_dir=None):
    """Build the person/crime graph from the Moreno crime dataset files.

    Three files are read from ``data_dir``:

    * ``out.moreno_crime_crime``: space-separated edge list. The first two
      lines are skipped and only the first two columns (person ID, crime ID)
      are kept.
    * ``rel.moreno_crime_crime.person.role``: one role per line, paired with
      the edge list by row position.
    * ``ent.moreno_crime_crime.person.sex``: one gender code per line, where
      line ``k`` (1-based) is assigned to node ``"p<k>"``.

    Parameters
    ----------
    data_dir : str, optional
        Directory containing the three files. When omitted, the module-level
        ``curr_path`` is used.

    Returns
    -------
    networkx.Graph
        Graph whose nodes are labelled ``"p<ID>"`` (``type="person"``, plus a
        ``gender`` attribute) and ``"c<ID>"`` (``type="crime"``). Every node
        carries its own label under ``id`` and every edge carries a ``role``.

    Raises
    ------
    KeyError
        If the sex file has a row for a person that never appears in the edge
        list, since the gender is assigned to an existing node only.
    """
    if data_dir is None:
        data_dir = curr_path

    df = pd.read_csv(
        osp.join(data_dir, "out.moreno_crime_crime"),
        sep=" ",
        skiprows=2,
        header=None,
    )
    df = df[[0, 1]]
    df.columns = ["personID", "crimeID"]
    # Shift to a 1-based index so it lines up with the roles frame below.
    df.index += 1

    # Read in the role metadata
    roles = pd.read_csv(
        osp.join(data_dir, "rel.moreno_crime_crime.person.role"),
        header=None,
    )
    roles.columns = ["roles"]
    roles.index += 1

    # Add the edge data to the graph. Rows are visited in file order, which
    # determines the node insertion order of the resulting graph.
    G = nx.Graph()
    for _, d in df.join(roles).iterrows():
        pid = "p{0}".format(d["personID"])  # pid stands for "Person I.D."
        cid = "c{0}".format(d["crimeID"])  # cid stands for "Crime I.D."
        G.add_node(pid, id=pid, type="person")
        G.add_node(cid, id=cid, type="crime")
        G.add_edge(pid, cid, role=d["roles"])

    # Read in the gender metadata
    gender = pd.read_csv(
        osp.join(data_dir, "ent.moreno_crime_crime.person.sex"),
        header=None,
    )
    # 1-based index: row k of the file describes person k.
    gender.index += 1
    for n, gender_code in gender.iterrows():
        nodeid = "p{0}".format(n)
        G.nodes[nodeid]["gender"] = gender_code[0]

    return G

In [24]:
# Load the person/crime graph and keep its undirected form.
G = load_crime_network().to_undirected()

# Map every node label to its position in G's node iteration order.
mapping = {label: position for position, label in enumerate(G)}
_G = nx.relabel_nodes(G, mapping)

# Print every relabelled node together with its attribute dict.
for node, data in _G.nodes(data=True):
    print(node, data)

0 {'id': 'p1', 'type': 'person', 'gender': 1}
1 {'id': 'c1', 'type': 'crime'}
2 {'id': 'c2', 'type': 'crime'}
3 {'id': 'c3', 'type': 'crime'}
4 {'id': 'c4', 'type': 'crime'}
5 {'id': 'p2', 'type': 'person', 'gender': 1}
6 {'id': 'c5', 'type': 'crime'}
7 {'id': 'c6', 'type': 'crime'}
8 {'id': 'c7', 'type': 'crime'}
9 {'id': 'c8', 'type': 'crime'}
10 {'id': 'c9', 'type': 'crime'}
11 {'id': 'c10', 'type': 'crime'}
12 {'id': 'c11', 'type': 'crime'}
13 {'id': 'c12', 'type': 'crime'}
14 {'id': 'c13', 'type': 'crime'}
15 {'id': 'c14', 'type': 'crime'}
16 {'id': 'c15', 'type': 'crime'}
17 {'id': 'c16', 'type': 'crime'}
18 {'id': 'c17', 'type': 'crime'}
19 {'id': 'c18', 'type': 'crime'}
20 {'id': 'c19', 'type': 'crime'}
21 {'id': 'c20', 'type': 'crime'}
22 {'id': 'c21', 'type': 'crime'}
23 {'id': 'c22', 'type': 'crime'}
24 {'id': 'c23', 'type': 'crime'}
25 {'id': 'c24', 'type': 'crime'}
26 {'id': 'c25', 'type': 'crime'}
27 {'id': 'c26', 'type': 'crime'}
28 {'id': 'p3', 'type': 'person', 'gender

In [15]:
# Dense integer adjacency matrix of G, in G's node iteration order.
adj_matrix = (
    nx.adjacency_matrix(G)
    .todense()
    .astype(int)
)

In [None]:

# Relabel nodes with consecutive integers, following G's node iteration order.
mapping = {node: i for i, node in enumerate(G.nodes())}
G_ = nx.relabel_nodes(G, mapping)

# NOTE(review): this cell reads node attributes "type" == "country",
# "country_features" / "res_features" and an edge attribute "unknown". The
# load_crime_network() definitions in this notebook only set "id"/"type"
# ("person"/"crime") or "bipartite" on nodes and "role" on edges, so this cell
# presumably targets a different graph -- confirm which G is intended before
# running it (it raises KeyError on a graph lacking these attributes).
country_indexes = []
res_indexes = []
features = []

# Split the integer node labels by type while collecting the per-node features
# in node order.
for node, data in G_.nodes(data=True):
    if data["type"] == "country":
        country_indexes.append(node)
        features.append(data['country_features'])
    else:
        res_indexes.append(node)
        features.append(data['res_features'])


# Edges flagged as unknown are recorded in both directions.
unknown_edges = []

for u, v, data in G_.edges(data=True):
    if data['unknown']:
        unknown_edges.extend([(u, v), (v, u)])


# Build the matrix from the relabelled graph with an explicit node order, so
# that row/column i is guaranteed to be the node labelled i in the index lists
# above.
adj_matrix = (
    nx.adjacency_matrix(G_, nodelist=list(range(G_.number_of_nodes())))
    .todense()
    .astype(int)
)

len(country_indexes), len(res_indexes), adj_matrix.shape[0]

In [13]:
# Print each node label of G followed by its attribute dict.
for node, data in G.nodes.data():
    print(node, data)

p1 {'bipartite': 'person', 'gender': 1}
c1 {'bipartite': 'crime'}
c2 {'bipartite': 'crime'}
c3 {'bipartite': 'crime'}
c4 {'bipartite': 'crime'}
p2 {'bipartite': 'person', 'gender': 1}
c5 {'bipartite': 'crime'}
c6 {'bipartite': 'crime'}
c7 {'bipartite': 'crime'}
c8 {'bipartite': 'crime'}
c9 {'bipartite': 'crime'}
c10 {'bipartite': 'crime'}
c11 {'bipartite': 'crime'}
c12 {'bipartite': 'crime'}
c13 {'bipartite': 'crime'}
c14 {'bipartite': 'crime'}
c15 {'bipartite': 'crime'}
c16 {'bipartite': 'crime'}
c17 {'bipartite': 'crime'}
c18 {'bipartite': 'crime'}
c19 {'bipartite': 'crime'}
c20 {'bipartite': 'crime'}
c21 {'bipartite': 'crime'}
c22 {'bipartite': 'crime'}
c23 {'bipartite': 'crime'}
c24 {'bipartite': 'crime'}
c25 {'bipartite': 'crime'}
c26 {'bipartite': 'crime'}
p3 {'bipartite': 'person', 'gender': 1}
c27 {'bipartite': 'crime'}
p4 {'bipartite': 'person', 'gender': 1}
p5 {'bipartite': 'person', 'gender': 1}
p6 {'bipartite': 'person', 'gender': 1}
c28 {'bipartite': 'crime'}
p7 {'bipartit

In [None]:



def load_crime_network(data_dir=None):
    """Build the bipartite person/crime graph from the Moreno crime files.

    Three files are read from ``data_dir``:

    * ``out.moreno_crime_crime``: space-separated edge list. The first two
      lines are skipped and only the first two columns (person ID, crime ID)
      are kept.
    * ``rel.moreno_crime_crime.person.role``: one role per line, paired with
      the edge list by row position.
    * ``ent.moreno_crime_crime.person.sex``: one gender code per line, where
      line ``k`` (1-based) is assigned to node ``"p<k>"``.

    Parameters
    ----------
    data_dir : str, optional
        Directory containing the three files. When omitted, it defaults to
        ``<curr_path>/data/dataset/moreno_crime``.

    Returns
    -------
    networkx.Graph
        Graph whose nodes are labelled ``"p<ID>"`` (``bipartite="person"``,
        plus a ``gender`` attribute) and ``"c<ID>"`` (``bipartite="crime"``),
        with a ``role`` attribute on every edge.
    """
    if data_dir is None:
        # Join with a proper separator: plain string concatenation produced a
        # wrong path whenever curr_path had no trailing slash.
        # NOTE(review): this default assumes curr_path is the repository root;
        # confirm, since curr_path may instead point at the dataset directory.
        data_dir = osp.join(curr_path, "data", "dataset", "moreno_crime")

    df = pd.read_csv(
        osp.join(data_dir, "out.moreno_crime_crime"),
        sep=" ",
        skiprows=2,
        header=None,
    )
    df = df[[0, 1]]
    df.columns = ["personID", "crimeID"]
    # Shift to a 1-based index so it lines up with the roles frame below.
    df.index += 1

    # Read in the role metadata
    roles = pd.read_csv(
        osp.join(data_dir, "rel.moreno_crime_crime.person.role"),
        header=None,
    )
    roles.columns = ["roles"]
    roles.index += 1

    # Add the edge data to the graph.
    G = nx.Graph()
    for _, d in df.join(roles).iterrows():
        pid = "p{0}".format(d["personID"])  # pid stands for "Person I.D."
        cid = "c{0}".format(d["crimeID"])  # cid stands for "Crime I.D."
        G.add_node(pid, bipartite="person")
        G.add_node(cid, bipartite="crime")
        G.add_edge(pid, cid, role=d["roles"])

    # Read in the gender metadata
    gender = pd.read_csv(
        osp.join(data_dir, "ent.moreno_crime_crime.person.sex"),
        header=None,
    )
    # 1-based index: row k of the file describes person k.
    gender.index += 1
    for n, gender_code in gender.iterrows():
        nodeid = "p{0}".format(n)
        # G.nodes replaces the G.node attribute removed in NetworkX 2.4.
        G.nodes[nodeid]["gender"] = gender_code[0]

    return G




# Annotate each node with connectivity score.
# NOTE(review): G is not assigned in this cell; it relies on a G created
# elsewhere in the notebook whose nodes carry a "bipartite" attribute -- confirm.
# Degree centrality is computed once for the whole graph instead of once per
# node, and G.nodes replaces the G.node attribute removed in NetworkX 2.4.
dcs = nx.degree_centrality(G)
for n in G.nodes():
    G.nodes[n]["connectivity"] = dcs[n]

# Make a CircosPlot of the bipartite graph
# c = CircosPlot(
#     G,
#     node_grouping="bipartite",
#     node_order="connectivity",
#     node_color="bipartite",
# )
# c.draw()


# Make the "people" projection of the bipartite graph.
person_nodes = [n for n in G.nodes() if G.nodes[n]["bipartite"] == "person"]
pG = nx.bipartite.projection.projected_graph(G, person_nodes)

# Same annotation for the projection, again with a single centrality pass.
dcs = nx.degree_centrality(pG)
for n in pG.nodes():
    pG.nodes[n]["connectivity"] = dcs[n]

# c = CircosPlot(
#     pG, node_grouping="gender", node_order="connectivity", node_color="gender"
# )
# c.draw()
# plt.show()

In [1]:
import pandas as pd

# Load the Moreno Crime dataset into a DataFrame.
# NOTE(review): the recorded output of this cell is "HTTP Error 404: Not Found",
# so this URL did not resolve when the cell was run -- confirm the dataset
# location. The cell also uses `plt`, which it does not import itself.
url = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/moreno_crime.csv"
crime_df = pd.read_csv(url)

# First rows of the frame
print(crime_df.head())

# Number of missing values per column
print(crime_df.isna().sum())

# Summary statistics
print(crime_df.describe())

# Bar chart of how often each crime type occurs
crime_df['crime_type'].value_counts().plot.bar(
    figsize=(10, 6),
    title='Distribution of Crime Types',
)
plt.xlabel('Crime Type')
plt.ylabel('Frequency')
plt.show()


HTTPError: HTTP Error 404: Not Found