In [None]:
#COMPLEX SCIENCE SYSTEM APPLICATION
import pandas as pd
import networkx as nx

df = pd.read_csv('/Corruption_Dataset.csv')

G = nx.Graph()

# Add nodes and edges from the DataFrame, one transaction per row.
# itertuples(name=None) yields plain tuples in the column order selected
# here, which avoids building a Series for every row as iterrows() does.
edge_fields = ['SourceID', 'SourceType', 'SourceSector', 'TargetID',
               'RelationshipType', 'TransactionAmount', 'TransactionDate']
for (source_id, source_type, source_sector, target_id,
     relationship, amount, date) in df[edge_fields].itertuples(index=False, name=None):
    # Source nodes carry their type and sector; target nodes get no attributes here.
    G.add_node(source_id, type=source_type, sector=source_sector)
    G.add_node(target_id)
    # nx.Graph stores one edge per node pair, so a repeated pair keeps the
    # attributes of the last transaction seen.
    G.add_edge(source_id, target_id, relationship=relationship,
               amount=amount, date=date)

# Calculate basic network metrics
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
density = nx.density(G)
if num_nodes:
    avg_degree = sum(dict(G.degree()).values()) / num_nodes
    clustering_coefficient = nx.average_clustering(G)
else:
    # An empty dataset yields an empty graph; both averages would otherwise
    # raise ZeroDivisionError, so report 0.0 instead.
    avg_degree = 0.0
    clustering_coefficient = 0.0

metrics_summary = {
    "Number of Nodes": num_nodes,
    "Number of Edges": num_edges,
    "Average Degree": avg_degree,
    "Network Density": density,
    "Average Clustering Coefficient": clustering_coefficient
}

metrics_summary

In [None]:
#VISUALIZATION OF THE ABOVE
import matplotlib.pyplot as plt

# To manage the complexity, we limit the visualization to a subset of the network:
# the first 100 nodes in the graph's node order, plus the edges among them.
H = G.subgraph(list(G.nodes)[:100])

# Without explicit positions nx.draw computes an unseeded spring layout, so the
# picture would differ on every run. Seeding the layout keeps it reproducible.
subset_pos = nx.spring_layout(H, seed=42)

plt.figure(figsize=(12, 8))
nx.draw(H, pos=subset_pos, with_labels=True, node_color='lightblue', edge_color='gray',
        node_size=50, font_size=8)
plt.title("Subset of Corruption Network Visualization")
plt.show()

In [None]:
#NETWORK SCIENCE SYSTEM APPLICATION
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from networkx.algorithms import community

df = pd.read_csv('/Corruption_Dataset.csv')

# Undirected graph built from (source, target) pairs only; no attributes attached.
edges = list(zip(df['SourceID'], df['TargetID']))
G = nx.Graph()
G.add_edges_from(edges)

degree_centrality = nx.degree_centrality(G)

# Label propagation yields groups of nodes; number the groups in the order
# they are returned and record each node's group number.
communities = community.label_propagation_communities(G)
community_map = {
    member: group_id
    for group_id, members in enumerate(communities)
    for member in members
}

# Visualization: seeded spring layout, nodes coloured by community number
pos = nx.spring_layout(G, seed=42)
colors = [community_map[vertex] for vertex in G.nodes()]

plt.figure(figsize=(12, 8))
nx.draw_networkx_nodes(G, pos, node_color=colors, node_size=20, alpha=0.8)
nx.draw_networkx_edges(G, pos, alpha=0.5)
plt.title("Network Visualization with Label Propagation Community Detection")
plt.show()

In [None]:
#EMPIRICAL APPROACH APPLICATION
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('/Corruption_Dataset.csv')

# Summary statistics of the transaction amounts
transaction_stats = df['TransactionAmount'].describe()

# Frequency Analysis: how often each category value occurs
relationship_type_counts = df['RelationshipType'].value_counts()
sector_counts = df['SourceSector'].value_counts()
role_counts = df['SourceType'].value_counts()

# Temporal Analysis: parse dates (unparseable values become NaT), then count
# rows per distinct timestamp in chronological order
df['TransactionDate'] = pd.to_datetime(df['TransactionDate'], errors='coerce')
date_counts = df['TransactionDate'].value_counts()
transactions_over_time = date_counts.sort_index()

# Visualizing the trends in transactions over time
fig, ax = plt.subplots(figsize=(12, 8))
transactions_over_time.plot(kind='line', ax=ax)
ax.set_title('Trend of Transactions Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Transactions')
plt.show()

# Outputting the results
for heading, table in (
    ("Transaction Statistics:\n", transaction_stats),
    ("\nRelationship Type Counts:\n", relationship_type_counts),
    ("\nSector Counts:\n", sector_counts),
    ("\nRole Counts:\n", role_counts),
):
    print(heading, table)

In [None]:
#CASE STUDY
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

df = pd.read_csv('/Corruption_Dataset.csv')

# Adjust the threshold and sector criteria
high_value_threshold = df['TransactionAmount'].quantile(0.85)
expanded_suspect_sectors = ['Social Program', 'Non-Profit', 'Government']

# Filter the dataset again with the new criteria.
# .copy() gives an independent frame, so the date conversion further down does
# not write into a slice of df (which triggers SettingWithCopyWarning).
suspect_transactions = df[(df['TransactionAmount'] >= high_value_threshold) &
                          (df['SourceSector'].isin(expanded_suspect_sectors))].copy()


if not suspect_transactions.empty:
    # Create a subgraph for the suspect transactions; every remaining column
    # becomes an edge attribute.
    G_suspect = nx.from_pandas_edgelist(suspect_transactions, 'SourceID', 'TargetID',
                                        edge_attr=True)

    if len(G_suspect) > 0:
        # Find communities within the suspect subgraph
        communities_suspect = list(nx.algorithms.community.greedy_modularity_communities(G_suspect))

        # Convert the 'TransactionDate' to datetime
        suspect_transactions['TransactionDate'] = pd.to_datetime(suspect_transactions['TransactionDate'])
        # Group by month and sum the transaction amounts
        transactions_by_month = suspect_transactions.groupby(suspect_transactions['TransactionDate'].dt.to_period('M'))['TransactionAmount'].sum()

        # Plot the high-value suspect transactions over time
        plt.figure(figsize=(14, 7))
        ax = transactions_by_month.plot(kind='bar', color='skyblue', title='High-Value Suspect Transactions Over Time')
        ax.set_xlabel('Month')
        ax.set_ylabel('Total Transaction Amount')

        # Thin the x labels by hand. The bar plot assigns a fixed list of
        # labels to bar positions 0..n-1; swapping in an automatic locator
        # moves the ticks but keeps handing out labels in list order, which
        # puts month names under the wrong bars. Choosing tick positions and
        # their matching labels together keeps them aligned.
        max_month_labels = 24
        n_months = len(transactions_by_month)
        label_step = max(1, -(-n_months // max_month_labels))  # ceiling division
        tick_positions = list(range(0, n_months, label_step))
        ax.set_xticks(tick_positions)
        ax.set_xticklabels([str(transactions_by_month.index[i]) for i in tick_positions])
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()
    else:
        print("The suspect subgraph is empty. Adjust your criteria and try again.")
else:
    print("No suspect transactions were found based on the criteria.")


In [None]:
# To create a table of network metrics decomposed by the 'SourceType' attribute as a layer of information,
# we will first need to group the data by 'SourceType' and then calculate the network metrics for each group.

import pandas as pd
import networkx as nx
import numpy as np

# Load the hypothetical dataset from a CSV at the filesystem root.
# The per-layer table built below reads its 'SourceType', 'SourceID' and
# 'TargetID' columns.
df = pd.read_csv('/Corruption_Dataset.csv')

# This function will calculate the required metrics for a given graph
def calculate_network_metrics(G):
    """Compute summary metrics for an undirected graph.

    Parameters
    ----------
    G : graph object
        Must provide ``number_of_nodes()`` and ``degree()`` and be accepted
        by the networkx functions called below.

    Returns
    -------
    tuple
        ``(density, avg_degree, clustering_coefficient,
        num_connected_components, diameter, avg_path_length)``.
        All six entries are ``None`` when the graph has no nodes. Diameter
        and average path length are measured on the whole graph if it is
        connected, otherwise on its largest connected component.
    """
    if not G.number_of_nodes():
        # Nothing to measure on an empty graph.
        return (None,) * 6

    degrees = [deg for _, deg in G.degree()]
    avg_degree = np.mean(degrees)
    density = nx.density(G)
    clustering_coefficient = nx.average_clustering(G)
    num_connected_components = nx.number_connected_components(G)

    # Path-based metrics are only computed on a connected graph, so fall back
    # to the largest component when G is split into several pieces.
    if nx.is_connected(G):
        core = G
    else:
        core = G.subgraph(max(nx.connected_components(G), key=len))

    return (
        density,
        avg_degree,
        clustering_coefficient,
        num_connected_components,
        nx.diameter(core),
        nx.average_shortest_path_length(core),
    )

# Column names of the metrics table, in the order the records are assembled
metrics_columns = ['Layer', 'Total Nodes', 'Total Edges', 'Density', 'Average Degree', 'Clustering Coefficient', 'Connected Components', 'Diameter', 'Average Path Length']

# Collect one record per layer and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
layer_records = []

# Iterate over each unique 'SourceType' to create subgraphs and calculate metrics
for source_type in df['SourceType'].unique():
    # Filter the DataFrame for the current source type
    sub_df = df[df['SourceType'] == source_type]
    # Create the graph from this layer's (SourceID, TargetID) pairs
    sub_G = nx.from_pandas_edgelist(sub_df, 'SourceID', 'TargetID')
    # Calculate metrics
    density, avg_degree, clustering_coefficient, num_connected_components, diameter, avg_path_length = calculate_network_metrics(sub_G)
    layer_records.append({
        'Layer': source_type,
        'Total Nodes': sub_G.number_of_nodes(),
        'Total Edges': sub_G.number_of_edges(),
        'Density': density,
        'Average Degree': avg_degree,
        'Clustering Coefficient': clustering_coefficient,
        'Connected Components': num_connected_components,
        'Diameter': diameter,
        'Average Path Length': avg_path_length
    })

# Passing columns= keeps the expected columns even if no layers were found
network_metrics_df = pd.DataFrame(layer_records, columns=metrics_columns)

# Reorder the DataFrame to match the order in the screenshot
network_metrics_df = network_metrics_df[['Layer', 'Total Nodes', 'Total Edges', 'Density', 'Diameter', 'Average Path Length', 'Average Degree', 'Clustering Coefficient', 'Connected Components']]

network_metrics_df