In [1]:
import pandas as pd
import random
from datetime import timedelta, datetime

def generate_transactions(num_lines, start_date, end_date, company_id_range=(1000, 9999), num_preferred_partners=5, save=False, file_name='transactions.csv'):
    # Convert start and end dates to datetime objects
    start_date = datetime.strptime(start_date, '%Y-%m')
    end_date = datetime.strptime(end_date, '%Y-%m')
    
    # Create empty DataFrame with desired columns
    columns = ['year-month', 'origin_company_id', 'destiny_company_id', 'transaction_quantity', 'transaction_value']
    transactions = pd.DataFrame(columns=columns)

    # Dictionary to store preferred partners for companies
    preferred_partners = {}

    # Generate transactions
    for i in range(num_lines):
        # Generate random date within the range
        random_date = start_date + (end_date - start_date) * random.random()
        year_month = random_date.strftime('%Y-%m')

        # Generate random origin company ID
        origin_company_id = random.randint(*company_id_range)

        # Select destiny company ID based on preferential attachment
        if origin_company_id in preferred_partners and random.random() < 0.7: # 70% chance to select a preferred partner
            destiny_company_id = random.choice(preferred_partners[origin_company_id])
        else:
            destiny_company_id = random.randint(*company_id_range)
            # Update preferred partners
            preferred_partners.setdefault(origin_company_id, []).append(destiny_company_id)
            if len(preferred_partners[origin_company_id]) > num_preferred_partners:
                preferred_partners[origin_company_id].pop(0)

        # Generate transaction quantity and value, with some correlation
        transaction_quantity = random.randint(1, 10)
        transaction_value = transaction_quantity * random.uniform(10, 1000)

        # Append transaction to DataFrame
        transactions.loc[i] = [year_month, origin_company_id, destiny_company_id, transaction_quantity, transaction_value]

    # Optionally save to CSV
    if save:
        transactions.to_csv(file_name, index=False)
    
    return transactions

# Example usage
transactions = generate_transactions(20000, '2020-01', '2023-08', company_id_range=(1000, 1070), save=True)
print(transactions.head())

  year-month origin_company_id destiny_company_id transaction_quantity  \
0    2020-09              1014               1045                   10   
1    2020-06              1025               1036                    2   
2    2021-06              1064               1028                    7   
3    2020-12              1046               1067                    8   
4    2023-07              1042               1030                    5   

   transaction_value  
0         896.510463  
1        1009.190912  
2        5263.620785  
3        3726.107144  
4        1928.224098  


In [2]:
import pandas as pd

def generate_financial_graphs(transactions, save=False, file_name_prefix='financial_graph'):
    # List to store financial graphs for each month
    monthly_graphs = []

    # Extract unique year-months from the transactions
    unique_months = transactions['year-month'].unique()

    for year_month in unique_months:
        # Filter transactions for the current month
        transactions_month = transactions[transactions['year-month'] == year_month]

        # Origin (Supplier) summary
        origin_summary = transactions_month.groupby('origin_company_id').agg(
            total_value_sent=('transaction_value', 'sum'),
            total_quantity_sent=('transaction_quantity', 'sum'),
            num_transactions_sent=('transaction_value', 'count')
        ).reset_index()

        # Destiny (Client) summary
        destiny_summary = transactions_month.groupby('destiny_company_id').agg(
            total_value_received=('transaction_value', 'sum'),
            total_quantity_received=('transaction_quantity', 'sum')
        ).reset_index()

        # Merging the transactions with the summaries to compute the percentages
        merged = pd.merge(transactions_month, origin_summary, on='origin_company_id', how='left')
        merged = pd.merge(merged, destiny_summary, on='destiny_company_id', how='left')

        # Calculating the required percentages
        merged['suppliersview_percent_value_sent'] = (merged['transaction_value'] / merged['total_value_sent']) * 100
        merged['suppliersview_percent_quantity_sent'] = (1 / merged['num_transactions_sent']) * 100
        merged['clientview_percent_value_received'] = (merged['transaction_value'] / merged['total_value_received']) * 100
        merged['clientview_percent_quantity_received'] = (1 / merged['total_quantity_received']) * 100

        # Aggregating by origin and destiny company IDs
        financial_graph = merged.groupby(['origin_company_id', 'destiny_company_id']).agg(
            transaction_volume=('transaction_value', 'sum'),
            num_transactions=('transaction_value', 'count'),
            suppliersview_percent_value_sent=('suppliersview_percent_value_sent', 'mean'),
            suppliersview_percent_quantity_sent=('suppliersview_percent_quantity_sent', 'mean'),
            clientview_percent_value_received=('clientview_percent_value_received', 'mean'),
            clientview_percent_quantity_received=('clientview_percent_quantity_received', 'mean')
        ).reset_index()

        # Optionally save to CSV for the current month
        if save:
            file_name = f"{file_name_prefix}_{year_month}.csv"
            financial_graph.to_csv(file_name, index=False)

        # Append the financial graph for the current month to the list
        monthly_graphs.append(financial_graph)

    return monthly_graphs

# Example usage
financial_graphs = generate_financial_graphs(transactions, save=True)
# The first financial graph (for the first month)
print(financial_graphs[0].head())

   origin_company_id  destiny_company_id  transaction_volume  \
0               1000                1000         2879.589388   
1               1000                1006         6969.061909   
2               1000                1025         3282.508771   
3               1000                1039         5795.386128   
4               1000                1049         3250.497172   

   num_transactions  suppliersview_percent_value_sent  \
0                 1                          9.203022   
1                 1                         22.272769   
2                 1                         10.490732   
3                 1                         18.521761   
4                 1                         10.388425   

   suppliersview_percent_quantity_sent  clientview_percent_value_received  \
0                            14.285714                          14.556453   
1                            14.285714                          21.647845   
2                            14.285714   

In [5]:
from igraph import Graph
import pandas as pd
import random

def generate_subgraph(patient_zero=None, depth_limit=2, year_month='2026-03'):
    # Read the graph table
    file_name = f"financial_graph_{year_month}.csv"
    graph_table = pd.read_csv(file_name)
    
    # Create the full graph from the table
    full_graph = Graph.TupleList(edges=graph_table[['origin_company_id', 'destiny_company_id']].itertuples(index=False), directed=True)

    # Get a random patient_zero if not provided
    if patient_zero is None:
        patient_zero = random.choice(full_graph.vs)['name']

    # Get the vertex ID for patient zero
    patient_zero_vertex = full_graph.vs.find(name=str(patient_zero))

    # Perform BFS to get vertices within the depth limit
    subgraph_vertices = set()
    bfs_result = full_graph.bfsiter(patient_zero_vertex, advanced=True)
    for vertex, edge, depth in bfs_result:
        if depth > depth_limit:
            break
        subgraph_vertices.add(vertex.index)

    # Create the subgraph
    subgraph = full_graph.subgraph(subgraph_vertices)
    
    return subgraph

# Example usage with random patient_zero
subgraph = generate_subgraph()

FileNotFoundError: [Errno 2] No such file or directory: 'financial_graph_2026-03.csv'

In [6]:
from igraph import Graph
import pandas as pd

def generate_subgraph(patient_zero, depth_limit=2):
    # Read the graph table
    file_name = "financial_graph_2023-06.csv"  # Adjust the file name accordingly
    graph_table = pd.read_csv(file_name)
    
    # Create the full graph from the table
    full_graph = Graph.TupleList(edges=graph_table[['origin_company_id', 'destiny_company_id']].itertuples(index=False), directed=True)

    # Get the vertex ID for patient zero
    patient_zero_vertex = full_graph.vs.find(name=str(patient_zero))

    # Perform BFS to get vertices within the depth limit
    bfs_result = full_graph.bfsiter(patient_zero_vertex, advanced=True)

    # Extract the vertices and edges within the depth limit
    subgraph_vertices = set()
    subgraph_edges = []
    for vertex, edge, depth in bfs_result:
        if depth > depth_limit:
            break
        subgraph_vertices.add(vertex.index)
        if edge is not None:
            subgraph_edges.append(edge.index)

    # Create the subgraph
    subgraph = full_graph.subgraph(subgraph_vertices)
    
    return subgraph

# Example usage with patient_zero as the given node ID
patient_zero = "1000"
subgraph = generate_subgraph()

TypeError: generate_subgraph() missing 1 required positional argument: 'patient_zero'

In [8]:
from igraph import Graph
import pandas as pd
import random

def generate_subgraph(patient_zero=None, depth_limit=2, year_month='2023-06'):
    # Read the graph table
    file_name = f"financial_graph_{year_month}.csv"
    graph_table = pd.read_csv(file_name)
    
    # Create the full graph from the table
    full_graph = Graph.TupleList(edges=graph_table[['origin_company_id', 'destiny_company_id']].itertuples(index=False), directed=True)

    # Get a random patient_zero if not provided
    if patient_zero is None:
        patient_zero = random.choice(full_graph.vs)['name']

    # Get the vertex ID for patient zero
    patient_zero_vertex = full_graph.vs.find(name=str(patient_zero))

    # Perform BFS to get vertices within the depth limit
    subgraph_vertices = set()
    bfs_result = full_graph.bfsiter(patient_zero_vertex, advanced=True)
    for vertex, edge, depth in bfs_result:
        if depth > depth_limit:
            break
        subgraph_vertices.add(vertex.index)

    # Create the subgraph
    subgraph = full_graph.subgraph(subgraph_vertices)
    
    return subgraph

# Example usage with random patient_zero
subgraph = generate_subgraph()

ValueError: no such vertex: '1026'

In [11]:
file_name = f"financial_graph_2023-06.csv"
graph_table = pd.read_csv(file_name)

In [12]:
graph_table

Unnamed: 0,origin_company_id,destiny_company_id,transaction_volume,num_transactions,suppliersview_percent_value_sent,suppliersview_percent_quantity_sent,clientview_percent_value_received,clientview_percent_quantity_received
0,1000,1007,4578.298646,1,18.499527,10.000000,18.113172,2.222222
1,1000,1021,816.294246,1,3.298399,10.000000,2.965248,2.173913
2,1000,1045,1726.359133,1,6.975698,10.000000,12.669352,2.500000
3,1000,1047,840.946430,1,3.398011,10.000000,6.498775,3.846154
4,1000,1053,2543.714638,1,10.278385,10.000000,10.932635,1.886792
...,...,...,...,...,...,...,...,...
430,1069,1069,6729.305389,1,20.941136,11.111111,20.368983,1.851852
431,1070,1041,2639.151311,1,16.694381,25.000000,10.182909,2.325581
432,1070,1052,1222.214601,1,7.731317,25.000000,3.919300,1.724138
433,1070,1069,9390.370233,1,59.400314,25.000000,28.423779,1.851852


In [10]:
from igraph import Graph
import pandas as pd
import random

def generate_subgraph(patient_zero=None, depth_limit=2, year_month='2023-06'):
    # Read the graph table
    file_name = f"financial_graph_{year_month}.csv"
    graph_table = pd.read_csv(file_name)
    
    # Create the full graph from the table
    full_graph = Graph.TupleList(edges=graph_table[['origin_company_id', 'destiny_company_id']].itertuples(index=False), directed=True)
    
    # Set the name attribute for vertices based on the company IDs
    full_graph.vs['name'] = [str(x) for x in graph_table['origin_company_id'].unique().tolist()]

    # Get a random patient_zero if not provided
    if patient_zero is None:
        patient_zero = random.choice(full_graph.vs)['name']

    # Get the vertex ID for patient zero
    patient_zero_vertex = full_graph.vs.find(name=str(patient_zero))

    # Perform BFS to get vertices within the depth limit
    subgraph_vertices = set()
    bfs_result = full_graph.bfsiter(patient_zero_vertex, advanced=True)
    for vertex, edge, depth in bfs_result:
        if depth > depth_limit:
            break
        subgraph_vertices.add(vertex.index)

    # Create the subgraph
    subgraph = full_graph.subgraph(subgraph_vertices)
    
    return subgraph

# Example usage with random patient_zero
subgraph = generate_subgraph()

TypeError: '>' not supported between instances of 'NoneType' and 'int'

In [30]:
import altair as alt
import numpy as np
import pandas as pd
from igraph import Graph

def generate_subgraph_by_value(percentile=80):
    # Read the graph table
    file_name = "financial_graph_2023-06.csv"  # Adjust the file name accordingly
    graph_table = pd.read_csv(file_name)
    
    # Determine the threshold based on percentile
    threshold = np.percentile(graph_table['transaction_volume'], percentile)

    # Filter the table based on the threshold
    filtered_table = graph_table[graph_table['transaction_volume'] >= threshold]

    # Create the subgraph from the filtered table
    subgraph = Graph.TupleList(edges=filtered_table[['origin_company_id', 'destiny_company_id']].itertuples(index=False), directed=True)
    
    # Plot the histogram and threshold line
    hist_data = pd.DataFrame({'Transaction Value': graph_table['transaction_volume']})
    hist_plot = alt.Chart(hist_data).mark_bar(color="#061727").encode(
        alt.X('Transaction Value:Q', bin=True),
        y='count()'
    )
    threshold_line = alt.Chart(pd.DataFrame({'x': [threshold]})).mark_rule(color='orange').encode(
        x='x:Q'
    )
    plot = (hist_plot + threshold_line).properties(
        title='Transaction Value Distribution with Threshold'
    )
    
    return subgraph, plot

# Example usage
subgraph, plot = generate_subgraph_by_value()

In [31]:
def generate_subgraph(patient_zero=None, depth_limit=2, year_month='2023-06'):
    # Read the graph table
    file_name = f"financial_graph_{year_month}.csv"
    graph_table = pd.read_csv(file_name)

    # Create the graph from the table
    full_graph = Graph.TupleList(edges=graph_table[['origin_company_id', 'destiny_company_id']].itertuples(index=False), directed=True)
    full_graph.vs['name'] = [str(x) for x in graph_table['origin_company_id'].append(graph_table['destiny_company_id']).unique().tolist()]

    # Print available vertices
    print("Vertices:", full_graph.vs['name'])

    # Get a random patient_zero if not provided
    if patient_zero is None:
        patient_zero = random.choice(full_graph.vs)['name']

    print("Chosen patient zero:", patient_zero)

    # Get the vertex ID for patient zero
    patient_zero_vertex = full_graph.vs.find(name=str(patient_zero))

    # Perform BFS to get vertices within the depth limit
    subgraph_vertices = set()
    queue = [(patient_zero_vertex, 0)] # Queue of (vertex, depth)
    
    while queue:
        vertex, depth = queue.pop(0)
        if depth > depth_limit:
            break
        subgraph_vertices.add(vertex.index)
        for neighbor in vertex.neighbors():
            queue.append((neighbor, depth + 1))

    # Create the subgraph
    subgraph = full_graph.subgraph(subgraph_vertices)
    
    return subgraph

# Example usage with random patient_zero
subgraph = generate_subgraph()

Vertices: ['1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070']
Chosen patient zero: 1061


plot.show()

In [34]:
import glob
import os

def generate_all_samples(folder_path, percentile=80, depth_limit=2):
    # Iterate through all financial graph files in the folder
    for file_name in glob.glob(f"{folder_path}/financial_graph_*.csv"):
        # Extract the year and month from the file name
        year_month = file_name.split('_')[-1].split('.')[0]
        
        # Generate subgraph by value threshold
        subgraph_threshold, plot_threshold = generate_subgraph_by_value(percentile=percentile)
        # Save the subgraph with the appropriate name
        subgraph_threshold.save(f"{file_name[:-4]}_sample_threshold_{percentile}.graphml")
        plot_threshold.save(f"{file_name[:-4]}_sample_threshold_{percentile}.html")
        
        # Generate subgraph by patient zero
        subgraph_patient_zero = generate_subgraph(year_month=year_month, depth_limit=depth_limit)
        # Get the patient zero id used
        patient_zero_id = subgraph_patient_zero.vs['name'][0]
        # Save the subgraph with the appropriate name
        subgraph_patient_zero.save(f"{file_name[:-4]}_sample_patient_zero_{patient_zero_id}.graphml")

# Example usage
generate_all_samples(folder_path=os.getcwd())

Vertices: ['1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '1050', '1051', '1052', '1053', '1054', '1055', '1056', '1057', '1058', '1059', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '1070']
Chosen patient zero: 1031
Vertices: ['1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1018', '1019', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', 