In [10]:
import pandas as pd
import random
from datetime import timedelta, datetime

def generate_transactions(num_lines, start_date, end_date, company_id_range=(1000, 9999), num_preferred_partners=5, save=False, file_name='transactions.csv'):
    # Convert start and end dates to datetime objects
    start_date = datetime.strptime(start_date, '%Y-%m')
    end_date = datetime.strptime(end_date, '%Y-%m')
    
    # Create empty DataFrame with desired columns
    columns = ['year-month', 'origin_company_id', 'destiny_company_id', 'transaction_quantity', 'transaction_value']
    transactions = pd.DataFrame(columns=columns)

    # Dictionary to store preferred partners for companies
    preferred_partners = {}

    # Generate transactions
    for i in range(num_lines):
        # Generate random date within the range
        random_date = start_date + (end_date - start_date) * random.random()
        year_month = random_date.strftime('%Y-%m')

        # Generate random origin company ID
        origin_company_id = random.randint(*company_id_range)

        # Select destiny company ID based on preferential attachment
        if origin_company_id in preferred_partners and random.random() < 0.7: # 70% chance to select a preferred partner
            destiny_company_id = random.choice(preferred_partners[origin_company_id])
        else:
            destiny_company_id = random.randint(*company_id_range)
            # Update preferred partners
            preferred_partners.setdefault(origin_company_id, []).append(destiny_company_id)
            if len(preferred_partners[origin_company_id]) > num_preferred_partners:
                preferred_partners[origin_company_id].pop(0)

        # Generate transaction quantity and value, with some correlation
        transaction_quantity = random.randint(1, 10)
        transaction_value = transaction_quantity * random.uniform(10, 1000)

        # Append transaction to DataFrame
        transactions.loc[i] = [year_month, origin_company_id, destiny_company_id, transaction_quantity, transaction_value]

    # Optionally save to CSV
    if save:
        transactions.to_csv(file_name, index=False)
    
    return transactions

# Example usage
transactions = generate_transactions(20000, '2020-01', '2023-08', company_id_range=(1000, 1070), save=True)
print(transactions.head())

  year-month origin_company_id destiny_company_id transaction_quantity  \
0    2023-02              1001               1007                    6   
1    2022-07              1017               1015                    6   
2    2023-01              1047               1001                    2   
3    2020-08              1036               1042                   10   
4    2022-01              1042               1033                    7   

   transaction_value  
0        3121.341734  
1        2679.264604  
2        1589.671461  
3        9183.653171  
4        4622.657462  


In [11]:
import pandas as pd

def generate_financial_graphs(transactions, save=False, file_name_prefix='financial_graph'):
    # List to store financial graphs for each month
    monthly_graphs = []

    # Extract unique year-months from the transactions
    unique_months = transactions['year-month'].unique()

    for year_month in unique_months:
        # Filter transactions for the current month
        transactions_month = transactions[transactions['year-month'] == year_month]

        # Origin (Supplier) summary
        origin_summary = transactions_month.groupby('origin_company_id').agg(
            total_value_sent=('transaction_value', 'sum'),
            total_quantity_sent=('transaction_quantity', 'sum'),
            num_transactions_sent=('transaction_value', 'count')
        ).reset_index()

        # Destiny (Client) summary
        destiny_summary = transactions_month.groupby('destiny_company_id').agg(
            total_value_received=('transaction_value', 'sum'),
            total_quantity_received=('transaction_quantity', 'sum')
        ).reset_index()

        # Merging the transactions with the summaries to compute the percentages
        merged = pd.merge(transactions_month, origin_summary, on='origin_company_id', how='left')
        merged = pd.merge(merged, destiny_summary, on='destiny_company_id', how='left')

        # Calculating the required percentages
        merged['suppliersview_percent_value_sent'] = (merged['transaction_value'] / merged['total_value_sent']) * 100
        merged['suppliersview_percent_quantity_sent'] = (1 / merged['num_transactions_sent']) * 100
        merged['clientview_percent_value_received'] = (merged['transaction_value'] / merged['total_value_received']) * 100
        merged['clientview_percent_quantity_received'] = (1 / merged['total_quantity_received']) * 100

        # Aggregating by origin and destiny company IDs
        financial_graph = merged.groupby(['origin_company_id', 'destiny_company_id']).agg(
            transaction_volume=('transaction_value', 'sum'),
            num_transactions=('transaction_value', 'count'),
            suppliersview_percent_value_sent=('suppliersview_percent_value_sent', 'mean'),
            suppliersview_percent_quantity_sent=('suppliersview_percent_quantity_sent', 'mean'),
            clientview_percent_value_received=('clientview_percent_value_received', 'mean'),
            clientview_percent_quantity_received=('clientview_percent_quantity_received', 'mean')
        ).reset_index()

        # Optionally save to CSV for the current month
        if save:
            file_name = f"{file_name_prefix}_{year_month}.csv"
            financial_graph.to_csv(file_name, index=False)

        # Append the financial graph for the current month to the list
        monthly_graphs.append(financial_graph)

    return monthly_graphs

# Example usage
financial_graphs = generate_financial_graphs(transactions, save=True)
# The first financial graph (for the first month)
print(financial_graphs[0].head())

   origin_company_id  destiny_company_id  transaction_volume  \
0               1000                1039         2450.246546   
1               1000                1042         4286.607294   
2               1000                1054         3320.425369   
3               1000                1063         1666.681774   
4               1000                1065         8186.205779   

   num_transactions  suppliersview_percent_value_sent  \
0                 1                         10.419723   
1                 1                         18.228884   
2                 1                         14.120176   
3                 1                          7.087598   
4                 1                         34.812005   

   suppliersview_percent_quantity_sent  clientview_percent_value_received  \
0                            16.666667                           7.588839   
1                            16.666667                          36.931907   
2                            16.666667   

In [15]:
import pandas as pd
import igraph as ig
import altair as alt
import numpy as np
from scipy.stats import norm

def analyze_graph(file_name):
    # Read the financial graph from the CSV file
    financial_graph = pd.read_csv(file_name)

    # Create a directed graph using igraph
    g = ig.Graph.TupleList(financial_graph.itertuples(index=False), directed=True, edge_attrs=['transaction_volume'])
    
    # 1. Descriptive Analysis
    degrees_in = g.indegree()
    degrees_out = g.outdegree()

    # Plotting Degree Distributions
    degree_df = pd.DataFrame({
        'Degree': degrees_in + degrees_out,
        'Type': ['In'] * len(degrees_in) + ['Out'] * len(degrees_out)
    })
    alt.Chart(degree_df).mark_bar().encode(
        x='Degree:Q',
        y='count()',
        color='Type:N'
    ).show()

    # 2. Plot histogram of the transaction volumes (suppliers view)
    transaction_volumes = financial_graph['transaction_volume']
    hist_df = pd.DataFrame({'Transaction Volume': transaction_volumes})
    alt.Chart(hist_df).mark_bar().encode(
        alt.X('Transaction Volume:Q', bin=True),
        y='count()'
    ).show()

    # 3. Fit a normal distribution to the transaction volumes
    mu, std = norm.fit(transaction_volumes)
    threshold = mu - std
    x = np.linspace(transaction_volumes.min(), transaction_volumes.max(), 100)
    pdf_df = pd.DataFrame({
        'x': x,
        'pdf': norm.pdf(x, mu, std)
    })
    alt.Chart(pdf_df).mark_line().encode(
        x='x',
        y='pdf'
    ).show()

    # Plot threshold
    threshold_df = pd.DataFrame({'x': [threshold], 'y': [0]})
    alt.Chart(threshold_df).mark_rule(color='red').encode(
        x='x:Q'
    ).show()

    # 4. Remove edges below the threshold
    g.delete_edges(g.es.select(transaction_volume_lt=threshold))

    # 5. Centrality Measures
    betweenness = g.betweenness()
    closeness = g.closeness()
    
    # Plotting Betweenness and Closeness Centrality
    centrality_df = pd.DataFrame({
        'Betweenness': betweenness,
        'Closeness': closeness
    })
    alt.Chart(centrality_df).mark_circle().encode(
        alt.X('Betweenness:Q'),
        alt.Y('Closeness:Q'),
    ).show()

    # Additional analysis can be added as needed
    # ...

# Example file name
file_name = 'financial_graph_2023-01.csv'
analyze_graph(file_name)

Displaying chart at http://localhost:60463/
Displaying chart at http://localhost:60463/
Displaying chart at http://localhost:60463/
Displaying chart at http://localhost:60463/
Displaying chart at http://localhost:60463/


In [14]:
from scipy.stats import norm

def plot_histogram_with_threshold(transaction_volumes):
    # Fit a normal distribution to the transaction volumes
    mu, std = norm.fit(transaction_volumes)
    threshold = mu - std

    # Create histogram data
    hist_data = pd.DataFrame({'Transaction Volume': transaction_volumes})
    hist_plot = alt.Chart(hist_data).mark_bar(color='blue').encode(
        alt.X('Transaction Volume:Q', bin=True),
        y='count()'
    )

    # Create PDF data
    x = np.linspace(transaction_volumes.min(), transaction_volumes.max(), 100)
    pdf_data = pd.DataFrame({
        'x': x,
        'pdf': norm.pdf(x, mu, std)
    })
    pdf_plot = alt.Chart(pdf_data).mark_line(color='blue').encode(
        x='x',
        y=alt.Y('pdf', scale=alt.Scale(domain=(0, hist_data['Transaction Volume'].count())))
    )

    # Create threshold line data
    threshold_data = pd.DataFrame({'x': [threshold], 'y': [0]})
    threshold_plot = alt.Chart(threshold_data).mark_rule(color='orange').encode(
        x='x:Q'
    )

    # Combine all plots
    final_plot = (hist_plot + pdf_plot + threshold_plot).properties(
        title='Transaction Volume Distribution with Threshold'
    )
    
    return final_plot

# Sample transaction volumes
transaction_volumes = financial_graph['transaction_volume']

# Plot
plot = plot_histogram_with_threshold(transaction_volumes)
plot.show()

NameError: name 'financial_graph' is not defined

In [None]:
Please, enunciate and explain deeply what would be relevant/realistic/useful treatments of the monthly slice graph data to simplify its structure (we want to generate a reducted, tractable graph, but without oversoimplifing). Discuss the cut based on the transactions (its assumptions, flwas, etc). Describe another methods to pursue this need in out problem (too huge in real life) and that needs to be tractable (keep in mind our viralization with patient zero in mind). Describe the methods, discuss them deeply and provide python code (complemenetary to last one) to apply it