In [4]:
import numpy as np
import pandas as pd

In [6]:

# Build a small, reproducible synthetic SaaS portfolio and export it to CSV.
np.random.seed(42)
n = 10

# The dict is evaluated top to bottom, so the random draws happen in the order
# ARR -> growth -> burn -> runway -> churn. Keep that order to keep the values.
data = {
    "company_name": [f"Company_{i}" for i in range(1, n+1)],
    "arr_million": np.random.uniform(1, 10, n).round(2),
    "growth_rate_pct": np.random.uniform(10, 80, n).round(1),
    "burn_rate_pct": np.random.uniform(20, 90, n).round(1),
    "runway_months": np.random.randint(2, 18, n),
    "churn_rate_pct": np.random.uniform(2, 15, n).round(1),
}
df = pd.DataFrame(data)

# A burn rate under 50% is labelled as positive cash flow.
burn_below_half = df["burn_rate_pct"].lt(50)
df["cash_flow_status"] = np.where(burn_below_half, "Positive", "Negative")

# Acquisition candidates must satisfy all four screens at once:
# growth above 30%, burn under 50%, more than 6 months of runway, churn under 10%.
passes_all_screens = (
    df["growth_rate_pct"].gt(30)
    & burn_below_half
    & df["runway_months"].gt(6)
    & df["churn_rate_pct"].lt(10)
)
df["acquisition_candidate"] = np.where(passes_all_screens, "Yes", "No")

# Persist without the index column, then preview the first rows.
df.to_csv("simulated_saas_data.csv", index=False)
df.head()


Unnamed: 0,company_name,arr_million,growth_rate_pct,burn_rate_pct,runway_months,churn_rate_pct,cash_flow_status,acquisition_candidate
0,Company_1,4.37,11.4,62.8,4,12.5,Negative,No
1,Company_2,9.56,77.9,29.8,8,6.0,Positive,Yes
2,Company_3,7.59,68.3,40.5,6,3.3,Positive,No
3,Company_4,6.39,24.9,45.6,10,10.9,Positive,No
4,Company_5,2.4,22.7,51.9,8,7.7,Negative,No


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Generate a reproducible mock dataset of company leads.
np.random.seed(42)
n = 100

# Columns are drawn in dict order (Revenue, EmailScore, DomainAge, then the
# three binary flags); reordering them would change the simulated values.
data = {
    'CompanyName': [f'Company_{i}' for i in range(n)],
    'Revenue': np.round(np.random.uniform(1_000_000, 10_000_000, n), 2),
    'EmailScore': np.random.randint(50, 100, n),
    'DomainAge': np.random.randint(1, 15, n),  # years
    'FounderLinkedIn': np.random.choice([1, 0], n, p=[0.85, 0.15]),   # 1 = profile present
    'RegistryVerified': np.random.choice([1, 0], n, p=[0.75, 0.25]),  # 1 = verified
    'FundingHistory': np.random.choice([1, 0], n, p=[0.4, 0.6]),      # 1 = has prior funding
}
df = pd.DataFrame(data)

# Mock "ML" confidence score: a hand-weighted sum of the raw columns.
# The coefficients act on unnormalised values (revenue in millions, raw email
# score, age in years, 0/1 flags), so they are not percentage shares of the total.
confidence = df['Revenue'] / 1_000_000 * 0.2      # revenue in millions x 0.2
confidence = confidence + df['EmailScore'] * 0.2   # email score x 0.2
confidence = confidence + df['DomainAge'] * 0.15   # domain age x 0.15
confidence = confidence + df['FounderLinkedIn'] * 25    # +25 when a founder profile exists
confidence = confidence + df['RegistryVerified'] * 10   # +10 when registry-verified
confidence = confidence + df['FundingHistory'] * 10     # +10 when funding history exists
df['confidence_score'] = confidence

# A lead counts as "Deal Ready" only when its score is strictly above 60.
df['deal_ready'] = df['confidence_score'].gt(60)

df.head()


# Export is currently disabled:
# df.to_csv('simulated_deals.csv')
# df.head()

Unnamed: 0,CompanyName,Revenue,EmailScore,DomainAge,FounderLinkedIn,RegistryVerified,FundingHistory,confidence_score,deal_ready
0,Company_0,4370861.07,81,3,1,1,0,52.524172,False
1,Company_1,9556428.76,88,10,1,1,0,56.011286,False
2,Company_2,7587945.48,98,3,1,1,0,56.567589,False
3,Company_3,6387926.36,81,3,1,1,0,52.927585,False
4,Company_4,2404167.76,53,4,0,1,0,21.680834,False


In [None]:
import pandas as pd
import streamlit as st
# `nx` is used below but was never imported in this cell, which raises a
# NameError on a fresh kernel. networkx must be installed in the environment.
import networkx as nx

# Load the simulated CSV
df = pd.read_csv('simulated_deals.csv')

# Undirected graph linking each company to one investor, partner and customer.
G = nx.Graph()

# Simulated sample partners/investors/customers
investors = ['Accel', 'Sequoia', 'SoftBank', 'Andreessen Horowitz']
partners = ['Salesforce', 'Google Cloud', 'AWS', 'Stripe']
customers = ['Meta', 'Netflix', 'Airbnb', 'Uber']

# Assign fake connections by cycling through the sample lists.
# The rotation uses the row *position* rather than the DataFrame index label,
# so it still works if the frame carries a non-integer or filtered index.
for position, (company, score) in enumerate(zip(df['CompanyName'], df['confidence_score'])):
    # `title` holds the hover text as HTML.
    G.add_node(company, title=f"{company}<br>Score: {score}", group='company')

    # Add 1 investor, 1 partner, 1 customer for each company
    inv = investors[position % len(investors)]
    part = partners[position % len(partners)]
    cust = customers[position % len(customers)]

    # Re-adding an existing node only updates its attributes, so shared
    # investors/partners/customers are not duplicated.
    G.add_node(inv, group='investor')
    G.add_node(part, group='partner')
    G.add_node(cust, group='customer')

    G.add_edge(company, inv)
    G.add_edge(company, part)
    G.add_edge(company, cust)

# Create PyVis network (disabled)
# net = Network(height='600px', width='100%', notebook=False)
# net.from_nx(G)

# # Save and render
# net.save_graph('network.html')

# Streamlit display
st.title("Company Network Intelligence Map")
# st.components.v1.html(open('network.html', 'r', encoding='utf-8').read(), height=650)


ModuleNotFoundError: No module named 'networkx'

In [3]:
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import numpy as np

# Build and display a similarity network of leads: nodes are companies placed
# on a circle, edges join pairs whose standardized feature vectors have a
# cosine similarity above `threshold`, and node color encodes a KMeans cluster.

# Load the uploaded CSV
df = pd.read_csv("simulated_deals.csv")

# Basic cleanup: drop NA and select numerical features for similarity
# NOTE(review): the simulation cell in this notebook names its columns
# 'DomainAge' and 'FundingHistory'; confirm that the CSV on disk really
# contains 'domain_age_years' and 'num_funding_rounds', otherwise dropna()
# raises a KeyError here.
features = ['Revenue', 'EmailScore', 'domain_age_years', 'num_funding_rounds', 'confidence_score']
df_clean = df.dropna(subset=features).copy()

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_clean[features])

# Calculate similarity matrix
similarity_matrix = cosine_similarity(X_scaled)

# Apply clustering for node color grouping
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(X_scaled)
df_clean["cluster"] = clusters

# Create edge list from similarity matrix (threshold-based).
# Only the upper triangle (j > i) is scanned so each pair appears once and
# self-similarity on the diagonal is skipped. Indices are row positions.
threshold = 0.90  # high similarity
edges = [
    (i, j, similarity_matrix[i, j])
    for i in range(similarity_matrix.shape[0])
    for j in range(i + 1, similarity_matrix.shape[1])
    if similarity_matrix[i, j] > threshold
]

# Generate node positions using circular layout.
# endpoint=False keeps the last node from landing on angle 2*pi, which is the
# same point as angle 0 and would hide it behind the first node.
theta = np.linspace(0, 2 * np.pi, len(df_clean), endpoint=False)
x_pos = np.cos(theta)
y_pos = np.sin(theta)

# Collect all edge segments first and hand them to Plotly in one go.
# A None entry after each pair breaks the line between segments.
edge_x = []
edge_y = []
for src, dst, _ in edges:
    edge_x.extend([x_pos[src], x_pos[dst], None])
    edge_y.extend([y_pos[src], y_pos[dst], None])

# Plotly network graph
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color='gray'),
    hoverinfo='none',
    mode='lines'
)

# Node trace
node_trace = go.Scatter(
    x=x_pos,
    y=y_pos,
    mode='markers+text',
    text=df_clean['CompanyName'],
    textposition='bottom center',
    marker=dict(
        showscale=True,
        colorscale='Viridis',
        size=15,
        color=df_clean['cluster'],
        colorbar=dict(
            thickness=15,
            title='Cluster',
            xanchor='left'
        ),
        line_width=2
    ),
    hovertext=[
        f"{row['CompanyName']}<br>Revenue: {row['Revenue']}<br>Confidence: {row['confidence_score']}"
        for _, row in df_clean.iterrows()
    ],
    hoverinfo='text'
)

# Final figure
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title='<br>Influence Network of Investment-Ready Leads',
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20, l=5, r=5, t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
                ))

fig.show()
