# Examples of using analysis functionalities

Using the `discovery_utils` analysis functionality for investment data.

Here, we'll find companies using their categories, but you can also use the search results produced by the process shown in `cybersec_search.ipynb`.

In [1]:
from discovery_utils.utils import (
    analysis_crunchbase,
    analysis,
    charts
)

In [2]:
import os

import pandas as pd

# Root directory under which the data and chart outputs of this notebook are
# written. Set the DISCOVERY_PROJECT_DIR environment variable to point at your
# own checkout; when it is unset, the original hard-coded local path is used.
PROJECT_DIR = os.environ.get(
    "DISCOVERY_PROJECT_DIR",
    "/Users/william.woodward/Documents/discovery_mission_radar_prototyping",
)

# The package import below stays commented out until the poetry issue is resolved:
# from src import PROJECT_DIR

In [None]:
from discovery_utils.getters import crunchbase
# Single Crunchbase getter instance, reused as `CB` by the cells below
CB = crunchbase.CrunchbaseGetter()

In [None]:
# Look up narrow categories similar to the search terms for the "ahl" work
# (NOTE(review): "ahl" presumably stands for the "A Healthy Life" mission - confirm).
# Requests n_results=10 suggestions; the result is shown as the cell output.
CB.find_similar_categories("obesity, diabetes, nutrition", category_type="narrow", n_results=10)

In [None]:
# Companies in the selected narrow categories
selected_df = CB.get_companies_in_categories(
    [
        "AgTech",
        "Agriculture",
        "Diabetes",
        "Dietary Supplements",
        "Farmers Market",
        "Food Delivery",
        "Food Processing",
        "Food Trucks",
        "Food and Beverage",
        "Grocery",
        "Nutrition",
        "Organic Food",
        "Recipes",
        "Restaurants",
        "Snack Food",
    ],
    category_type="narrow",
)
# Unique ids of those companies
matching_ids = set(selected_df.id)

Group categories into lists

In [None]:
from pathlib import Path

# Category groups to analyse. Each key is used verbatim in the output file
# names and chart title; each value lists the narrow categories in that group.
lists_of_categories = {
    # Spelled "AgTech" to agree with the category name used in the
    # company-selection cell above
    "biological": ["AgTech", "Agriculture"],
    "health_diabetes": ["Diabetes"],
    "health_nutrition": ["Dietary Supplements", "Nutrition"],
    "economic_retail": ["Organic Food", "Grocery", "Snack Food", "Farmers Market"],
    "economic - ooh": ["Food Delivery", "Restaurants"],
    "economic - food proc": ["Food Processing"],
    "Food & bev": ["Food and Beverage"],
    "Social": ["Recipes"],
}

# Make sure both output folders exist before anything is written to them
data_dir = f"{PROJECT_DIR}/data/2025_01_MS_ahl"
charts_dir = f"{PROJECT_DIR}/charts"
for output_dir in (data_dir, charts_dir):
    Path(output_dir).mkdir(parents=True, exist_ok=True)

# Run the same analysis for every category group. Variables assigned inside the
# loop stay defined afterwards and hold the values from the final iteration.
for category_group, categories in lists_of_categories.items():
    # Get companies for the current category group
    selected_df = CB.get_companies_in_categories(categories, category_type="narrow")
    matching_ids = set(selected_df.id)

    # Enriched organisation records for the matching companies
    matchings_orgs_df = CB.organisations_enriched.query("id in @matching_ids")

    # Get the funding rounds of the selected types for the matching companies
    funding_rounds_df = CB.select_funding_rounds(
        org_ids=matching_ids,
        funding_round_types=["angel", "pre_seed", "seed", "series_a", "series_b"],
    )

    # Collect the investor names of each funding round into one list per round
    investors_df = (
        CB.funding_rounds_enriched
        .query("funding_round_id in @funding_rounds_df.funding_round_id")
        .groupby("funding_round_id")
        .agg(investor_name=("investor_name", list))
        .reset_index()
    )

    # Swap the original investor_name column for the per-round list of investors
    funding_rounds_df = (
        funding_rounds_df
        .drop(columns=["investor_name"])
        .merge(investors_df, on="funding_round_id", how="left")
    )

    # Bare expressions are not displayed inside a loop, so print the sizes instead
    print(
        f"{category_group}: {len(matchings_orgs_df)} organisations, "
        f"{len(funding_rounds_df)} funding rounds"
    )

    # Save funding rounds as csv
    funding_rounds_df.to_csv(f"{data_dir}/{category_group}_funding_rounds.csv", index=False)

    # Generate basic yearly time series
    ts_df = analysis_crunchbase.get_timeseries(
        matchings_orgs_df,
        funding_rounds_df,
        period='year',
        min_year=2014,
        max_year=2024,
    )

    # Create raised amount bar chart
    fig = charts.ts_bar(
        ts_df,
        variable='raised_amount_gbp_total',
        variable_title="Raised amount, £ millions",
        category_column="_category",
    )
    # NOTE(review): configure_plots may return a new chart rather than modify
    # `fig` in place (TODO confirm). Keep the returned chart when there is one,
    # so that the configured title is part of the saved image.
    configured_fig = charts.configure_plots(
        fig, chart_title=f"Funding raised over time for {category_group}"
    )
    if configured_fig is not None:
        fig = configured_fig

    # Save the chart with a filename unique to this category group
    fig.save(f"{charts_dir}/{category_group}_raised_amount.png")

    # Break the funding rounds down by deal size and by funding round type
    deals_df, deal_counts_df = analysis_crunchbase.get_funding_by_year_and_range(
        funding_rounds_df, 2014, 2024
    )
    aggregated_funding_types_df = analysis_crunchbase.aggregate_by_funding_round_types(
        funding_rounds_df
    )

    # Chart by investment types and save the chart
    investment_types_fig = analysis_crunchbase.chart_investment_types(aggregated_funding_types_df)
    investment_types_fig.save(f"{charts_dir}/{category_group}_investment_types.png")

    # Chart by investment type counts and save the chart
    investment_types_counts_fig = analysis_crunchbase.chart_investment_types_counts(
        aggregated_funding_types_df
    )
    investment_types_counts_fig.save(f"{charts_dir}/{category_group}_investment_types_counts.png")

    # Chart deal sizes and save the chart
    deal_sizes_fig = analysis_crunchbase.chart_deal_sizes(deals_df)
    deal_sizes_fig.save(f"{charts_dir}/{category_group}_deal_sizes.png")

    # Chart deal size counts and save the chart
    deal_sizes_counts_fig = analysis_crunchbase.chart_deal_sizes_counts(deal_counts_df)
    deal_sizes_counts_fig.save(f"{charts_dir}/{category_group}_deal_sizes_counts.png")

    

In [None]:
# Funding aggregated by round type; after the loop above this holds the
# values from its final iteration only (the last category group)
aggregated_funding_types_df

In [None]:
# First output of get_funding_by_year_and_range, as left by the final
# iteration of the loop above (the last category group)
deals_df

In [None]:
# Second output of get_funding_by_year_and_range, as left by the final
# iteration of the loop above (the last category group)
deal_counts_df

In [None]:
# Show the investment types chart for the last category group processed by the loop
analysis_crunchbase.chart_investment_types(aggregated_funding_types_df)

In [None]:
# Show the investment type counts chart for the last category group processed by the loop
analysis_crunchbase.chart_investment_types_counts(aggregated_funding_types_df)

In [None]:
# Show the deal sizes chart for the last category group processed by the loop
analysis_crunchbase.chart_deal_sizes(deals_df)

In [None]:
# Show the deal size counts chart for the last category group processed by the loop
analysis_crunchbase.chart_deal_sizes_counts(deal_counts_df)