# Examples of using the analysis functionality

Using the `discovery_utils` analysis functionality for investment data.

Here, we'll find companies using their categories, but you can also use the search results from the process shown in `cybersec_search.ipynb`.

In [None]:
from discovery_utils.utils import (
    analysis_crunchbase,
    analysis,
    charts
)

In [None]:
import pandas as pd
from src import PROJECT_DIR

In [None]:
from discovery_utils.getters import crunchbase
# Shared Crunchbase getter; later cells read organisations, funding rounds and funding round types from it.
CB = crunchbase.CrunchbaseGetter()

In [None]:
# Labels in the review sheet's "RELEVANT?" column that mark a company as in scope.
included = ['y', 'Yes - CR', 'Yes - CR ', 'y-CR', 'Maybe - CR', 'maybe', 'Maybe']

# The list above already has to spell out whitespace and capitalisation variants
# of the same label. Compare labels with surrounding whitespace removed and case
# folded, so that further variants (e.g. "Y" or "maybe ") are not silently dropped.
# Every label accepted by an exact match is still accepted.
included_normalised = {label.strip().casefold() for label in included}

# NOTE(review): the file name says 2012-12-16 although it lives in the 2024_12_MS
# folder — left untouched because it must match the file on disk; confirm.
reviewed_data_df = (
    pd.read_csv(PROJECT_DIR / "data/2024_12_MS/Cybersecurity - Mission studio 2012-12-16 - crunchbase.csv")
    .rename(columns={"RELEVANT?": "relevant"})
    # astype(str) turns missing labels into "nan", which never matches a label.
    .loc[lambda df: df["relevant"].astype(str).str.strip().str.casefold().isin(included_normalised)]
)
# Ids of the companies kept by the relevance filter, in sheet order.
matching_ids = reviewed_data_df.id.to_list()

In [None]:
# Number of ids that passed the relevance filter (repeated ids, if any, are counted each time).
len(matching_ids)

You can inspect these companies by querying the enriched organisations table for their ids.

In [None]:
# Keep only the organisations whose id is in matching_ids.
matchings_orgs_df = CB.organisations_enriched.query("id in @matching_ids")

# Display a compact preview: name, homepage and short description.
preview_columns = ["name", "homepage_url", "short_description"]
matchings_orgs_df.loc[:, preview_columns]

Now get the funding rounds for the matching companies. You can specify which types of funding round you need.

In [None]:
# Check which funding round types there are; the selection in the next cell
# passes some of these values as `funding_round_types`.
CB.unique_funding_round_types

In [None]:
# Funding round types to keep for the matching organisations.
selected_round_types = ["angel", "pre_seed", "seed", "series_a"]

# Funding rounds of the matching organisations, restricted to the types above.
funding_rounds_df = CB.select_funding_rounds(
    org_ids=matching_ids,
    funding_round_types=selected_round_types,
)

In [None]:
# Rows of the enriched funding rounds table that belong to the selected rounds.
selected_round_ids = funding_rounds_df["funding_round_id"]
investor_rows_df = CB.funding_rounds_enriched.query("funding_round_id in @selected_round_ids")

# Gather the investor_name values of each funding_round_id into one list per round.
investors_df = (
    investor_rows_df
    .groupby("funding_round_id")
    .agg(investor_name=("investor_name", list))
    .reset_index()
)

# Swap the existing investor_name column for the per-round list. The left merge
# keeps every selected funding round, including rounds with no match in investors_df.
funding_rounds_without_investors_df = funding_rounds_df.drop(columns=["investor_name"])
funding_rounds_df = pd.merge(
    funding_rounds_without_investors_df,
    investors_df,
    on="funding_round_id",
    how="left",
)

In [None]:
# Number of selected funding rounds (rows of funding_rounds_df).
len(funding_rounds_df)

In [None]:
# Write the funding rounds table to CSV, leaving out the index.
# NOTE: investor_name holds Python lists at this point, so those cells are
# written as their text representation and need parsing when read back.
funding_rounds_path = PROJECT_DIR / "data/2024_12_MS/crunchbase_funding_rounds.csv"
funding_rounds_df.to_csv(funding_rounds_path, index=False)

Now let's generate some basic time series

In [None]:
# Yearly time series built from the matching organisations and their funding
# rounds, requested for 2014 to 2024.
ts_df = analysis_crunchbase.get_timeseries(
    matchings_orgs_df,
    funding_rounds_df,
    period="year",
    min_year=2014,
    max_year=2024,
)
ts_df

In [None]:
# Bar chart of the time series' `raised_amount_gbp_total` column.
ts_bar_options = {
    "variable": "raised_amount_gbp_total",
    "variable_title": "Raised amount, £ millions",
    "category_column": "_category",
}
fig = charts.ts_bar(ts_df, **ts_bar_options)

# Apply the shared plot configuration; the chart title is left empty.
charts.configure_plots(fig, chart_title="")

Let's look into the breakdown of deal types

In [None]:
# The helper returns two objects, unpacked here as deals_df and deal_counts_df.
# 2014 and 2024 are presumably the first and last year covered — TODO confirm
# against the helper's signature.
deals_df, deal_counts_df = analysis_crunchbase.get_funding_by_year_and_range(funding_rounds_df, 2014, 2024)
# Aggregate the same funding rounds by funding round type (going by the helper's name).
aggregated_funding_types_df = analysis_crunchbase.aggregate_by_funding_round_types(funding_rounds_df)

In [None]:
# Display the result of aggregate_by_funding_round_types.
aggregated_funding_types_df

In [None]:
# Display the first result of get_funding_by_year_and_range.
deals_df

In [None]:
# Display the second result of get_funding_by_year_and_range.
deal_counts_df

In [None]:
# Chart the per-funding-round-type aggregation (presumably amounts raised; a
# separate counts chart exists — TODO confirm).
analysis_crunchbase.chart_investment_types(aggregated_funding_types_df)

In [None]:
# Chart the per-funding-round-type aggregation as counts (going by the helper's name).
analysis_crunchbase.chart_investment_types_counts(aggregated_funding_types_df)

In [None]:
# Chart deal sizes from deals_df, the first result of get_funding_by_year_and_range.
analysis_crunchbase.chart_deal_sizes(deals_df)

In [None]:
# Chart deal-size counts from deal_counts_df, the second result of get_funding_by_year_and_range.
analysis_crunchbase.chart_deal_sizes_counts(deal_counts_df)