In [108]:
import pandas as pd
import numpy as np
import mercury as mr
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go


In [109]:
# App metadata handed to Mercury: a title and a one-sentence description.
app = mr.App(
    title="Startup investments",
    description="This notebook is designed to help investors analyze and evaluate startup investment opportunities",
)

## Startup Investments

Welcome to the Startup Investments Analysis Notebook! This notebook is designed to help investors analyze and evaluate startup investment opportunities.

Startup investing can be an exciting but challenging field, with a wide range of factors to consider when deciding whether to invest in a particular company. This notebook aims to provide a structured approach to analyzing startups, taking into account both quantitative and qualitative factors.

Throughout this notebook, we will cover various aspects of startup analysis, including financial metrics, market size and trends, competitive landscape, team quality, and more. We will also provide guidance on how to assess the potential risks and rewards of startup investing, as well as tips for conducting due diligence and making informed investment decisions.

Whether you are an experienced startup investor or just starting out in this field, this notebook will provide you with a valuable framework for evaluating startup opportunities and making informed investment decisions. So let's dive in and start analyzing some startups! (generated with ChatGPT)

Data source: the <a href="https://www.kaggle.com/datasets/arindam235/startup-investments-crunchbase" target="_blank">Startup Investments (Crunchbase) dataset</a> on Kaggle Datasets.


In [49]:
# Load the venture-capital investments CSV from the current working directory.
df = pd.read_csv(
    './investments_VC.csv',
    encoding='unicode_escape',
)

In [50]:
# Clean the loaded frame in one chain. Every step returns a new frame, so no
# intermediate result is mutated in place.
df = (
    df
    # Strip the padding spaces from the ' market ' column name.
    .rename(columns={' market ': 'market'})
    .drop_duplicates()
    .drop(columns=['permalink', 'homepage_url'])
    # Rows without a company name are discarded.
    .dropna(subset=['name'])
    .assign(**{
        # Keep only the part before the first '-' (the year) as an integer.
        'first_funding_at': lambda d: d['first_funding_at'].str.split('-').str[0].astype(int),
        'last_funding_at': lambda d: d['last_funding_at'].str.split('-').str[0].astype(int),
        # Funding totals arrive as padded strings with thousands separators;
        # a bare "-" stands for "no amount" and becomes 0.
        ' funding_total_usd ': lambda d: (
            d[' funding_total_usd ']
            .str.strip()
            .str.replace(',', '', regex=False)
            .replace('-', '0')
            .astype('float')
        ),
        'status': lambda d: d['status'].fillna('unknown'),
    })
    # Rows whose first-funding year is before 1902 are dropped.
    # NOTE(review): presumably these are malformed dates - confirm the cutoff.
    .loc[lambda d: d['first_funding_at'] >= 1902]
)

In [55]:
# Names of the ten most frequent markets, most common first.
top_markets = df['market'].value_counts().head(10).index.tolist()

In [57]:
# Year-range widget; the initial selection covers the whole 1980-2014 span.
year = mr.Range(
    label="Select year range",
    min=1980,
    max=2014,
    value=[1980, 2014],
)

mercury.Range

In [58]:
# Market picker; every one of the top markets starts out selected.
market = mr.MultiSelect(
    label="Select market",
    choices=top_markets,
    value=top_markets,
)

mercury.MultiSelect

In [60]:
# Keep startups founded inside the selected year range (both bounds included);
# rows with a missing founded_year never satisfy the comparison and are dropped.
# NOTE(review): `df` is rebound to the filtered subset, so re-running this cell
# with a wider range cannot bring back rows removed by an earlier run.
df = df[df['founded_year'].between(year.value[0], year.value[1])]

In [63]:
# Restrict the frame to the markets currently chosen in the widget.
df = df.loc[df['market'].isin(market.value)]

## Top companies

In [91]:

# Ten best-funded companies, largest total funding first.
(
    df
    .sort_values(' funding_total_usd ', ascending=False)
    .loc[:, ['name', 'market', 'status', 'country_code', 'founded_year', ' funding_total_usd ']]
    .head(10)
)

Unnamed: 0,name,market,status,country_code,founded_year,funding_total_usd
45815,Verizon Communications,Mobile,operating,USA,1983.0,30079500000.0
7328,Carestream,Biotechnology,operating,USA,2007.0,2400000000.0
48936,Zebra Technologies,Enterprise Software,operating,USA,1991.0,2000000000.0
29882,O3b Networks,Enterprise Software,operating,NLD,2007.0,1371634000.0
7107,Cape Wind,Clean Technology,operating,USA,2001.0,1200000000.0
44673,Twitter,Software,operating,USA,2006.0,1160167000.0
1868,Alibaba,E-Commerce,operating,CHN,1999.0,1112000000.0
39127,Snapdeal,E-Commerce,operating,IND,2010.0,1062700000.0
5654,Bloom Energy,Clean Technology,operating,USA,2001.0,980000000.0
24406,LivingSocial,E-Commerce,operating,USA,2007.0,934725000.0


In [107]:
# Bar chart of how many startups fall into each market (legend hidden).
markets_count = df['market'].value_counts()
fig = px.bar(markets_count, title='Top Markets').update_layout(showlegend=False)
fig.show()

In [102]:
# Count of startups per status, bars ordered from least to most frequent.
fig = px.histogram(df['status'], title='Startups Status')
fig.update_layout(showlegend=False).update_xaxes(categoryorder='total ascending')
fig.show()

In [96]:
# Distribution of founding years; the x axis is relabelled to "Year".
fig = px.histogram(
    df,
    x='founded_year',
    title='Startups founded per Year',
    labels={'founded_year': 'Year'},
)
fig.show()

In [11]:
# Country lookup table fetched from the plotly datasets repository. Its CODE
# column is renamed so it can be joined on 'country_code'. No columns are
# dropped here (an earlier drop of 'GDP (BILLIONS)' had been commented out).
countries = (
    pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
    .rename(columns={'CODE': 'country_code'})
)

In [12]:
# Inner join: only startups whose country_code appears in `countries` survive.
df = df.merge(countries, on='country_code', how='inner')

In [95]:
# World map shaded by the number of startups per country code.
startups_per_country = df['country_code'].value_counts()

fig_country_map = go.Figure(
    go.Choropleth(
        locations=startups_per_country.index,
        z=startups_per_country,
        colorscale='Blues',
        colorbar_title="Startups founded",
        marker_line_width=0.2,  # thin country borders
    )
)

fig_country_map.update_layout(
    title_text="Number of Startups per Country",
    title_x=0.5,
    title_font_size=20,
    geo=dict(
        projection_type='equirectangular',
        showcoastlines=False,
        showframe=False,
    ),
)

fig_country_map.show()