### Importing the libraries 

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt 
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

### Importing data

In [2]:
# Relative location of the cleaned VC investments CSV.
DATA_PATH = '../../data/cleaned_investments_VC.csv'

# Decode the file as ISO-8859-1 (Latin-1) rather than pandas' UTF-8 default.
df = pd.read_csv(DATA_PATH, encoding='ISO-8859-1')

# Preview the first rows of the loaded frame.
df.head()


Descriptive Statistics for Numerical Columns:
       funding_rounds          seed       venture  equity_crowdfunding  \
count    48124.000000  4.812400e+04  4.812400e+04         4.812400e+04   
mean         1.700586  2.191042e+05  7.598091e+06         6.255879e+03   
std          1.298296  1.068580e+06  2.877746e+07         2.024335e+05   
min          1.000000  0.000000e+00  0.000000e+00         0.000000e+00   
25%          1.000000  0.000000e+00  0.000000e+00         0.000000e+00   
50%          1.000000  0.000000e+00  0.000000e+00         0.000000e+00   
75%          2.000000  2.500000e+04  5.000000e+06         0.000000e+00   
max         18.000000  1.300000e+08  2.351000e+09         2.500000e+07   

        undisclosed  convertible_note  debt_financing         angel  \
count  4.812400e+04      4.812400e+04    4.812400e+04  4.812400e+04   
mean   1.273428e+05      2.353346e+04    1.909246e+06  6.605675e+04   
std    2.971224e+06      1.450830e+06    1.400643e+08  6.645587e+05   
mi

### Plotting country code distribution

In [3]:
# Count the rows recorded under each country code. value_counts() returns a
# Series whose index holds the distinct codes and whose values hold the counts,
# so both bar-chart axes are read from this one Series to keep every bar paired
# with its own code. (Passing the raw column as x would supply one entry per
# row against one count per distinct code, misaligning the two axes.)
country_counts = df['country_code'].value_counts()

country_barchart = go.Figure(
    data=[go.Bar(x=country_counts.index, y=country_counts.values)],
    layout_title_text="Country Code Distribution"
)
country_barchart.update_layout(
    xaxis_title="Country code",
    yaxis_title="Number of rows",
)

# Render the chart inline.
country_barchart.show()


In [4]:
# List every column name available in the loaded frame.
df.columns

Index(['status', 'country_code', 'region', 'funding_rounds',
       'first_funding_at', 'last_funding_at', 'seed', 'venture',
       'equity_crowdfunding', 'undisclosed', 'convertible_note',
       'debt_financing', 'angel', 'grant', 'private_equity', 'post_ipo_equity',
       'post_ipo_debt', 'secondary_market', 'product_crowdfunding', 'round_A',
       'round_B', 'round_C', 'round_D', 'round_E', 'round_F', 'round_G',
       'round_H'],
      dtype='object')

### Distribution of Funding Rounds

In [5]:
# Histogram of the `funding_rounds` column, with bars coloured by `status`.
fig1 = px.histogram(
    df,
    x='funding_rounds',
    color='status',
    nbins=20,
    title='Distribution of Funding Rounds',
)
fig1.show()

### Selecting Funding columns

In [6]:
# Columns holding the amount raised per funding type.
funding_columns = [
    'seed',
    'venture',
    'equity_crowdfunding',
    'undisclosed',
    'convertible_note',
    'debt_financing',
    'angel',
    'grant',
    'private_equity',
    'post_ipo_equity',
    'post_ipo_debt',
    'secondary_market',
    'product_crowdfunding',
]

# Total each funding column over all rows, then reshape the resulting Series
# into a two-column frame: one row per funding type alongside its summed amount.
funding_sums = (
    df[funding_columns]
    .sum()
    .rename_axis('Funding Type')
    .reset_index(name='Amount')
)

### Funding Type Distribution

In [7]:
# `funding_sums` already holds one total per funding type, so a plain bar chart
# shows it directly; a histogram would aggregate the single value per category
# a second time and label the axis "sum of Amount". Bars are ordered from the
# largest total to the smallest to make the ranking readable.
fig2 = px.bar(
    funding_sums.sort_values('Amount', ascending=False),
    x='Funding Type',
    y='Amount',
    title='Funding Type Distribution',
)
fig2.show()


In [8]:
# px.line joins rows in the order they appear in the frame, so plotting the raw
# rows against unparsed, unsorted date values produces lines that jump back and
# forth in time. Parse the dates, bucket them by calendar month, and total the
# funding rounds per status so each trace is one chronological series.
first_funding_month = (
    pd.to_datetime(df['first_funding_at'], errors='coerce')  # unparseable dates become NaT
    .dt.to_period('M')
    .dt.to_timestamp()  # back to a timestamp at the start of each month
)

# groupby sorts its keys, so within every status the months come out ascending.
rounds_over_time = (
    df.assign(first_funding_at=first_funding_month)
    .dropna(subset=['first_funding_at'])
    .groupby(['status', 'first_funding_at'], as_index=False)['funding_rounds']
    .sum()
)

fig3 = px.line(
    rounds_over_time,
    x='first_funding_at',
    y='funding_rounds',
    color='status',
    title='Funding Rounds Over Time',
    labels={
        'first_funding_at': 'First funding month',
        'funding_rounds': 'Total funding rounds',
    },
)
fig3.show()