# M13 VC Data Project

This project simulates a venture capital analysis using mock startup funding data. The goal is to extract insights about sectors, funding rounds, and investor behavior using SQL (SQLite) and Python (pandas and matplotlib).

In [None]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt


# Read the mock funding dataset; the path is relative to the current
# working directory.
df = pd.read_csv(filepath_or_buffer='mock_startup_funding_data.csv')

# Trailing expression of the cell: the first five rows, as a quick look at
# the columns that were loaded.
df.head(n=5)

In [None]:
# Open an in-memory SQLite database (nothing is written to disk) so the
# DataFrame can be queried with SQL.
conn = sqlite3.connect(':memory:')

# Copy the DataFrame into a table named 'funding'. The DataFrame index is
# not stored as a column, and an existing 'funding' table is replaced so
# the cell can be re-run safely.
df.to_sql(
    name='funding',
    con=conn,
    if_exists='replace',
    index=False,
)

## Total Seed Funding by Sector

In [None]:
# Total amount raised in 'Seed' rounds for each sector, rounded to two
# decimals and listed from the largest total to the smallest.
query1 = """
SELECT sector, ROUND(SUM(amount_million), 2) as total_seed_funding
FROM funding
WHERE round_type = 'Seed'
GROUP BY sector
ORDER BY total_seed_funding DESC;
"""

# Run the query against the SQLite connection and print the resulting table.
print(pd.read_sql(sql=query1, con=conn))

In [None]:
# Keep only the rows whose round_type is exactly 'Seed'.
seed_df = df.loc[df['round_type'].eq('Seed')]

# Sum amount_million per sector, sorted ascending so the largest total is
# drawn last (at the top of the horizontal bar chart).
plot_df = (
    seed_df
    .groupby('sector')['amount_million']
    .sum()
    .sort_values()
)

# Horizontal bar chart of the per-sector seed totals.
plot_df.plot.barh(figsize=(10, 6), title='Total Seed Funding by Sector')
plt.ylabel('Sector')
plt.xlabel('Total Funding ($M)')
plt.tight_layout()
plt.show()

## Funding by Round Type per Investor

In [None]:
# Total amount invested for every (investor, round_type) pair, rounded to
# two decimals.
query2 = """
SELECT investor, round_type, ROUND(SUM(amount_million), 2) AS total_invested
FROM funding
GROUP BY investor, round_type
ORDER BY investor, total_invested DESC;
"""
result2 = pd.read_sql(sql=query2, con=conn)

# Reshape from long to wide: one row per investor, one column per round
# type. Pairs that have no rows in the query result become 0 instead of NaN.
(
    result2
    .pivot(index='investor', columns='round_type', values='total_invested')
    .fillna(0)
    .plot.bar(figsize=(10,6), title='Total Funding by Round Type per Investor')
)
plt.xlabel('Investor')
plt.ylabel('Total Invested ($M)')
plt.tight_layout()
plt.show()

## Funding by Sector per Investor

In [None]:
# Total amount invested for every (investor, sector) pair, rounded to two
# decimals.
query3 = """
SELECT investor, sector, ROUND(SUM(amount_million), 2) AS total_invested
FROM funding
GROUP BY investor, sector
ORDER BY investor, total_invested DESC;
"""
result3 = pd.read_sql(sql=query3, con=conn)

# Reshape from long to wide: one row per investor, one column per sector.
# Pairs that have no rows in the query result become 0 instead of NaN.
(
    result3
    .pivot(index='investor', columns='sector', values='total_invested')
    .fillna(0)
    .plot.bar(figsize=(12,6), title='Total Funding by Sector per Investor')
)
plt.xlabel('Investor')
plt.ylabel('Total Invested ($M)')
plt.tight_layout()
plt.show()

## Round Type Breakdown by Sector

In [None]:
# Total amount raised for every (sector, round_type) pair, rounded to two
# decimals.
query4 = """
SELECT sector, round_type, ROUND(SUM(amount_million), 2) AS total_raised
FROM funding
GROUP BY sector, round_type
ORDER BY sector, total_raised DESC;
"""
result4 = pd.read_sql(sql=query4, con=conn)

# Reshape from long to wide: one row per sector, one column per round type.
# Pairs that have no rows in the query result become 0 instead of NaN.
(
    result4
    .pivot(index='sector', columns='round_type', values='total_raised')
    .fillna(0)
    .plot.bar(figsize=(12,6), title='Total Raised by Round Type per Sector')
)
plt.xlabel('Sector')
plt.ylabel('Total Raised ($M)')
plt.tight_layout()
plt.show()