<a href="https://colab.research.google.com/github/shrutipatil-25/python_basic/blob/main/unicorn_companies_with_numpy_and_panda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# STEP 1: Upload the CSV file
from google.colab import files
uploaded = files.upload()



Saving Unicorn_Companies.csv to Unicorn_Companies.csv


In [2]:
# STEP 2: Import necessary libraries
import pandas as pd
import numpy as np


In [5]:
# STEP 3: Load the uploaded CSV file
file_name = list(uploaded.keys())[0]
df = pd.read_csv(file_name)

# Show the first few rows
df.head()


Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits
0,Bytedance,$140,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",2012.0,$7.44B,IPO,28.0,8.0,5.0
1,SpaceX,$100.30,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",2002.0,$6.874B,,29.0,12.0,
2,Stripe,$95,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",2010.0,$2.901B,Asset,39.0,12.0,1.0
3,Klarna,$45.60,12/12/2011,Sweden,Stockholm,Fintech,"Institutional Venture Partners, Sequoia Capita...",2005.0,$3.472B,Acquired,56.0,13.0,1.0
4,Epic Games,$42,10/26/2018,United States,Cary,Other,"Tencent Holdings, KKR, Smash Ventures",1991.0,$4.377B,Acquired,25.0,5.0,2.0


In [6]:
# STEP 4: Data Cleaning and Preprocessing

# Convert 'Valuation ($B)' to float by removing $ and B
df['Valuation ($B)'] = df['Valuation ($B)'].replace('[\$,B]', '', regex=True).astype(float)

# Convert 'Date Joined' to datetime
df['Date Joined'] = pd.to_datetime(df['Date Joined'], errors='coerce')

# Convert 'Founded Year' to numeric
df['Founded Year'] = pd.to_numeric(df['Founded Year'], errors='coerce')

# Convert 'Portfolio Exits' to numeric
df['Portfolio Exits'] = pd.to_numeric(df['Portfolio Exits'], errors='coerce')

# Check for missing values
print("Missing values per column:\n")
print(df.isna().sum())


Missing values per column:

Company                0
Valuation ($B)         0
Date Joined            0
Country                0
City                   0
Industry               0
Select Inverstors     17
Founded Year          43
Total Raised          24
Financial Stage      988
Investors Count        1
Deal Terms            29
Portfolio Exits      988
dtype: int64


In [7]:
# STEP 5: Answering Questions

# 1. Highest Valuation
richest_company = df.loc[df['Valuation ($B)'].idxmax(), 'Company']

# 2. Missing 'Founded Year'
missing_founded = df['Founded Year'].isna().sum()

# 3. No Listed Investors
missing_investors = df['Select Inverstors'].isna().sum()

# 4. Most Unicorns by Country
most_unicorn_country = df['Country'].value_counts().idxmax()

# 5. Founded After 2010
founded_after_2010 = (df['Founded Year'] > 2010).sum()

# 6. Average Valuation by Industry
avg_valuation_by_industry = df.groupby('Industry')['Valuation ($B)'].mean().sort_values(ascending=False)

# 7. Top 5 Cities with Most Unicorns
top_cities = df['City'].value_counts().head(5)

# 8. Financial Stage Distribution
financial_stage_dist = df['Financial Stage'].value_counts()

# 9. Most Portfolio Exits
most_exits = df.loc[df['Portfolio Exits'].idxmax(), ['Company', 'Portfolio Exits']]


In [8]:
# STEP 6: Display Results

print("\n1. Highest Valuation Company:", richest_company)
print("2. Companies Missing 'Founded Year':", missing_founded)
print("3. Companies Without Listed Investors:", missing_investors)
print("4. Country with Most Unicorns:", most_unicorn_country)
print("5. Companies Founded After 2010:", founded_after_2010)

print("\n6. Average Valuation by Industry (Top 5):\n", avg_valuation_by_industry.head(5))
print("\n7. Top 5 Cities by Unicorn Count:\n", top_cities)
print("\n8. Financial Stage Distribution:\n", financial_stage_dist)
print("\n9. Company with Most Portfolio Exits:\n", most_exits)



1. Highest Valuation Company: Bytedance
2. Companies Missing 'Founded Year': 43
3. Companies Without Listed Investors: 17
4. Country with Most Unicorns: United States
5. Companies Founded After 2010: 785

6. Average Valuation by Industry (Top 5):
 Industry
Sequoia Capital, Thoma Bravo, Softbank    32.000000
Finttech                                  10.000000
Other                                      4.735714
Artificial intelligence                    4.343521
Fintech                                    3.828390
Name: Valuation ($B), dtype: float64

7. Top 5 Cities by Unicorn Count:
 City
San Francisco    145
New York          96
Beijing           63
Shanghai          43
London            33
Name: count, dtype: int64

8. Financial Stage Distribution:
 Financial Stage
Acquired       22
Divestiture     8
IPO             7
Acq             7
Asset           1
Take            1
Management      1
Reverse         1
Corporate       1
Name: count, dtype: int64

9. Company with Most Portfolio Ex