# Importing Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import pandas as pd
import plotly 
import plotly.express as px

# Loading the Dataset

In [None]:
# Read the startup funding CSV into the working frame `start`.
csv_path = "../input/indian-startup-funding/startup_funding.csv"
start = pd.read_csv(csv_path)

In [None]:
# Show the freshly loaded frame as the cell output.
start

In [None]:
# (rows, columns) of the raw frame, before any cleaning.
start.shape

In [None]:
# Column labels of the raw frame.
start.columns

In [None]:
# Print the dtype and non-null count of every column.
start.info()

In [None]:
# Number of missing values in each column.
start.isna().sum()

In [None]:
# Heatmap of the boolean missing-value mask: one cell per (row, column),
# with row tick labels and the colour bar switched off.
plt.figure(figsize=(15, 8))
null_mask = start.isna()
sns.heatmap(null_mask, yticklabels=False, cbar=False)

# Data cleaning

In [None]:
# Remove the "Remarks" and "Sr No" columns from the working frame.
start = start.drop(columns=["Remarks", "Sr No"])

In [None]:
# Keep only the rows that have no missing value in any column.
start = start.dropna(axis=0, how="any")

In [None]:
# Missing values per column after the rows with gaps were dropped.
start.isna().sum()

In [None]:
# Redraw the missing-value heatmap for the cleaned frame.
plt.figure(figsize=(15, 8))
remaining_nulls = start.isna()
sns.heatmap(remaining_nulls, yticklabels=False, cbar=False)

# Converting Amount in USD from string to integer

In [None]:
# Strip everything that is not part of the number from "Amount in USD" so
# the column can be parsed numerically afterwards: thousands separators,
# the textual placeholders, and the "+" sign.
#
# regex=False makes every token a literal. This matters for "+", which is a
# regex quantifier: without the flag, whether it is treated as a literal
# depends on the pandas version.
amount_text = start["Amount in USD"]
for token in (",", "undisclosed", "unknown", "Undisclosed", "+"):
    amount_text = amount_text.str.replace(token, "", regex=False)
start["Amount in USD"] = amount_text

In [None]:
# Parse the cleaned amount strings into a numeric column.
amount_strings = start["Amount in USD"]
start["Amount in USD"] = pd.to_numeric(amount_strings)

In [None]:
# (rows, columns) after the column drop, dropna and amount conversion.
start.shape

In [None]:
# Summary statistics of the frame's numeric columns.
start.describe()

In [None]:
# Order the rows from largest to smallest amount; reset_index() moves the
# previous row labels into a new "index" column.
by_amount_desc = start.sort_values(by="Amount in USD", ascending=False)
start = by_amount_desc.reset_index()

In [None]:
# Discard the "index" column left in the frame by reset_index().
start = start.drop(columns=["index"])

In [None]:
# First five rows of the frame sorted by descending amount.
start.head()

# Lets check the funding Range provided by Investors

In [None]:
# Interactive histogram of the funding amounts.
#
# Plotly builds and renders its own figure, so no matplotlib figure is
# opened here: the former plt.figure(...) call only created an empty,
# unused canvas.
# sns.set is kept so the seaborn theme is already active when later cells
# open their matplotlib figures.
sns.set(style="darkgrid")
px.histogram(start, x="Amount in USD")

# Most funding rounds fall in the 0-0.5B range, and "Rapido Bike Taxi" is the only startup that has received funding of 3.9B

# Let's check the startups that have received the 20 smallest fundings

In [None]:
# Re-sort from smallest to largest amount and renumber the rows 0..n-1.
# drop=True discards the old row labels instead of inserting them as an
# extra "index" column, which would otherwise stay in `start` for every
# later cell.
start = start.sort_values(by=["Amount in USD"], ascending=True).reset_index(drop=True)

In [None]:
# First 20 rows of the ascending sort, i.e. the smallest amounts.
start_botfun = start.iloc[:20]

In [None]:
# Show only the startup name and the amount for the bottom-20 selection.
start_botfun.loc[:, ["Startup Name", "Amount in USD"]]

In [None]:
# Horizontal bar chart of amount per startup name for `start_botfun`.
plt.figure(figsize=(15, 8))
sns.set(style="darkgrid")
sns.barplot(
    x="Amount in USD",
    y="Startup Name",
    data=start_botfun,
    palette="ch:s=.25,rot=-.25",
)

# Startups like Maptags, Cloudrino and SoundSurround have received the lowest funding

# Let's check the startups that have received the 20 largest fundings

In [None]:
# Sort back to largest-first and renumber the rows 0..n-1.
# drop=True stops reset_index() from adding the old row labels to the frame
# as yet another helper column.
start = start.sort_values(by=["Amount in USD"], ascending=False).reset_index(drop=True)

In [None]:
# First five rows after the descending re-sort.
start.head()

In [None]:
# First 20 rows of the descending sort, i.e. the largest amounts.
start_topfun = start.iloc[:20]

In [None]:
# Horizontal bar chart of amount per startup name for `start_topfun`.
plt.figure(figsize=(15, 8))
sns.set(style="darkgrid")
sns.barplot(
    x="Amount in USD",
    y="Startup Name",
    data=start_topfun,
    palette="ch:s=.25,rot=-.25",
)

# Startups like Rapido bike taxi, Flipkart, Udaan and Paytm have received major fundings 

In [None]:
# Preview the frame before looking at the investment types.
start.head()

# Let's check which types of investment the investors are really interested in

In [None]:
# Merge spelling variants of the investment-type labels. The pairs are
# applied one after another, in this order, as substring replacements.
spelling_fixes = [
    ("Seed/ Angel Funding", "Seed / Angel Funding"),
    ("Seed/Angel Funding", "Seed / Angel Funding"),
    ("Angel / Seed Funding", "Seed / Angel Funding"),
    ("Seed / Angle Funding", "Seed / Angel Funding"),
    ("pre-Series A", "Pre-Series A"),
    ("Pre-series A", "Pre-Series A"),
]
investment_type = start["InvestmentnType"]
for variant, canonical in spelling_fixes:
    investment_type = investment_type.str.replace(variant, canonical)
start["InvestmentnType"] = investment_type

In [None]:
# Frequency table of the 20 most common investment types.
#
# The frame is built with explicit column names, labels in "index" and
# counts in "InvestmentnType", because the names produced by
# value_counts().reset_index() differ between pandas 1.x and 2.x and the
# plotting cell reads exactly these two columns.
type_counts = start["InvestmentnType"].value_counts().head(20)
start_INtype = pd.DataFrame(
    {"index": type_counts.index, "InvestmentnType": type_counts.to_numpy()}
)

In [None]:
# Show the investment-type frequency table.
start_INtype

In [None]:
# Bar chart of the counts per investment type. Reads the labels from the
# "index" column and the counts from the "InvestmentnType" column.
plt.figure(figsize=(15, 12))
sns.set(style="darkgrid")
sns.barplot(
    x="InvestmentnType",
    y="index",
    data=start_INtype,
    palette="ch:s=.25,rot=-.25",
)
plt.title("Investment Type in which Investor are really intrested")
plt.xlabel("count")
plt.ylabel("Investment Type")
plt.show()

# Pie chart restricted to the first seven rows of the table.
plt.figure(figsize=(15, 12))
top_seven = start_INtype.iloc[:7]
plt.pie(top_seven["InvestmentnType"], labels=top_seven["index"], autopct="%0.1f%%")
plt.show()

# Most investors are interested in Private Equity, Seed Funding and Seed / Angel Funding as their investment type

# Let's check the industry verticals in which investors are interested

In [None]:
# Preview the frame before looking at the industry verticals.
start.head()

In [None]:
# Frequency table of the 10 most common industry verticals.
#
# Column names are set explicitly, labels in "index" and counts in
# "Industry Vertical", because value_counts().reset_index() names its
# columns differently in pandas 1.x and 2.x and the plotting cell reads
# exactly these two columns.
vertical_counts = start["Industry Vertical"].value_counts().head(10)
start_indver = pd.DataFrame(
    {"index": vertical_counts.index, "Industry Vertical": vertical_counts.to_numpy()}
)

In [None]:
# Show the industry-vertical frequency table.
start_indver

In [None]:
# Bar chart of the counts per industry vertical. Reads the labels from the
# "index" column and the counts from the "Industry Vertical" column.
plt.figure(figsize=(15, 12))
sns.set(style="darkgrid")
sns.barplot(
    x="Industry Vertical",
    y="index",
    data=start_indver,
    palette="ch:s=.25,rot=-.25",
)
plt.title("Investment wrt Industry Vertical")
plt.xlabel(" Times Investment done")
plt.ylabel("Industry Vertical")

# Investor have been funding Consumer Internet and Technology 

In [None]:
# Collapse the spelling and capitalisation variants of "undisclosed" into
# the single label "Undisclosed Investors".
#
# Series.replace matches whole cell values, like the == masks it replaces,
# and the result is assigned back to the column in one step. The former
# chained form start[col][mask] = value writes through an intermediate
# object, which triggers SettingWithCopyWarning and does not modify `start`
# when pandas Copy-on-Write is enabled.
undisclosed_variants = [
    "Undisclosed investors",
    "undisclosed Investors",
    "undisclosed investors",
    "Undisclosed investor",
    "Undisclosed Investor",
    "Undisclosed",
]
start["Investors Name"] = start["Investors Name"].replace(
    undisclosed_variants, "Undisclosed Investors"
)

In [None]:
# Preview the frame after the investor-name normalisation.
start.head()

In [None]:
# Rows whose investor label is anything other than "Undisclosed Investors".
is_disclosed = start["Investors Name"].ne("Undisclosed Investors")
start_un = start.loc[is_disclosed]

In [None]:
# Frequency table of the 20 most frequent investor entries, undisclosed
# ones excluded. Twenty rows are kept so the table matches the "Top 20"
# wording of the chart title and heading; the previous slice kept only 10.
#
# Column names are set explicitly, labels in "index" and counts in
# "Investors Name", because value_counts().reset_index() names its columns
# differently in pandas 1.x and 2.x and the plotting cell reads exactly
# these two columns.
investor_counts = start_un["Investors Name"].value_counts().head(20)
start_inves = pd.DataFrame(
    {"index": investor_counts.index, "Investors Name": investor_counts.to_numpy()}
)

# Now lets check who are the top 20 investors excluding the Undisclosed Investor

In [None]:
# Show the investor frequency table (undisclosed investors excluded).
start_inves

In [None]:
# Bar chart of the counts per investor entry in `start_inves`. Reads the
# labels from the "index" column and the counts from "Investors Name".
plt.figure(figsize=(15, 12))
sns.set(style="darkgrid")
sns.barplot(
    x="Investors Name",
    y="index",
    data=start_inves,
    palette="ch:s=.25,rot=-.25",
)
plt.title(" Top 20 Investors")
plt.xlabel("count")
plt.ylabel("Investors Name")

# Now lets check who are the top 20 investors including the Undisclosed Investor

In [None]:
# Frequency table of the 20 most frequent investor entries, undisclosed
# ones included. Twenty rows are kept so the table matches the "Top 20"
# wording of the chart title and heading; the previous slice kept only 10.
#
# Column names are set explicitly, labels in "index" and counts in
# "Investors Name", because value_counts().reset_index() names its columns
# differently in pandas 1.x and 2.x and the plotting cell reads exactly
# these two columns.
all_investor_counts = start["Investors Name"].value_counts().head(20)
start_invun = pd.DataFrame(
    {"index": all_investor_counts.index, "Investors Name": all_investor_counts.to_numpy()}
)

In [None]:
# Show the investor frequency table (undisclosed investors included).
start_invun

In [None]:
# Bar chart of the counts per investor entry in `start_invun`. Reads the
# labels from the "index" column and the counts from "Investors Name".
plt.figure(figsize=(15, 12))
sns.set(style="darkgrid")
sns.barplot(
    x="Investors Name",
    y="index",
    data=start_invun,
    palette="ch:s=.25,rot=-.25",
)
plt.title(" Top 20 Investors(including the Undisclosed Investor)")
plt.xlabel("count")
plt.ylabel("Investors Name")