## Shark Tank India Exploratory Data Analysis (EDA)

## Dataset https://www.kaggle.com/thirumani/shark-tank-india

In [143]:
import datetime

# Stamp the run date (year, abbreviated month name, day) so readers can tell how fresh the outputs are.
run_date = datetime.date.today()
print(f"Notebook was last executed on: {run_date:%Y-%b-%d}")

### Importing Required Python Libraries

In [144]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 50)

import matplotlib.pyplot as plt
import seaborn as sns
from babel.numbers import format_currency
from wordcloud import WordCloud

import warnings
warnings.filterwarnings('ignore')

### Check and import dataset

In [145]:
! ls -l /kaggle/input/shark-tank-india
shark_tank = pd.read_csv('/kaggle/input/shark-tank-india/Shark Tank India.csv', encoding = "ISO-8859-1")

nRow, nCol = shark_tank.shape
print(f'\nThere are {nRow} rows and {nCol} columns in the dataset')

## Exploratory Data Analysis (EDA)

In [146]:
# Peek at the first five rows (head() defaults to n=5).
shark_tank.head()

In [147]:
# Last ten rows, transposed so each row becomes a column and every field stays visible.
shark_tank.tail(n=10).transpose()

In [148]:
# Random sample of ten rows; the fixed seed keeps the displayed rows stable
# across Restart & Run All instead of changing on every execution.
shark_tank.sample(10, random_state=2022)

In [149]:
# Print the frame's summary: column names, non-null counts, dtypes and memory usage.
shark_tank.info()

In [150]:
# Summary statistics, one row per described column, rounded to two decimals.
shark_tank.describe().transpose().round(decimals=2)

In [151]:
# Number of distinct (non-null) values in every column
for col, n_unique in shark_tank.nunique().items():
    print("Number of unique values in", col, "-", n_unique)

## One season of Shark Tank India was broadcast on the SonyLIV OTT platform
### In 35 episodes, there were 117 pitches

In [152]:
# The highest season / episode / pitch number is reported as the count.
# NOTE(review): this assumes the numbering starts at 1 and has no gaps — confirm.
season_count = shark_tank['Season Number'].max()
episode_count = shark_tank['Episode Number'].max()
pitch_count = shark_tank['Pitch Number'].max()

print(season_count, "season \n")
print(episode_count, "episodes \n")
print(pitch_count, "startup companies came pitching \n")

In [153]:
# Pitches per industry, most common first
industry_counts = shark_tank['Industry'].value_counts()
print(industry_counts, "\n")

fig, ax = plt.subplots(figsize=(20, 4))
sns.countplot(x='Industry', data=shark_tank, order=industry_counts.index, ax=ax)
ax.set_title('Shark Tank India')

# Write each bar's count just above it; a bar with NaN height is labelled 0 at the baseline.
for bar in ax.patches:
    height = bar.get_height()
    if np.isnan(float(height)):
        ax.annotate(0, (bar.get_x(), 0))
        continue
    ax.annotate(f"{int(height):,d}", (bar.get_x(), height * 1.01))

In [154]:
# Head-count over all rows, then the same total plus a value frequency table per gender column.
print("Total presenters", int(shark_tank['Number of Presenters'].sum()), "\n")

for label, column in (("male", 'Male Presenter'), ("female", 'Female Presenter')):
    print(f"Total {label} presenters", int(shark_tank[column].sum()), "\n")
    # How often each value of the column occurs.
    print(shark_tank[column].value_counts(), "\n")

In [155]:
# Offers received
print(shark_tank['Received Offer'].value_counts(), "\n")

# Share of each outcome as a whole-number percentage.
# The label is built by casting to int and appending '%' instead of using
# .str.replace('.0', '%'): when the pattern is treated as a regex, '.0' means
# "any character followed by 0", so '70.0' would be rendered as '%%'.
received_share = shark_tank['Received Offer'].value_counts(normalize=True) * 100
print(received_share.round().astype(int).astype(str) + '%', "\n")

sns.countplot(x='Received Offer', data=shark_tank, palette="ch:.45")

In [156]:
# Offers accepted
print(shark_tank['Accepted Offer'].value_counts(), "\n")

# Share of each outcome as a whole-number percentage.
# The label is built by casting to int and appending '%' instead of using
# .str.replace('.0', '%'): when the pattern is treated as a regex, '.0' means
# "any character followed by 0", so '70.0' would be rendered as '%%'.
accepted_share = shark_tank['Accepted Offer'].value_counts(normalize=True) * 100
print(accepted_share.round().astype(int).astype(str) + '%', "\n")

sns.countplot(x='Accepted Offer', data=shark_tank, palette="ch:.45")

In [157]:
# Sum of investment amount asked by all startup companies, in Shark Tank, in India
# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
total_ask = shark_tank['Original Ask Amount'].sum() / 100
total_ask_text = format_currency(total_ask, 'INR', locale='en_IN')
# Drop a trailing ".00" so whole amounts print without decimals.
print(total_ask_text.replace(".00", ""), "crores")

In [158]:
# Sum of the 'Total Deal Amount' column over all rows
# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
total_deal = shark_tank['Total Deal Amount'].sum() / 100
total_deal_text = format_currency(total_deal, 'INR', locale='en_IN')
# Drop a trailing ".00" so whole amounts print without decimals.
print(total_deal_text.replace(".00", ""), "crores")

In [159]:
# Sum of the 'Total Deal Debt' column over all rows
# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
total_debt = shark_tank['Total Deal Debt'].sum() / 100
total_debt_text = format_currency(total_debt, 'INR', locale='en_IN')
# Drop a trailing ".00" so whole amounts print without decimals.
print(total_debt_text.replace(".00", ""), "crores")

In [160]:
# Top 15 investments, as per total deal amount
# Largest 'Total Deal Amount' recorded per startup, then the 15 biggest of those.
deal_amount_by_startup = shark_tank.groupby('Startup Name')['Total Deal Amount'].max()
deal_amount_by_startup.nlargest(15)

In [161]:
# Top 10 investments, as per total deal equity
# Largest 'Total Deal Equity' recorded per startup, then the 10 biggest of those.
deal_equity_by_startup = shark_tank.groupby('Startup Name')['Total Deal Equity'].max()
deal_equity_by_startup.nlargest(10)

In [162]:
# Top 5 investments, as per total debt amount
# Largest 'Total Deal Debt' recorded per startup, then the 5 biggest of those.
deal_debt_by_startup = shark_tank.groupby('Startup Name')['Total Deal Debt'].max()
deal_debt_by_startup.nlargest(5)

In [163]:
# Ashneer: totals, the startups invested in, and the industry mix of those deals.
# The shark's name is set once and every column name is derived from it.
shark_name = "Ashneer"
amount_col = f"{shark_name} Investment Amount"

# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
print(f"Investment amount by {shark_name}", round(shark_tank[amount_col].sum()/100, 2), "crores")
print(f"Equity received by {shark_name}", round(shark_tank[f"{shark_name} Investment Equity"].sum(), 2), "%")
print(f"Debt/loan amount by {shark_name}", round(shark_tank[f"{shark_name} Debt Amount"].sum()/100, 2), "crores\n")

# Rows where this shark's investment amount is positive (NaN fails the comparison and is excluded).
shark_deals = shark_tank.loc[shark_tank[amount_col] > 0]
print(shark_deals[["Startup Name", "Industry", amount_col]])

print(f"\n{shark_name} industry wise investments\n")
# value_counts() already returns per-industry counts in descending order, so no
# groupby on the same column (which duplicates the index level) or re-sort is needed.
print(shark_deals['Industry'].value_counts())

In [164]:
# Namita: totals, the startups invested in, and the industry mix of those deals.
# The shark's name is set once and every column name is derived from it.
shark_name = "Namita"
amount_col = f"{shark_name} Investment Amount"

# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
print(f"Investment amount by {shark_name}", round(shark_tank[amount_col].sum()/100, 2), "crores")
print(f"Equity received by {shark_name}", round(shark_tank[f"{shark_name} Investment Equity"].sum(), 2), "%")
print(f"Debt/loan amount by {shark_name}", round(shark_tank[f"{shark_name} Debt Amount"].sum()/100, 2), "crores\n")

# Rows where this shark's investment amount is positive (NaN fails the comparison and is excluded).
shark_deals = shark_tank.loc[shark_tank[amount_col] > 0]
print(shark_deals[["Startup Name", "Industry", amount_col]])

print(f"\n {shark_name} industry wise investments\n")
# value_counts() already returns per-industry counts in descending order, so no
# groupby on the same column (which duplicates the index level) or re-sort is needed.
print(shark_deals['Industry'].value_counts())

In [165]:
# Anupam: totals, the startups invested in, and the industry mix of those deals.
# The shark's name is set once and every column name is derived from it.
shark_name = "Anupam"
amount_col = f"{shark_name} Investment Amount"

# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
print(f"Investment amount by {shark_name}", round(shark_tank[amount_col].sum()/100, 2), "crores")
print(f"Equity received by {shark_name}", round(shark_tank[f"{shark_name} Investment Equity"].sum(), 2), "%")
print(f"Debt/loan amount by {shark_name}", round(shark_tank[f"{shark_name} Debt Amount"].sum()/100, 2), "crores\n")

# Rows where this shark's investment amount is positive (NaN fails the comparison and is excluded).
shark_deals = shark_tank.loc[shark_tank[amount_col] > 0]
print(shark_deals[["Startup Name", "Industry", amount_col]])

print(f"\n {shark_name} industry wise investments\n")
# value_counts() already returns per-industry counts in descending order, so no
# groupby on the same column (which duplicates the index level) or re-sort is needed.
print(shark_deals['Industry'].value_counts())

In [166]:
# Vineeta: totals, the startups invested in, and the industry mix of those deals.
# The shark's name is set once and every column name is derived from it.
shark_name = "Vineeta"
amount_col = f"{shark_name} Investment Amount"

# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
print(f"Investment amount by {shark_name}", round(shark_tank[amount_col].sum()/100, 2), "crores")
print(f"Equity received by {shark_name}", round(shark_tank[f"{shark_name} Investment Equity"].sum(), 2), "%")
print(f"Debt/loan amount by {shark_name}", round(shark_tank[f"{shark_name} Debt Amount"].sum()/100, 2), "crores\n")

# Rows where this shark's investment amount is positive (NaN fails the comparison and is excluded).
shark_deals = shark_tank.loc[shark_tank[amount_col] > 0]
print(shark_deals[["Startup Name", "Industry", amount_col]])

print(f"\n {shark_name} industry wise investments\n")
# value_counts() already returns per-industry counts in descending order, so no
# groupby on the same column (which duplicates the index level) or re-sort is needed.
print(shark_deals['Industry'].value_counts())

In [167]:
# Aman: totals, the startups invested in, and the industry mix of those deals.
# The shark's name is set once and every column name is derived from it.
shark_name = "Aman"
amount_col = f"{shark_name} Investment Amount"

# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
print(f"Investment amount by {shark_name}", round(shark_tank[amount_col].sum()/100, 2), "crores")
print(f"Equity received by {shark_name}", round(shark_tank[f"{shark_name} Investment Equity"].sum(), 2), "%")
print(f"Debt/loan amount by {shark_name}", round(shark_tank[f"{shark_name} Debt Amount"].sum()/100, 2), "crores\n")

# Rows where this shark's investment amount is positive (NaN fails the comparison and is excluded).
shark_deals = shark_tank.loc[shark_tank[amount_col] > 0]
print(shark_deals[["Startup Name", "Industry", amount_col]])

print(f"\n {shark_name} industry wise investments\n")
# value_counts() already returns per-industry counts in descending order, so no
# groupby on the same column (which duplicates the index level) or re-sort is needed.
print(shark_deals['Industry'].value_counts())

In [168]:
# Peyush: totals, the startups invested in, and the industry mix of those deals.
# The shark's name is set once and every column name is derived from it.
shark_name = "Peyush"
amount_col = f"{shark_name} Investment Amount"

# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
print(f"Investment amount by {shark_name}", round(shark_tank[amount_col].sum()/100, 2), "crores")
print(f"Equity received by {shark_name}", round(shark_tank[f"{shark_name} Investment Equity"].sum(), 2), "%")
print(f"Debt/loan amount by {shark_name}", round(shark_tank[f"{shark_name} Debt Amount"].sum()/100, 2), "crores\n")

# Rows where this shark's investment amount is positive (NaN fails the comparison and is excluded).
shark_deals = shark_tank.loc[shark_tank[amount_col] > 0]
print(shark_deals[["Startup Name", "Industry", amount_col]])

print(f"\n {shark_name} industry wise investments\n")
# value_counts() already returns per-industry counts in descending order, so no
# groupby on the same column (which duplicates the index level) or re-sort is needed.
print(shark_deals['Industry'].value_counts())

In [169]:
# Ghazal: totals, the startups invested in, and the industry mix of those deals.
# The shark's name is set once and every column name is derived from it.
shark_name = "Ghazal"
amount_col = f"{shark_name} Investment Amount"

# NOTE(review): the /100 presumably converts lakhs to crores — confirm the dataset's unit.
print(f"Investment amount by {shark_name}", round(shark_tank[amount_col].sum()/100, 2), "crores")
print(f"Equity received by {shark_name}", round(shark_tank[f"{shark_name} Investment Equity"].sum(), 2), "%")
print(f"Debt/loan amount by {shark_name}", round(shark_tank[f"{shark_name} Debt Amount"].sum()/100, 2), "crores\n")

# Rows where this shark's investment amount is positive (NaN fails the comparison and is excluded).
shark_deals = shark_tank.loc[shark_tank[amount_col] > 0]
print(shark_deals[["Startup Name", "Industry", amount_col]])

print(f"\n {shark_name} industry wise investments\n")
# value_counts() already returns per-industry counts in descending order, so no
# groupby on the same column (which duplicates the index level) or re-sort is needed.
print(shark_deals['Industry'].value_counts())

In [170]:
# Number of sharks in deal
print(shark_tank['Number of sharks in deal'].value_counts(), "\n")

# In percentage
# The label is built by casting to int and appending '%' instead of using
# .str.replace('.0', '%'): when the pattern is treated as a regex, '.0' means
# "any character followed by 0", so '10.0' would be rendered as '%%'.
sharks_share = shark_tank['Number of sharks in deal'].value_counts(normalize=True) * 100
print(sharks_share.round().astype(int).astype(str) + '%')

sns.countplot(data = shark_tank, x = 'Number of sharks in deal')

In [171]:
# All sharks deal
# NOTE(review): 5 is taken to mean "every shark joined" — confirm against the panel size.
is_five_shark_deal = shark_tank['Number of sharks in deal'] == 5
deal_columns = ["Startup Name", "Total Deal Amount", "Total Deal Equity", "Total Deal Debt"]
print(shark_tank.loc[is_five_shark_deal, deal_columns])

In [172]:
# Print the frame's column/dtype/non-null summary again (same call as earlier in the notebook).
shark_tank.info()

In [173]:
# Build one text blob from every business description, inserting the show's
# name between entries. Missing descriptions are dropped and values are cast to
# str because str.join raises TypeError on a NaN (float) element.
descriptions = shark_tank['Business Description'].dropna().astype(str)
text = " Shark Tank India ".join(descriptions)

# random_state pins the word layout so the saved image is reproducible.
wordcloud = WordCloud(width = 2000, height = 1500, background_color='salmon', colormap='Pastel2', collocations=False, random_state=2022).generate(text)
wordcloud.to_file("Shark_Tank_India.png")

<img src="./Shark_Tank_India.png">

In [174]:
# Record the library versions this notebook ran with.
import matplotlib

print(f"numpy version: {np.__version__}")
print(f"pandas version: {pd.__version__}")
print(f"matplotlib version: {matplotlib.__version__}")
print(f"seaborn version: {sns.__version__}")


# numpy version: 1.20.3
# pandas version: 1.3.5
# matplotlib version: 3.5.1
# seaborn version: 0.11.2