# **This is an EDA notebook for [Shark Tank India](https://github.com/mkaustubh/data-analytics). The dataset is available on [Kaggle](https://www.kaggle.com/thirumani/shark-tank-india).**
___


In [None]:
import numpy as np # linear algebra
import pandas as pd # for data preparation
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
from babel.numbers import format_currency
from wordcloud import WordCloud, STOPWORDS
import plotly.express as px
import plotly.io as pio
# Plotly defaults: dark template and the 'notebook' renderer for any Plotly figures.
pio.templates.default = "plotly_dark"
pio.renderers.default = 'notebook'

# NOTE(review): this silences every warning, including library deprecation
# warnings raised by the plotting calls below — consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# Load the dataset with 'Pitch Number' as the index, then show its (rows, columns) size.
csv_path = '../input/shark-tank-india/Shark Tank India.csv'
shark_tank = pd.read_csv(csv_path, index_col='Pitch Number')
shark_tank.shape

In [None]:
# Preview the first five rows of the freshly loaded data.
shark_tank.head(n=5)

In [None]:
# Word cloud based on episode titles
# Missing titles are dropped and the rest coerced to str: str.join raises
# TypeError on NaN / non-string entries, and this cell runs before any cleaning.
episode_titles = shark_tank['Episode Title'].dropna().astype(str)
# Titles are glued together with the show name as the separator.
text = " Shark Tank India ".join(episode_titles)
# Extend the default stop words with a few extra short words.
stop_words = list(STOPWORDS) + ["Ka", "Ki", "Ko"]
# random_state is fixed so the layout is reproducible between runs.
wordcloud = WordCloud(width=2000, height=1500, stopwords=stop_words, background_color='white', colormap='Greys', collocations=False, random_state=2022).generate(text)
plt.figure(figsize=(20,10))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()

## Data Cleaning

In [None]:
# Replace every missing value with 0; later cells filter on `!= 0` to skip these rows.
shark_tank = shark_tank.fillna(value=0)

## Dataset Info

In [None]:
# Preview the first five rows of the current frame.
shark_tank.head(n=5)

In [None]:
# One-row overview: highest episode number, number of startup names, summed deal
# amount and equity (truncated to int), and how many rows have 'Accepted Offer' == 1.
# Counting with a boolean mask avoids the KeyError that value_counts()[1] raises
# when no row equals 1.
accepted_mask = shark_tank['Accepted Offer'] == 1
pd.DataFrame({
    'Number of Episodes':[shark_tank['Episode Number'].max()],
    'Startup Pitched':[shark_tank['Startup Name'].count()],
    'Total Invested Amount(in Lakhs)':[int(shark_tank['Total Deal Amount'].sum())],
    'Total Equity':[int(shark_tank['Total Deal Equity'].sum())],
    'Number of Deals Closed':[int(accepted_mask.sum())]
    },index=['Count'])

## Data Stats

### Industry-wise pitches


In [None]:
# Number of pitches per industry.
plt.figure(figsize=(20,5))
# Pass the column as x= explicitly: positional use is deprecated in seaborn 0.11
# and the first positional argument is read as `data` from 0.12 on.
sns.countplot(x=shark_tank['Industry'], palette='flare_r')
plt.ylabel('Number of Startups')
plt.show()

### State Wise

In [None]:
from collections import Counter
# Tally pitches per state, skipping rows whose state is the 0 placeholder.
has_state = shark_tank['Pitchers State'] != 0
state_counts = Counter(shark_tank.loc[has_state, 'Pitchers State'].tolist())
df = pd.DataFrame({
    'States': state_counts.keys(),
    'Number of Startups': state_counts.values()
})

In [None]:
# Load the state boundaries and attach the per-state startup counts by state name.
shp_gdf = gpd.read_file('../input/india-gis-data/India States/Indian_states.shp')
merged = shp_gdf.set_index('st_nm').join(df.set_index('States'))
# The original line assigned the column to itself (a no-op). States with no
# matching row in df come out of the left join as NaN, and geopandas does not
# draw NaN geometries by default, so give them an explicit count of 0.
merged['Number of Startups'] = merged['Number of Startups'].fillna(0)

In [None]:
# Choropleth of startup counts per state.
fig, ax = plt.subplots(1, figsize=(12, 12))
ax.axis('off')
ax.set_title('As per Number of Startups', fontdict={'fontsize': '15', 'fontweight' : '3'})
# GeoDataFrame.plot returns the Axes it drew on; do not rebind `fig` to it,
# otherwise the Figure handle is lost for anything that needs it later.
merged.plot(column='Number of Startups', cmap='icefire', linewidth=0.8, ax=ax, edgecolor='0.5', legend=True)
plt.show()

### Year Wise

In [None]:
# Number of startups per founding year, skipping rows where the year is the 0 placeholder.
started_in = shark_tank.loc[shark_tank['Started in'] != 0, 'Started in']
plt.figure(figsize=(15,5))
# Pass the Series as x=: positional use is deprecated in seaborn 0.11 and the
# first positional argument is read as `data` from 0.12 on.
sns.countplot(x=started_in)
plt.ylabel('Number of Startups')
plt.xlabel('Year')
plt.show()

### Received v/s Not Received *Offer*

In [None]:
# Pie of pitches with vs. without an offer.
# NOTE(review): assumes 'Received Offer' is a 0/1 flag so its sum is a count — confirm.
offers_received = shark_tank['Received Offer'].sum()
total_pitches = shark_tank.shape[0]
data = [offers_received, total_pitches - offers_received]
labels = ['Received', 'Not Received']
plt.figure(figsize=(12,6))
plt.pie(data, labels=labels, autopct='%.0f%%')
plt.show()

### Accepted v/s Not Accepted *Offer*

In [None]:
# Pie of received offers split into accepted vs. not accepted.
# NOTE(review): assumes both columns are 0/1 flags so their sums are counts — confirm.
offers_accepted = shark_tank['Accepted Offer'].sum()
offers_received = shark_tank['Received Offer'].sum()
data = [offers_accepted, offers_received - offers_accepted]
labels = ['Accepted', 'Not Accepted']
plt.figure(figsize=(12,6))
plt.pie(data, labels=labels, autopct='%.0f%%')
plt.show()

## Data Analysis

### Male vs Female Presenters

In [None]:
# Total male and female presenters across all pitches, shown as two bars.
male_total = shark_tank['Male Presenters'].sum()
female_total = shark_tank['Female Presenters'].sum()
df = pd.DataFrame({
    'Total Presenters': [male_total, female_total],
    'Sex': ['Male', 'Female']
})
plt.title('Male vs Female Presenters')
sns.barplot(x='Sex', y='Total Presenters', data=df, palette='crest')
plt.show()

### Pitcher's Average Age


In [None]:
# Bar chart of 'Number of Presenters' against 'Pitchers Average Age', aggregated with a sum.
plt.figure(figsize=(20,5))
sns.set_style('whitegrid')
sns.barplot(x='Number of Presenters', y='Pitchers Average Age', data=shark_tank, palette='CMRmap_r', estimator = np.sum)
# Title typo fixed: the original used a double quote in place of the apostrophe.
plt.title("Pitcher's Average Age")
# Fixed x ticks every 20 units from 0 to 180.
plt.xticks(range(0,200,20))
plt.show()

### Number of sharks in deal

In [None]:
# Distribution of how many sharks took part in each deal.
plt.figure(figsize=(12,5))
plt.title('Number of Sharks in deal')

sns.set_style('dark')
# histplot replaces distplot(kde=False), which is deprecated since seaborn 0.11.
# discrete=True centres one bar on each integer value.
# NOTE(review): assumes the column holds whole-number counts — confirm.
sns.histplot(x=shark_tank['Number of sharks in deal'], discrete=True, color='#5539AB')
plt.show()

### Invested Amount per Episode

In [None]:
# Total deal amount per episode (bars are per-episode sums, with capped error bars).
plt.figure(figsize=(24,6))
sns.barplot(
    x="Episode Number",
    y="Total Deal Amount",
    data=shark_tank,
    estimator=np.sum,
    capsize=0.1,
)
plt.ylabel('Invested Amount (in Lakhs)')
plt.show()

### Highest Investment

As per Highest Deal Amount

In [None]:
# Ten accepted deals with the largest total deal amount.
deal_columns = ['Startup Name', 'Total Deal Amount', 'Total Deal Equity', 'Total Deal Debt']
accepted_deals = shark_tank.loc[shark_tank['Accepted Offer'] == 1, deal_columns]
df = accepted_deals.sort_values(by=['Total Deal Amount'], ascending=False).head(10)

In [None]:
# Bar chart of the selected startups by total deal amount.
plt.figure(figsize=(25,7))
amount_ax = sns.barplot(data=df, x='Startup Name', y='Total Deal Amount')
amount_ax.set_title('As per Highest Deal Amount')
plt.show()

As per Highest Debt

In [None]:
# Ten accepted deals with the largest total deal debt.
deal_columns = ['Startup Name', 'Total Deal Amount', 'Total Deal Equity', 'Total Deal Debt']
accepted_deals = shark_tank.loc[shark_tank['Accepted Offer'] == 1, deal_columns]
df = accepted_deals.sort_values(by=['Total Deal Debt'], ascending=False).head(10)

In [None]:
# Bar chart of the selected startups by total deal debt.
plt.figure(figsize=(25,7))
debt_ax = sns.barplot(data=df, x='Startup Name', y='Total Deal Debt')
debt_ax.set_title('As per Highest Deal Debt')
plt.show()

As per Highest Equity

In [None]:
# Ten accepted deals with the largest total deal equity.
deal_columns = ['Startup Name', 'Total Deal Amount', 'Total Deal Equity', 'Total Deal Debt']
accepted_deals = shark_tank.loc[shark_tank['Accepted Offer'] == 1, deal_columns]
df = accepted_deals.sort_values(by=['Total Deal Equity'], ascending=False).head(10)

In [None]:
# Bar chart of the selected startups by total deal equity.
plt.figure(figsize=(25,7))
equity_ax = sns.barplot(data=df, x='Startup Name', y='Total Deal Equity')
equity_ax.set_title('As per Highest Deal Equity')
plt.show()

## Sharks Investment Analysis

### Aman's Investment Stats

In [None]:
# Aman's totals across all pitches: amount invested, debt (floored to a whole
# number) and summed equity.
# The invested amount is no longer floored with `//1`: the other shark tables
# report the exact sum, and flooring silently dropped the fractional part.
pd.DataFrame({
    'Invested Amt(Lakhs)':[shark_tank['Aman Investment Amount'].sum()],
    'Debt Amt(Lakhs)':[shark_tank['Aman Debt Amount'].sum()//1],
    'Equity Received':[shark_tank['Aman Investment Equity'].sum()]
},
index=['Count'])

#### Industry Wise

In [None]:
# Industries of the startups with a non-zero 'Aman Investment Amount'.
aman_industries = shark_tank.loc[shark_tank['Aman Investment Amount'] != 0, 'Industry']
plt.figure(figsize=(15,5))
# Pass the Series as x=: positional use is deprecated in seaborn 0.11 and the
# first positional argument is read as `data` from 0.12 on.
sns.countplot(x=aman_industries, palette='flare_r')
plt.ylabel('Number of Startups')
plt.show()

In [None]:
# First five startups with a non-zero 'Aman Investment Amount', with his deal figures.
aman_columns = ['Startup Name','Number of sharks in deal','Aman Investment Amount','Aman Investment Equity','Aman Debt Amount']
aman_invested = shark_tank['Aman Investment Amount'] != 0
shark_tank.loc[aman_invested, aman_columns].head()

### Ashneer's Investment Stats

In [None]:
# Ashneer's totals across all pitches: amount invested, debt (floored to a whole
# number) and summed equity.
ashneer_invested = shark_tank['Ashneer Investment Amount'].sum()
ashneer_debt = shark_tank['Ashneer Debt Amount'].sum() // 1
ashneer_equity = shark_tank['Ashneer Investment Equity'].sum()
pd.DataFrame(
    {
        'Invested Amt(Lakhs)': [ashneer_invested],
        'Debt Amt(Lakhs)': [ashneer_debt],
        'Equity Received': [ashneer_equity],
    },
    index=['Count'],
)

#### Industry Wise

In [None]:
# Industries of the startups with a non-zero 'Ashneer Investment Amount'.
ashneer_industries = shark_tank.loc[shark_tank['Ashneer Investment Amount'] != 0, 'Industry']
plt.figure(figsize=(15,5))
# Pass the Series as x=: positional use is deprecated in seaborn 0.11 and the
# first positional argument is read as `data` from 0.12 on.
sns.countplot(x=ashneer_industries, palette='flare_r')
plt.ylabel('Number of Startups')
plt.show()

In [None]:
# First five startups with a non-zero 'Ashneer Investment Amount', with his deal figures.
ashneer_columns = ['Startup Name','Number of sharks in deal','Ashneer Investment Amount','Ashneer Investment Equity','Ashneer Debt Amount']
ashneer_invested_mask = shark_tank['Ashneer Investment Amount'] != 0
shark_tank.loc[ashneer_invested_mask, ashneer_columns].head()

### Anupam's Investment Stats

In [None]:
# Anupam's totals across all pitches: amount invested, debt (floored to a whole
# number) and summed equity.
anupam_invested = shark_tank['Anupam Investment Amount'].sum()
anupam_debt = shark_tank['Anupam Debt Amount'].sum() // 1
anupam_equity = shark_tank['Anupam Investment Equity'].sum()
pd.DataFrame(
    {
        'Invested Amt(Lakhs)': [anupam_invested],
        'Debt Amt(Lakhs)': [anupam_debt],
        'Equity Received': [anupam_equity],
    },
    index=['Count'],
)

#### Industry Wise

In [None]:
# Industries of the startups with a non-zero 'Anupam Investment Amount'.
anupam_industries = shark_tank.loc[shark_tank['Anupam Investment Amount'] != 0, 'Industry']
plt.figure(figsize=(15,5))
# Pass the Series as x=: positional use is deprecated in seaborn 0.11 and the
# first positional argument is read as `data` from 0.12 on.
sns.countplot(x=anupam_industries, palette='flare_r')
plt.ylabel('Number of Startups')
plt.show()

In [None]:
# First five startups with a non-zero 'Anupam Investment Amount', with his deal figures.
anupam_columns = ['Startup Name','Number of sharks in deal','Anupam Investment Amount','Anupam Investment Equity','Anupam Debt Amount']
anupam_invested_mask = shark_tank['Anupam Investment Amount'] != 0
shark_tank.loc[anupam_invested_mask, anupam_columns].head()

### Namita's Investment Stats

In [None]:
# Namita's totals across all pitches: amount invested, debt (floored to a whole
# number) and summed equity.
namita_invested = shark_tank['Namita Investment Amount'].sum()
namita_debt = shark_tank['Namita Debt Amount'].sum() // 1
namita_equity = shark_tank['Namita Investment Equity'].sum()
pd.DataFrame(
    {
        'Invested Amt(Lakhs)': [namita_invested],
        'Debt Amt(Lakhs)': [namita_debt],
        'Equity Received': [namita_equity],
    },
    index=['Count'],
)

#### Industry Wise

In [None]:
# Industries of the startups with a non-zero 'Namita Investment Amount'.
namita_industries = shark_tank.loc[shark_tank['Namita Investment Amount'] != 0, 'Industry']
plt.figure(figsize=(15,5))
# Pass the Series as x=: positional use is deprecated in seaborn 0.11 and the
# first positional argument is read as `data` from 0.12 on.
sns.countplot(x=namita_industries, palette='flare_r')
plt.ylabel('Number of Startups')
plt.show()

In [None]:
# First five startups with a non-zero 'Namita Investment Amount', with her deal figures.
namita_columns = ['Startup Name','Number of sharks in deal','Namita Investment Amount','Namita Investment Equity','Namita Debt Amount']
namita_invested_mask = shark_tank['Namita Investment Amount'] != 0
shark_tank.loc[namita_invested_mask, namita_columns].head()

### Vineeta's Investment Stats

In [None]:
# Vineeta's totals across all pitches: amount invested, debt (floored to a whole
# number) and summed equity.
vineeta_invested = shark_tank['Vineeta Investment Amount'].sum()
vineeta_debt = shark_tank['Vineeta Debt Amount'].sum() // 1
vineeta_equity = shark_tank['Vineeta Investment Equity'].sum()
pd.DataFrame(
    {
        'Invested Amt(Lakhs)': [vineeta_invested],
        'Debt Amt(Lakhs)': [vineeta_debt],
        'Equity Received': [vineeta_equity],
    },
    index=['Count'],
)

#### Industry Wise

In [None]:
# Industries of the startups with a non-zero 'Vineeta Investment Amount'.
vineeta_industries = shark_tank.loc[shark_tank['Vineeta Investment Amount'] != 0, 'Industry']
plt.figure(figsize=(15,5))
# Pass the Series as x=: positional use is deprecated in seaborn 0.11 and the
# first positional argument is read as `data` from 0.12 on.
sns.countplot(x=vineeta_industries, palette='flare_r')
plt.ylabel('Number of Startups')
plt.show()

In [None]:
# First five startups with a non-zero 'Vineeta Investment Amount', with her deal figures.
vineeta_columns = ['Startup Name','Number of sharks in deal','Vineeta Investment Amount','Vineeta Investment Equity','Vineeta Debt Amount']
vineeta_invested_mask = shark_tank['Vineeta Investment Amount'] != 0
shark_tank.loc[vineeta_invested_mask, vineeta_columns].head()

### Peyush's Investment Stats

In [None]:
# Peyush's totals across all pitches: amount invested, debt (floored to a whole
# number) and summed equity.
peyush_invested = shark_tank['Peyush Investment Amount'].sum()
peyush_debt = shark_tank['Peyush Debt Amount'].sum() // 1
peyush_equity = shark_tank['Peyush Investment Equity'].sum()
pd.DataFrame(
    {
        'Invested Amt(Lakhs)': [peyush_invested],
        'Debt Amt(Lakhs)': [peyush_debt],
        'Equity Received': [peyush_equity],
    },
    index=['Count'],
)

#### Industry Wise

In [None]:
# Industries of the startups with a non-zero 'Peyush Investment Amount'.
peyush_industries = shark_tank.loc[shark_tank['Peyush Investment Amount'] != 0, 'Industry']
plt.figure(figsize=(15,5))
# Pass the Series as x=: positional use is deprecated in seaborn 0.11 and the
# first positional argument is read as `data` from 0.12 on.
sns.countplot(x=peyush_industries, palette='flare_r')
plt.ylabel('Number of Startups')
plt.show()

In [None]:
# First five startups with a non-zero 'Peyush Investment Amount', with his deal figures.
peyush_columns = ['Startup Name','Number of sharks in deal','Peyush Investment Amount','Peyush Investment Equity','Peyush Debt Amount']
peyush_invested_mask = shark_tank['Peyush Investment Amount'] != 0
shark_tank.loc[peyush_invested_mask, peyush_columns].head()

### Ghazal's Investment Stats

In [None]:
# Ghazal's totals across all pitches: amount invested, debt (floored to a whole
# number) and summed equity.
ghazal_invested = shark_tank['Ghazal Investment Amount'].sum()
ghazal_debt = shark_tank['Ghazal Debt Amount'].sum() // 1
ghazal_equity = shark_tank['Ghazal Investment Equity'].sum()
pd.DataFrame(
    {
        'Invested Amt(Lakhs)': [ghazal_invested],
        'Debt Amt(Lakhs)': [ghazal_debt],
        'Equity Received': [ghazal_equity],
    },
    index=['Count'],
)

#### Industry Wise

In [None]:
# Industries of the startups with a non-zero 'Ghazal Investment Amount'.
ghazal_industries = shark_tank.loc[shark_tank['Ghazal Investment Amount'] != 0, 'Industry']
plt.figure(figsize=(15,5))
# Pass the Series as x=: positional use is deprecated in seaborn 0.11 and the
# first positional argument is read as `data` from 0.12 on.
sns.countplot(x=ghazal_industries, palette='flare_r')
plt.ylabel('Number of Startups')
plt.show()

In [None]:
# First five startups with a non-zero 'Ghazal Investment Amount', with her deal figures.
ghazal_columns = ['Startup Name','Number of sharks in deal','Ghazal Investment Amount','Ghazal Investment Equity','Ghazal Debt Amount']
ghazal_invested_mask = shark_tank['Ghazal Investment Amount'] != 0
shark_tank.loc[ghazal_invested_mask, ghazal_columns].head()