### Import Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Silence every warning category for the rest of the notebook. Note that this
# also hides pandas/seaborn deprecation notices; comment it out when debugging.
warnings.filterwarnings('ignore')

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever spelling this install provides
# (falling back to the default style if neither exists).
_darkgrid_style = next(
    (name for name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
     if name in plt.style.available),
    'default',
)
plt.style.use(_darkgrid_style)

In [None]:
# Load the IPO listing CSV from the input directory and preview the first rows.
ipo_csv_path = '../input/ipo-data-india2021/IPO.csv'
df = pd.read_csv(ipo_csv_path)
df.head()

In [None]:
# The first data row is used as the column labels; that row is then discarded
# and the remaining rows are renumbered from 0.
header_row = df.iloc[0]
df.columns = header_row
df = df.iloc[1:].reset_index(drop=True)
df.head()

In [None]:
# Map the raw CSV headers (some contain embedded tabs and double spaces)
# to short snake_case column names.
rename_dict = dict([
    ('IPO Name', 'company'),
    ('Issue Size \t\t\t\t\t\t  (in crores)', 'issue_size'),
    ('Listing Open', 'open'),
    ('Listing Close', 'close'),
    ('Listing  \t\t\t\t\t\t  Gains(%)', 'gains'),
    ('Current \t\t\t\t\t\t  Gains (%)', 'gains_current'),
])

df = df.rename(columns=rename_dict)
df.head()

In [None]:
# Check datatypes: print a summary of the frame's columns and their dtypes,
# to see which columns still need converting.
df.info()

In [None]:
# Convert data types.
# Columns that should hold floating-point numbers.
fl_cols = ['issue_size', 'QIB', 'HNI', 'RII', 'Total', 'Issue',
           'open', 'close', 'gains', 'CMP', 'gains_current']
# Names of columns whose conversion failed; those columns keep their old dtype.
error_cols = []

for col in fl_cols:
    try:
        df[col] = df[col].astype('float')
    except (KeyError, TypeError, ValueError):
        # Only lookup/conversion failures are handled here (missing column,
        # text that does not parse as a number). A bare `except:` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        print('Error in ', col, ' Conversion!!!')
        error_cols.append(col)

# Date to datetime format
df['Date'] = pd.to_datetime(df['Date'])

In [None]:
def _cmp_to_float(raw):
    """Return ``raw`` as a float after deleting every comma from its string form."""
    return float(str(raw).replace(',', ''))


# Strip the commas from CMP and store the column as floats.
df['CMP'] = df['CMP'].map(_cmp_to_float)
df.head()

In [None]:
# New columns.
# Calendar year and month of each listing, read through the vectorised
# datetime accessor instead of a per-row Python lambda.
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month

# Favourable first day or not: 1 when the listing gain is positive, else 0
# (a missing gain compares False and therefore becomes 0).
df['fav'] = (df['gains'] > 0).astype(int)

# The meaning of the dataset's own 'Total' column is unclear, so a separate
# lower-case 'total' is built as the sum over the three investor categories.
df['total'] = df['QIB'] + df['HNI'] + df['RII']

# Percentage share of each investor category in that sum, rounded to 2 decimals.
for pc_col, src_col in (('q_pc', 'QIB'), ('h_pc', 'HNI'), ('r_pc', 'RII')):
    df[pc_col] = (df[src_col] * 100 / df['total']).round(2)
df.head()

In [None]:
# Horizontal box plot of the issue sizes. The series is passed as `x=`
# explicitly because seaborn's first positional parameter changed meaning
# between releases (treated as `x` in 0.11, as `data` from 0.12 on).
size_ax = sns.boxplot(x=df['issue_size'], orient='h')
size_ax.set_title('Issue size distribution')
plt.show()

In [None]:
# Compute the summary statistics once and reuse them for both the printout
# and the vertical marker lines.
mean_issue_size = df['issue_size'].mean()
median_issue_size = df['issue_size'].median()
print('Mean Issue Size : ', mean_issue_size)
print('Median Issue Size : ', median_issue_size)

plt.figure(figsize=(10,4))
# `sns.distplot` is deprecated since seaborn 0.11 and slated for removal.
# A density-scaled histogram with a KDE overlay is the replacement that keeps
# the same kind of picture; `cut=3` lets the KDE extend past the data limits.
sns.histplot(x=df['issue_size'], kde=True, stat='density',
             kde_kws={'cut': 3}, label='Number of IPOs')
plt.axvline(x=mean_issue_size, color='r', linestyle='--', label='mean')
plt.axvline(x=median_issue_size, color='g', linestyle='--', label='median')
plt.legend()
plt.show()

In [None]:
# Count the non-null 'company' entries per calendar year, then turn the
# 'year' index back into an ordinary column.
yearly_counts = pd.pivot_table(data=df, values='company', index='year', aggfunc='count')
pt = yearly_counts.reset_index()
pt.head()

In [None]:
# Bar chart with one bar per year, bar height = the per-year count held in `pt`.
year_ax = sns.barplot(x='year', y='company', data=pt)
year_ax.set_title('IPOs by year')
plt.show()

### Just Indian Things

In [None]:
# Add quarter and financial year.
# The Indian FY starts on April 1st:
#   Jan 1 - Mar 31 is the last quarter (Q4) of FY (previous year)-(current year)
#   Apr 1 - Jun 30 is the first quarter (Q1) of FY (current year)-(next year)
#
# Computed column-wise rather than with a row loop using `df.FY[i] = ...`:
# that chained assignment is a silent no-op under pandas copy-on-write and
# breaks whenever the index is not 0..n-1.
# NOTE(review): assumes 'year' and 'month' are integer columns without
# missing values - confirm against the date conversion above.

# Shift months so that April becomes 0, then bucket into groups of three:
# Apr-Jun -> 1, Jul-Sep -> 2, Oct-Dec -> 3, Jan-Mar -> 4.
df['quarter'] = ((df['month'] - 4) % 12) // 3 + 1

# January-March belong to the financial year that started the previous April.
fy_start = df['year'] - (df['month'] < 4).astype(int)
# Label format: four-digit start year, a dash, last two digits of the end year.
df['FY'] = fy_start.astype(str) + "-" + (fy_start + 1).astype(str).str[-2:]
df.head()

In [None]:
# Count of non-null 'Issue' entries per financial year, drawn as a bar chart.
temp = df.pivot_table(values='Issue', index=['FY'], aggfunc='count')
temp.plot.bar()
plt.show()

In [None]:
# Count of non-null 'Issue' entries per quarter, drawn as a bar chart.
temp = df.pivot_table(values='Issue', index=['quarter'], aggfunc='count')
temp.plot.bar()
plt.show()

In [None]:
# Count of non-null 'Issue' entries for every (FY, quarter) combination.
# The two grouping keys are deliberately left as the index (no reset_index).
fy_quarter_keys = ['FY', 'quarter']
temp = df.pivot_table(values='Issue', index=fy_quarter_keys, aggfunc='count')

In [None]:
# Wide bar chart of the counts held in `temp`, one bar per index entry.
temp.plot.bar(figsize=(14,5), title='IPOs by FY and QY')
plt.show()