In [None]:
# Import libraries

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib 
import seaborn as sns

# Use the same enlarged tick-label font size on both axes for every figure.
for tick_axis in ("xtick", "ytick"):
    matplotlib.rc(tick_axis, labelsize=15)

In [None]:
# Load data
# Read the registered-companies CSV into `data` and preview the first rows.

csv_path = "../input/indian-startups-2021/2021_registered_companies.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Data information
# Print a summary of the loaded frame (columns, non-null counts, dtypes)
# to spot missing values and wrongly typed columns before cleaning.

data.info()

In [None]:
# Remove rows with authorized_capital < paidup_capital
# NOTE(review): such rows are presumably data-entry errors - confirm against
# the dataset's documentation.
#
# The mask is computed once and `data` is rebound to the filtered frame
# instead of being mutated with inplace=True. Rows where the comparison is
# False or involves a missing value are kept, exactly as before.

invalid_capital = data["authorized_capital"] < data["paidup_capital"]
if invalid_capital.any():
    data = data.loc[~invalid_capital]
    # Show the summary again so the reduced row count is visible.
    data.info()

In [None]:
# Monthly registration distribution
# Pie chart of the share of registrations per month. Slices follow the
# value_counts() order, i.e. most frequent month first.

month_name_dict = dict(data["month_name"].value_counts())
month_labels = list(month_name_dict)
month_sizes = list(month_name_dict.values())

fig = plt.figure()
ax = fig.add_axes((0, 0, 1, 1))  # axes span the whole figure
ax.pie(
    x=month_sizes,
    labels=month_labels,
    autopct="%1.2f%%",
    radius=2,
    textprops={"fontsize": 14},
)
plt.show()

In [None]:
# Build state_codes dictionary to remove duplicate entries
# Maps a spelled-out state / union-territory name to a two-letter code so
# that both spellings of the same state can be collapsed into one label.
# Each key appears exactly once (the literal previously repeated
# "Tamil Nadu" and "Tripura").
# NOTE(review): the spellings (e.g. "Chattisgarh", "Orissa") and codes
# (e.g. "DH") presumably mirror the values found in the dataset - confirm
# before "correcting" them.

state_codes = {
    "Andhra Pradesh": "AP",
    "Arunachal Pradesh": "AR",
    "Assam": "AS",
    "Bihar": "BR",
    "Chattisgarh": "CG",
    "Goa": "GA",
    "Gujarat": "GJ",
    "Haryana": "HR",
    "Himachal Pradesh": "HP",
    "Jammu & Kashmir": "JK",
    "Jharkhand": "JH",
    "Karnataka": "KA",
    "Kerala": "KL",
    "Ladakh": "LA",
    "Madhya Pradesh": "MP",
    "Maharashtra": "MH",
    "Manipur": "MN",
    "Meghalaya": "ML",
    "Mizoram": "MZ",
    "Nagaland": "NL",
    "Orissa": "OR",
    "Punjab": "PB",
    "Rajasthan": "RJ",
    "Sikkim": "SK",
    "Tamil Nadu": "TN",
    "Tripura": "TR",
    "Uttarakhand": "UK",
    "Uttar Pradesh": "UP",
    "West Bengal": "WB",
    "Telangana": "TS",
    "Andaman & Nicobar": "AN",
    "Chandigarh": "CH",
    "Dadra & Nagar Haveli": "DH",
    "Daman and Diu": "DD",
    "Delhi": "DL",
    "Lakshadweep": "LD",
    "Pondicherry": "PY",
}

In [None]:
# Merge duplicate state entries
# Rewrite spelled-out state names to their codes in the `state` column only.
# A frame-wide replace would also rewrite any cell in any other column that
# happens to equal a state name exactly. `data` itself is left untouched;
# the normalized copy is `df`.

df = data.assign(state=data["state"].replace(state_codes))

# to_dict() yields plain Python ints, which keeps the printed mapping readable.
state_dict = df["state"].value_counts().to_dict()
print(state_dict)

In [None]:
# State wise registration distribution (bar chart)
# States with more than 1000 registrations get their own bar; every other
# state is pooled into a single "Others" bar.

state_counts = dict(df.state.value_counts())
state_dict = {name: count for name, count in state_counts.items() if count > 1000}
others = sum(count for count in state_counts.values() if not count > 1000)
state_dict["Others"] = others

fig = plt.figure(figsize=(18, 6))
ax = fig.add_axes((0, 0, 1, 1))  # axes span the whole figure
ax.bar(x=list(state_dict), height=list(state_dict.values()), color="dimgray")
plt.show()

In [None]:
# State wise registration distribution (pie chart)
# Same buckets as the bar chart: reads the existing `state_dict`, whose last
# entry is the pooled "Others" slice.

state_labels = list(state_dict)
state_sizes = list(state_dict.values())

fig = plt.figure()
ax = fig.add_axes((0, 0, 1, 1))  # axes span the whole figure
patches, texts, autotext = ax.pie(
    x=state_sizes,
    labels=state_labels,
    autopct="%1.0f%%",
    radius=2,
    textprops={"fontsize": 14},
)
plt.show()

In [None]:
# RoC wise registration distribution (bar chart)
# RoCs with more than 1000 registrations get their own bar, labelled by the
# part of the name after the first hyphen; the rest are pooled into "Others".
# NOTE(review): assumes RoC names look like "<prefix>-<place>" - confirm
# against the dataset.

roc_dict = {}
others = 0

for key, value in dict(df.roc.value_counts()).items():
    if value > 1000:
        parts = key.split("-")
        # Fall back to the full name when there is no hyphen instead of
        # raising IndexError.
        label = parts[1] if len(parts) > 1 else key
        # Accumulate so two RoCs that shorten to the same label are summed
        # rather than the later one overwriting the earlier one.
        roc_dict[label] = roc_dict.get(label, 0) + value
    else:
        others += value
# Add to (not overwrite) an existing "Others" label, should one occur.
roc_dict["Others"] = roc_dict.get("Others", 0) + others

fig = plt.figure(figsize=(21, 6))
ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure
ax.bar(x=list(roc_dict.keys()), height=list(roc_dict.values()), color="gray")
plt.show()

In [None]:
# RoC wise registration distribution (pie chart)
# Reads the existing `roc_dict` (per-RoC counts plus the pooled "Others").

roc_labels = list(roc_dict)
roc_sizes = list(roc_dict.values())

fig = plt.figure()
ax = fig.add_axes((0, 0, 1, 1))  # axes span the whole figure
ax.pie(
    x=roc_sizes,
    labels=roc_labels,
    autopct="%1.0f%%",
    radius=2,
    textprops={"fontsize": 14},
)
plt.show()

In [None]:
# Categorical distribution
# Pie chart of the share of companies in each value of the `category` column.

category_dict = dict(data["category"].value_counts())
category_labels = list(category_dict)
category_sizes = list(category_dict.values())

fig = plt.figure()
ax = fig.add_axes((0, 0, 1, 1))  # axes span the whole figure
ax.pie(
    x=category_sizes,
    labels=category_labels,
    autopct="%1.2f%%",
    radius=2,
    textprops={"fontsize": 14},
)
plt.show()

In [None]:
# Class distribution
# Pie chart of the share of companies in each value of the `class` column.
# The column is accessed with brackets because `class` is a Python keyword.

class_counts = data["class"].value_counts()
class_dict = dict(class_counts)
class_labels = list(class_dict)
class_sizes = list(class_dict.values())

fig = plt.figure()
ax = fig.add_axes((0, 0, 1, 1))  # axes span the whole figure
ax.pie(
    x=class_sizes,
    labels=class_labels,
    autopct="%1.2f%%",
    radius=2,
    textprops={"fontsize": 14},
)
plt.show()

In [None]:
# Type distribution
# Bar chart of the number of companies per value of the `company_type` column.

company_type_dict = dict(data["company_type"].value_counts())
type_labels = list(company_type_dict)
type_heights = list(company_type_dict.values())

fig = plt.figure(figsize=(21, 6))
ax = fig.add_axes((0, 0, 1, 1))  # axes span the whole figure
ax.bar(x=type_labels, height=type_heights)
plt.show()

In [None]:
# Activity distribution
# Activities with more than 1000 registrations get their own bar, labelled by
# the first word of the description; the rest are pooled into "Others".

activity_description_dict = {}
others = 0

for key, value in dict(df.activity_description.value_counts()).items():
    if value > 1000:
        label = key.split(" ")[0]
        # Several descriptions can share a first word. Sum them so the later
        # one does not overwrite the earlier one and drop its registrations.
        activity_description_dict[label] = activity_description_dict.get(label, 0) + value
    else:
        others += value
# A description whose first word is literally "Others" must be added to the
# pooled remainder, not replaced by it.
activity_description_dict["Others"] = activity_description_dict.get("Others", 0) + others

fig = plt.figure(figsize=(18, 6))
ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure
ax.bar(x=list(activity_description_dict.keys()), height=list(activity_description_dict.values()), color="gray")
plt.show()

In [None]:
# Median authorized capital
# The median of the `authorized_capital` column, printed with its label.

median_authorized_capital = data["authorized_capital"].median()
print("median_authorized_capital =", median_authorized_capital)

In [None]:
# Median paidup capital
# The median of the `paidup_capital` column. The printed label now names the
# paid-up figure (it previously repeated "median_authorized_capital").

median_paidup_capital = data.paidup_capital.median()
print("median_paidup_capital =", median_paidup_capital)