# Importing Libraries 

In [None]:
import plotly as py
import plotly.express as px
py.offline.init_notebook_mode(connected=True)
import pandas as pd
import numpy as np
import wordcloud as wcd
import re

# Loading Dataset

In [None]:
# Location of the census CSV, relative to the notebook's working directory.
csv = r"../input/pakistan-startup-census/Pakistan Startup Census.csv"
# Keep only the first 433 rows of the file.
# NOTE(review): presumably rows past 433 are not usable records -- confirm against the CSV.
startup_pk = pd.read_csv(csv).iloc[:433]
# Last expression of the cell: display the available column names.
startup_pk.columns

# Data Cleaning

## Extracting Founded Year

In [None]:
def year(string):
    """Return the last four characters of *string*, where the year is expected.

    Non-string values (e.g. NaN for a missing "Founded" entry) are returned
    unchanged instead of raising ``TypeError`` on the slice.
    """
    return string[-4:] if isinstance(string, str) else string


# Trim surrounding whitespace so the trailing four characters are not padding.
startup_pk.Founded = startup_pk.Founded.str.strip()
# A tail that starts with non-digit characters is not a year, so it is replaced
# by the "Not Provided" placeholder. regex=True is passed explicitly because
# pandas >= 2.0 treats the pattern as a literal string by default, and the
# pattern is a raw string so that "\D" is not an invalid escape sequence.
startup_pk["founded_year"] = (
    startup_pk.Founded.apply(year)
    .str.replace(r"^\D+\w.", "Not Provided", regex=True)
)

## Correcting City Names

In [None]:
def city_cleaner(string,
                 cities=("lahore", "karachi", "faisalabad", "islamabad",
                         "rawalpindi", "peshawar", "nowshera", "gujrat")):
    """Collapse a free-form location string to a known city name.

    Args:
        string: Location text to normalise. Matching is a case-sensitive
            substring test, so callers should lower-case the text first.
        cities: City names to look for, checked in order. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        The first entry of ``cities`` that occurs inside ``string``. When no
        city matches, or when ``string`` is not a ``str`` (e.g. NaN for a
        missing location), the input is returned unchanged.
    """
    # Missing values arrive as float NaN; "city in nan" would raise TypeError.
    if not isinstance(string, str):
        return string
    for city in cities:
        if city in string:
            return city
    return string
print("Function Defined")
# Lower-case the raw locations before matching, since city_cleaner compares
# against lower-case city names.
startup_pk["locations"] = (
    startup_pk["Location"]
    .str.lower()
    .apply(city_cleaner)
)

# Exploratory Analysis

# Total Number of Startups

In [None]:
# Count distinct startup names so repeated names are only counted once.
startup_count = startup_pk.Name.nunique()
print(f"There are {startup_count} Startups in Pakistan")

# Startups by Location

In [None]:
# Count startups per cleaned location and keep the 20 most common.
loca_wise = (
    startup_pk.groupby("locations")
    .size()
    .sort_values(ascending=False)
    .head(20)
)
# Plotly Express names the wide-form columns "variable" and "value" (lower
# case); label keys are matched exactly, so a capitalised "Variable" key is
# silently ignored.
px.bar(
    loca_wise,
    labels={"variable": "Number of Startups", "value": "Count"},
    title="Number of Startups by Location",
)

# Startups by Category

In [None]:
# Normalise category labels so case and whitespace variants fall into one group.
startup_pk["Category"] = startup_pk["Category"].str.lower().str.strip()
# Size of each category group, limited to the 25 largest.
cat_wise = startup_pk.groupby("Category").size().nlargest(25)
px.bar(cat_wise, title="Number of Startups by Category", orientation="v")

# Startups by Year Founded

In [None]:
# Number of startups for each extracted founding-year label.
year_wise = startup_pk.groupby("founded_year").size()
px.bar(year_wise, orientation="v")

# Unveiling Buzzwords in Startup Taglines and Business Descriptions

In [None]:
def iterable_to_wordcloud(iterable, name=None, plot=True, width=1400, height=600,
                          stopwords=None, col="white"):
    """Build a word cloud from an iterable of text snippets.

    Args:
        iterable: Items to combine. Each item is converted with ``str`` and
            lower-cased. Missing values (``None`` and float NaN) are skipped
            so they do not show up in the cloud as the words "none" / "nan".
        name: Label interpolated into the figure title.
        plot: When true, display the cloud with Plotly Express; otherwise
            return the generated cloud without displaying it.
        width: Passed to ``WordCloud`` as ``width``.
        height: Passed to ``WordCloud`` as ``height``.
        stopwords: Passed to ``WordCloud`` as ``stopwords``.
        col: Passed to ``WordCloud`` as ``background_color``.

    Returns:
        ``None`` when ``plot`` is true, otherwise the generated word cloud
        object.
    """
    text = " ".join(
        str(item).lower()
        for item in iterable
        # NaN is the only float that compares unequal to itself.
        if not (item is None or (isinstance(item, float) and item != item))
    )
    cloud = wcd.WordCloud(width=width, height=height, stopwords=stopwords,
                          background_color=col).generate(text)
    if not plot:
        return cloud  # wordcloud object
    # A separate name keeps the figure from shadowing the ``plot`` flag.
    figure = px.imshow(cloud, title=f"Word cloud for {name}")
    figure.update_xaxes(showticklabels=False)
    figure.update_yaxes(showticklabels=False)
    figure.show()
    return None


print("Function Defined")

In [None]:
# With the default plot=True the helper displays the figure and returns None,
# so wc_tag is bound to None here.
wc_tag = iterable_to_wordcloud(
    startup_pk["Tagline"],
    name="Taglines",
)

In [None]:
# Word cloud of the business descriptions, filtering the wordcloud package's
# STOPWORDS set.
iterable_to_wordcloud(
    startup_pk["Description"],
    name="Business Description",
    stopwords=wcd.STOPWORDS,
)