# Inc 5000 Companies

Dataset containing information about each company on the INC 5000 list in 2019. Fields include the company name, industry, founding year, website, and location, as well as 2019 revenue, % growth, number of workers (year-over-year), and the number of years on the list.

Logo: [Image Source](https://www.prnewswire.com/news-releases/aviatrix-named-one-of-the-2022-inc-5000-fastest-growing-private-companies-in-america-301606200.html)

**Recommended Analysis**
1. What's the average revenue among companies on the list? Broken down by industry?
2. Which industries are most and least represented in the list?
3. Do you notice any interesting geographic trends?
4. Which industries saw the largest average growth rate?
5. Which companies had the largest increase in staff/new hires?
6. Did any companies increase revenue while reducing staff?

_Import Packages and Dataset, as well as Instantiate Constant Variables_

In [1]:
# Import Packages --------------------
import pandas as pd
import numpy as np
import string
# import plotly.io as pio
# pio.renderers.default = "png"
import plotly.express as px
import plotly.graph_objects as go

# Instantiate Constant Variables --------------
dataset_filename = "INC 5000 Companies 2019.csv";

# Import Dataset ---------------------
df = pd.read_csv(dataset_filename);
print(df.info());
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5012 entries, 0 to 5011
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   rank              5012 non-null   int64  
 1   profile           5012 non-null   object 
 2   name              5012 non-null   object 
 3   url               5012 non-null   object 
 4   state             5012 non-null   object 
 5   revenue           5012 non-null   object 
 6   growth_%          5012 non-null   float64
 7   industry          5012 non-null   object 
 8   workers           5011 non-null   float64
 9   previous_workers  5012 non-null   int64  
 10  founded           5012 non-null   int64  
 11  yrs_on_list       5012 non-null   int64  
 12  metro             4199 non-null   object 
 13  city              5012 non-null   object 
dtypes: float64(2), int64(4), object(8)
memory usage: 548.3+ KB
None


Unnamed: 0,rank,profile,name,url,state,revenue,growth_%,industry,workers,previous_workers,founded,yrs_on_list,metro,city
0,1,https://www.inc.com/profile/freestar,Freestar,http://freestar.com,AZ,36.9 Million,36680.3882,Advertising & Marketing,40.0,5,2015,1,Phoenix,Phoenix
1,2,https://www.inc.com/profile/freightwise,FreightWise,http://freightwisellc.com,TN,33.6 Million,30547.9317,Logistics & Transportation,39.0,8,2015,1,Nashville,Brentwood
2,3,https://www.inc.com/profile/ceces-veggie,Cece's Veggie Co.,http://cecesveggieco.com,TX,24.9 Million,23880.4852,Food & Beverage,190.0,10,2015,1,Austin,Austin
3,4,https://www.inc.com/profile/ladyboss,LadyBoss,http://ladyboss.com,NM,32.4 Million,21849.8925,Consumer Products & Services,57.0,2,2014,1,,Albuquerque
4,5,https://www.inc.com/profile/perpay,Perpay,http://perpay.com,PA,22.5 Million,18166.407,Retail,25.0,6,2014,1,Philadelphia,Philadelphia


***1. What's the average revenue among companies on the list? Broken down by industry?***

In [2]:
def average_revenue_by_industry(data, chart_type):
    dff = data.copy();
    dff["revenue_cleaned"] = dff["revenue"].apply(lambda x: float(x.split(" ")[0]));
    dff_agg = dff.groupby("industry").mean("revenue_cleaned");
    dff_agg = dff_agg.sort_values(by=["revenue_cleaned"], ascending=False)["revenue_cleaned"];
    data = pd.DataFrame({
        "Industry":dff_agg.index.tolist(),
        "Average Revenue":dff_agg.values.tolist()
    });
    if chart_type == "pie":
        fig = px.pie(data, names="Industry", values="Average Revenue", title="Percentage of Average Revenue in 2019 by Industry");
        fig.show();
    elif chart_type == "bar":
        fig = px.bar(data, x="Industry", y="Average Revenue", title="Average Revenue in 2019 by Industry", text_auto='.2f');
        fig.update_layout(yaxis=dict(title="Average Revenue (In Millions)"));
        fig.show();
    else:
        print(dff_agg);

average_revenue_by_industry(df, "bar")

***2. Which industries are most and least represented in the list?***

In [3]:
def industry_representation(data, chart_type):
    dff = data.copy();
    dff_agg = dff["industry"].value_counts();
    data = pd.DataFrame({
        "Industry":dff_agg.index.tolist(),
        "Count":dff_agg.values.tolist()
    });
    if chart_type == "pie":
        fig = px.pie(data, names="Industry", values="Count", title="Percentage Industry Representation in 2019");
        fig.show();
    elif chart_type == "bar":
        fig = px.bar(data, x="Industry", y="Count", title="Industry Representation in 2019", text_auto=True);
        # fig.update_layout(yaxis=dict(title="Average Revenue (In Millions)"));
        fig.show();
    else:
        print(dff_agg);

industry_representation(df, "bar")

***3. Do you notice any interesting geographic trends?***

In [4]:
def geographic_trends(data, geo_entity, chart_type, orientation):
    dff = data.copy();
    dff_agg = dff[geo_entity].value_counts();
    data = pd.DataFrame({
        geo_entity.capitalize():dff_agg.index.tolist(),
        "Count":dff_agg.values.tolist()
    });
    data = data.sort_values(by=["Count"], ascending=False);
    if chart_type == "pie":
        fig = px.pie(data, names=geo_entity.capitalize(), values="Count", title=f"Percentage {geo_entity.capitalize()} Proportion in 2019");
        fig.show();
    elif chart_type == "bar":
        if orientation == "h":
            data = data.sort_values(by=["Count"], ascending=True);
            fig = px.bar(data, y=geo_entity.capitalize(), x="Count", title=f"Number of Companies in 2019 by {geo_entity.capitalize()}", text_auto=True, orientation="h");
            fig.show();
        else:
            fig = px.bar(data, x=geo_entity.capitalize(), y="Count", title=f"Number of Companies in 2019 by {geo_entity.capitalize()}", text_auto=True);
            fig.show();
    else:
        print(f"Top 10 Geographic Entities with Companies in 2019 ({geo_entity.capitalize()})");
        print(data.head(10));
        print(f"Bottom 10 Geographic Entities with Companies in 2019 ({geo_entity.capitalize()})");
        print(data.tail(10));

In [5]:
geographic_trends(df, "state", "bar", None)

In [6]:
geographic_trends(df, "metro", "bar", "h")    

In [7]:
geographic_trends(df, "city", None, None)

Top 10 Geographic Entities with Companies in 2019 (City)
            City  Count
0       New York    165
1        Chicago    110
2        Atlanta    101
3         Austin     87
4        Houston     84
5         Dallas     74
6      San Diego     71
7  San Francisco     52
8    Los Angeles     51
9         Denver     48
Bottom 10 Geographic Entities with Companies in 2019 (City)
                City  Count
910           DRAPER      1
911        Park CIty      1
912          EL PASO      1
913          Roselle      1
914         Lagrange      1
915        Deer Park      1
916         Hiawatha      1
917           Sedona      1
918      Chula Vista      1
1557  LEXINGTON PARK      1


***4. Which industries saw the largest average growth rate?***

In [8]:
def industry_average_growth(data, chart_type):
    dff = data.copy();
    dff_agg = dff.groupby("industry").mean("growth_%")["growth_%"];
    data = pd.DataFrame({
        dff_agg.index.name.capitalize():dff_agg.index.values.tolist(),
        "Average Growth Rate (%)":dff_agg.values.tolist()
    });
    data = data.sort_values(by=["Average Growth Rate (%)"], ascending=False);
    if chart_type == "bar":
        fig = px.bar(data, x="Industry", y="Average Growth Rate (%)", title=f"Average Growth Rate of Companies in 2019", text_auto=".2f");
        fig.show();
    else:
        print(data);

industry_average_growth(df, "bar")

***5. Which companies had the largest increase in staff/new hires?***

In [9]:
def increase_in_staff(data, chart_type):
    dff = data.copy();
    dff["Increase in Staff"] = dff["workers"] - dff["previous_workers"];
    dff = dff[["name","previous_workers","workers","Increase in Staff"]];
    dff = dff.sort_values(by=["Increase in Staff"], ascending=False);
    dff = dff[dff["Increase in Staff"] > 0];
    if chart_type == "bar":
        fig = px.bar(dff, x="name", y="Increase in Staff", title=f"Largest Increase in Staff/New Hires in 2019", text_auto=".2f");
        fig.show();
    else:
        print("Top 10 Companies with the greatest increase in staff/new hires");
        print(dff[["name","Increase in Staff"]].head(10));

increase_in_staff(df, None)

Top 10 Companies with the greatest increase in staff/new hires
                         name  Increase in Staff
1621         Allied Universal           118531.0
3752                  Alorica            43000.0
3403              Infiniti HR            37145.0
1336               CircusTrix            13739.0
3172            ScribeAmerica            11641.0
1118                   TaskUs             9198.0
4110  Freddy's Frozen Custard             6494.0
427      Pathway Vet Alliance             4174.0
2927         Four Foods Group             3500.0
4985  Pacific Dental Services             3356.0


***6. Did any companies increase revenue while reducing staff?***

In [10]:
def increase_revenue_reduce_staff(data, chart_type, limit):
    dff = data.copy();
    dff["revenue_cleaned"] = dff["revenue"].apply(lambda x: float(x.split(" ")[0]));
    dff["Increase in Staff"] = dff["workers"] - dff["previous_workers"];
    dff = dff[["name","previous_workers","workers","Increase in Staff","revenue_cleaned"]];
    dff = dff[dff["Increase in Staff"] < 0];
    dff = dff.sort_values(by=["Increase in Staff","revenue_cleaned"], ascending=[True,False]);
    dff = dff.head(limit);
    if chart_type == "bar":
        fig = go.Figure();
        fig.add_trace(go.Bar(
            x=dff["name"],
            y=dff["Increase in Staff"],
            name='Increase in Staff',
            text=dff["Increase in Staff"]
        ));
        fig.add_trace(go.Bar(
            x=dff["name"],
            y=dff["revenue_cleaned"],
            name='Revenue (in Millions)',
            text=dff["revenue_cleaned"]
        ));
        fig.update_layout(barmode='group', title=f"Top {limit} Companies in 2019 with Highest Revenue Growth and Greatest Reduction in Staff");
        fig.show();
    else:
        print(dff[["name","Increase in Staff","revenue_cleaned"]]);

increase_revenue_reduce_staff(df, "bar", 10)