In [None]:
%pip install numpy
%pip install pandas
%pip install seaborn
%pip install plotly-express
%pip install jinja2
%pip install matplotlib
%pip install geopy
%pip install nbformat

: 

<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border:5px solid #4AD3FF"> Table of Contents</h2>

1. [Import necessary Libraries](#imports)
2. [Read the Dataset](#dataset)<br>
    2.1. [Dataset Description](#dataset_des)
3. [Data Exploration](#data_expo)
4. [Feature Engineering](#feat_eng)
5. [Data Cleaning](#data_clean)
6. [Data Analysis](#data_ana)<br>
    6.1. [Visualize the number of Layoffs in these Years](#v1)<br>
    6.2. [Get the insights of the Layoffs grouped by Continent](#insi1)<br>
    6.3. [Let's visualize these countries](#v2)<br>
    6.4. [Get the insights of the company with 100% layoff rate](#insi2)<br>
    <!-- 6.5. [Visualize the locations of tthe companies with 100% layoff rate](#geoloc)<br> -->
    6.6. [Divide the dataframe into 4 Groups](#divide_4grp)<br>
    6.6.1 [Visualize the different types of Industries in the Group 4](#v3)<br>
    6.6.2 [Let's see the Top 10 companies with the most layoffs in Group 4 (154-11000 layoff count)](#insi3)<br>
    6.6.2a [Visualize the Top10 Layoff Count and Location](#v4)<br>
    6.6.2b [Visualize the Top10 Companies Location](#v5)<br>
    6.6.2c [Visualize the Top10 Layoff Companies Funds Raised](#v6)<br>
7. [Conclusion](#conclusion)

<a id="imports"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border-bottom:5px solid #4AD3FF">1. Import necessary Libraries</h2>

In [None]:
from functools import lru_cache

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly_express as px
import seaborn as sns
from geopy.geocoders import Nominatim

# Eight-colour "rainbow" palette; the plotting cells pass it as palette=/colors=.
palette = sns.color_palette("rainbow", 8)

: 

<a id="dataset"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border-bottom:5px solid #4AD3FF">2. Read the Dataset</h2>

In [None]:
# Load the layoffs CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("../dataset/layoffs_data.csv")

# Show the first rows, styled with the same light-blue theme as the section headers.
preview_css = {
    'background-color': '#E1F8FF',
    'color': '#000',
    'border': '1px solid #4AD3FF',
}
df.head().style.set_properties(**preview_css)

: 

<a id="dataset_des"></a>
<h3 style="background-color:#E1F8FF; margin-bottom:0px;padding:10px; color:#000">2.1 Dataset Description</h3>

1. Company - Name of the Company
2. Location_HQ - Location of the company's headquarters
3. Industry - Type of Industry the company is
4. Laid_Off_Count - Total count
5. Percentage - Percentage of layoff
6. Date - Date of layoff
7. Source - Data gathered from source
8. Funds_Raised - Total funds raised
9. Stage - Stage of the company
10. Date_Added - data added in database
11. Country - Country of company located
12. List_of_Employees_Laid_Off - link to g_docs of employee list

<a id="data_expo"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border-bottom:5px solid #4AD3FF">3. Data Exploration</h2>

In [None]:
# Column names, dtypes, and non-null counts of the raw frame.
df.info()

: 

In [None]:
# Number of missing values in each column.
df.isna().sum()

: 

In [None]:
# (rows, columns) of the raw frame.
df.shape

: 

In [None]:
# Summary statistics (count, mean, quartiles, ...) of the numeric columns.
df.describe()

: 

<a id="feat_eng"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border-bottom:5px solid #4AD3FF">4. Feature Engineering</h2>

Now, let's create a new column named 'year_of_laidOff' that holds the year of each layoff record (taken from the `Date_Added` column).

In [None]:
def get_date(dates=None):
    """Extract the year part of each date value as a string.

    Parameters
    ----------
    dates : iterable of date-like values, optional
        Values whose text form starts with ``YYYY-`` (for example
        ``"2022-12-16 23:41:45"``). When omitted, the notebook-global
        ``df['Date_Added']`` column is used, as in the original version.

    Returns
    -------
    list
        One entry per input value, in input order: the text before the first
        ``"-"``, or ``None`` where the input value is missing.

    Notes
    -----
    NOTE(review): the default column is ``Date_Added`` (when the record was
    added to the database), not ``Date`` (when the layoff happened). Pass
    ``df['Date']`` explicitly if the layoff year is what is wanted — confirm.
    """
    if dates is None:
        dates = df['Date_Added']

    year_list = []
    # Iterate over the values themselves rather than indexing by position, so a
    # filtered or re-indexed Series (non-0..n-1 labels) does not raise KeyError.
    for value in dates:
        if pd.isna(value):
            # Keep the output aligned with the input instead of crashing on NaN.
            year_list.append(None)
        else:
            year_list.append(str(value).split("-", 1)[0])
    return year_list

# One year string per row of df, in row order (get_date reads df['Date_Added']).
layoff_year_list = get_date()

: 

In [None]:
# Store the extracted years as a new column; a list is assigned to rows by position.
df['year_of_laidOff'] = layoff_year_list

: 

<a id="data_clean"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border-bottom:5px solid #4AD3FF">5. Data Cleaning</h2>

<br>
Let's start with some cleanup...<br>
We don't need the columns Source (where the data was gathered from), Date_Added (when the record was added to the database), and List_of_Employees_Laid_Off (link to a Google Doc with the employee list).

In [None]:
# Remove bookkeeping columns that the analysis below never reads.
# Rebinding `df` (rather than inplace=True) together with errors="ignore" keeps
# this cell idempotent: re-running it once the columns are gone is a no-op
# instead of a KeyError.
df = df.drop(columns=['Source', 'Date_Added', 'List_of_Employees_Laid_Off'], errors="ignore")

: 

<br>Drop the rows whose layoff count is NaN; we don't need those rows.

In [None]:
# Keep only the rows that report a layoff count.
df = df.dropna(subset=['Laid_Off_Count'])

: 

In [None]:
# Preview the frame after the column drop and the NaN filter.
df.head()

: 

<a id="data_ana"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border-bottom:5px solid #4AD3FF">6. Data Analysis</h2>

<a id="v1"></a>
<h3 style="background-color:#E1F8FF; margin-bottom:0px;padding:10px; color:#000">6.1. Visualize the number of Layoffs in these Years</h3>

In [None]:
# Total layoffs per year. Selecting 'Laid_Off_Count' before .sum() restricts the
# aggregation to the only column that is plotted; summing every column would
# also try to "sum" the text columns (Company, Industry, ...), which is slow and
# can raise on mixed string/NaN data in recent pandas versions.
laidOff_countYear = df.groupby('year_of_laidOff', as_index=False)['Laid_Off_Count'].sum()

fig, (trend_ax, share_ax) = plt.subplots(1, 2, figsize=(25, 8))

# Left panel: yearly totals as a dashed trend line.
trend_ax.plot(laidOff_countYear['year_of_laidOff'], laidOff_countYear['Laid_Off_Count'],
              marker="o", linestyle='--', color='#10a2f0',
              markerfacecolor='#dfe2ff', markersize=8)
trend_ax.set_xlabel('Year')
trend_ax.set_ylabel('Layoff count')
trend_ax.set_title("Layoff count vs Years")

# Right panel: each year's share of all layoffs.
explode = [0.1] * len(laidOff_countYear)  # Explode all slices
# Use a local palette sized to the number of years. Reassigning the notebook-wide
# `palette` here would silently change the colours of every later chart.
year_palette = sns.color_palette("rainbow", len(laidOff_countYear))

share_ax.pie(x=laidOff_countYear['Laid_Off_Count'], labels=laidOff_countYear['year_of_laidOff'],
             autopct='%.f%%', explode=explode, colors=year_palette)
share_ax.set_title("Mass layoffs in this years")

plt.show()

: 

<a id="insi1"></a>
<h3 style="background-color:#E1F8FF; margin-bottom:0px;padding:10px; color:#000">6.2. Get the insights of the layoffs grouped by Continent</h3>

In [None]:
# Total layoffs per country. Only 'Laid_Off_Count' is aggregated: the cells below
# read just 'Country' and 'Laid_Off_Count', and summing the text columns as well
# is slow and can raise on mixed string/NaN data in recent pandas versions.
laidOff_countByCountry = df.groupby('Country', as_index=False)['Laid_Off_Count'].sum()

: 

In [None]:
# Country names grouped by continent; used to split the per-country totals.
# Every country appears in exactly one list.
asia = ['China', 'Hong Kong', 'India', 'Indonesia', 'Malaysia', 'Myanmar', 'Singapore', 'Thailand', 'United Arab Emirates', 'Israel']
# Estonia is a European country; it was previously listed under Africa.
europe = ['Austria', 'Bulgaria', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Ireland', 'Lithuania', 'Luxembourg',
          'Norway', 'Poland', 'Portugal', 'Romania', 'Russia', 'Sweden', 'Switzerland', 'Netherlands', 'United Kingdom']

n_america = ['Canada', 'Mexico', 'United States']
s_america = ['Argentina', 'Brazil', 'Chile', 'Colombia']

africa = ['Kenya', 'Nigeria', 'Senegal', 'Seychelles']
oceania = ['Australia', 'New Zealand']

: 

In [None]:
def _rows_for(countries):
    """Return the rows of laidOff_countByCountry whose Country is in ``countries``."""
    in_continent = laidOff_countByCountry['Country'].isin(countries)
    return laidOff_countByCountry.loc[in_continent]


# One per-country totals frame for each continent list.
asia_insight = _rows_for(asia)
europe_insight = _rows_for(europe)

nAmerica_insight = _rows_for(n_america)
sAmerica_insight = _rows_for(s_america)

africa_insight = _rows_for(africa)
oceania_insight = _rows_for(oceania)

: 

In [None]:
def plot_layoffs_by_country(ax, data, title):
    """Draw one bar chart of total layoffs per country on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    data : frame with 'Country' and 'Laid_Off_Count' columns.
    title : text shown above the panel.
    """
    sns.barplot(data=data, x='Country', y='Laid_Off_Count', palette=palette, ax=ax)
    ax.set_xlabel('Country')
    ax.set_ylabel('Layoff count')
    ax.set_title(title, fontsize=14)
    # Country names are long, so tilt them to avoid overlap.
    ax.tick_params(axis='x', labelrotation=60, labelsize=14)
    ax.tick_params(axis='y', labelsize=14)


# One panel per continent, laid out row by row on a 3x2 grid.
continent_panels = [
    ("Layoff count in Asia", asia_insight),
    ("Layoff count in Europe", europe_insight),
    ("Layoff count in North America", nAmerica_insight),
    ("Layoff count in South America", sAmerica_insight),
    ("Layoff count in Africa", africa_insight),
    ("Layoff count in Oceania", oceania_insight),
]

fig, axes = plt.subplots(3, 2, figsize=(20, 20))
for panel_ax, (panel_title, panel_data) in zip(axes.flat, continent_panels):
    plot_layoffs_by_country(panel_ax, panel_data, panel_title)

fig.tight_layout()
plt.show()

: 

So, from the above visualization we can see that ***India, Germany, Netherlands, Sweden, United Kingdom, United States, Nigeria, and Australia*** are the countries with the most layoffs.


<a id="v2"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border-bottom:5px solid #4AD3FF">6.3. Let's visualize these countries</h2>

In [None]:
# Countries singled out in the continent charts above.
mass_layoff_countries_list = ['India', 'Germany', 'Netherlands', 'Sweden', 'United Kingdom', 'United States', 'Nigeria', 'Australia']

is_mass_layoff = laidOff_countByCountry['Country'].isin(mass_layoff_countries_list)
mass_layoff_countries = laidOff_countByCountry.loc[is_mass_layoff]
country_totals = mass_layoff_countries['Laid_Off_Count']

# Pie of each selected country's share; the legend lists the absolute counts.
plt.figure(figsize=(15, 20))
plt.pie(
    data=mass_layoff_countries,
    x=country_totals,
    labels=mass_layoff_countries['Country'],
    autopct='%.f%%',
    colors=palette,
    rotatelabels=True,
    wedgeprops={'linewidth': 3},
)
plt.legend(country_totals, loc='center left', bbox_to_anchor=(-0.35, .5), fontsize=15)
plt.xticks(fontsize=50)
plt.yticks(fontsize=50)
plt.tight_layout()
plt.show()

: 

<a id="insi2"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px;padding:20px; color:#000; border-bottom:5px solid #4AD3FF">6.4. Get the insights of the company with <strong>100% layoff rate</strong> </h2>

In [None]:
# Rows whose 'Percentage' equals 1.00.
# The dropna() additionally removes every row with a missing value in ANY column.
is_full_layoff = df['Percentage'].eq(1.00)
insight_1 = df[is_full_layoff].dropna(axis=0, how='any')
insight_1.head()

: 

In [None]:
# Layoff count of every company in insight_1 (the Percentage == 1.00 rows).
fig, ax = plt.subplots(figsize=(25, 5))

ax.plot(insight_1['Company'], insight_1['Laid_Off_Count'], marker="o", linestyle='--', color='#10a2f0',
        markerfacecolor='#dfe2ff', markersize=8)
# Label the figure so it can be read on its own.
ax.set_xlabel('Company')
ax.set_ylabel('Layoff count')
ax.set_title("Layoff count of companies with a 100% layoff rate")
# Company names are long; print them vertically.
ax.tick_params(axis='x', labelrotation=90)

plt.show()

: 

<a id="divide_4grp"></a>
<h2 style="background-color:#E1F8FF; margin-bottom:0px; padding:20px; color:#000; border-bottom:5px solid #4AD3FF">6.6. Divide the dataframe into <strong>4 Groups</strong></h2>
So, we can divide the data into <strong>4 groups</strong> according to their <strong>Layoff count</strong> from 3-35, 35-73, 73-154, and 154-11000

In [None]:
# Split the rows into four contiguous, non-overlapping bands of layoff count.
# Each band's lower bound is the previous band's upper bound; group 3 used to
# start at > 75, which left counts of 74 and 75 out of every group.
laid_off = df['Laid_Off_Count']

grp1_insight = df.loc[laid_off <= 35]
grp2_insight = df.loc[(laid_off > 35) & (laid_off <= 73)]
grp3_insight = df.loc[(laid_off > 73) & (laid_off <= 153)]
grp4_insight = df.loc[laid_off > 153]

: 

<a id="v3"></a>
<h4 style="background-color:#efefef; margin-bottom:0px;padding:10px; color:#000">6.6.1 Visualize the different types of Industries in the Group 4</h4>

In [None]:
# Number of Group 4 rows per industry, drawn as horizontal bars.
plt.figure(figsize=(25, 10))

sns.countplot(y='Industry', data=grp4_insight)
# With y='Industry' the categories are on the y-axis and the counts on the
# x-axis; the labels were previously swapped.
plt.xlabel('Count')
plt.ylabel('Industry')
plt.xticks(fontsize=12)
plt.yticks(rotation=40, fontsize=12)
plt.title("\nDifferent types of Industries in the Group 4\n", fontsize=18)

plt.show()

: 

<a id="insi3"></a>
<h4 style="background-color:#efefef; margin-bottom:0px;padding:10px; color:#000">6.6.2 Let's see the <strong>Top 10</strong> companies with the most layoffs in Group 4 (154-11000 layoff count)</h4>

In [None]:
# The ten Group 4 rows with the highest layoff count, largest first.
by_count_desc = grp4_insight.sort_values(by='Laid_Off_Count', ascending=False)
top10_comp = by_count_desc.head(10)
top10_comp

: 

<a id="v4"></a>
<h5 style="background-color:#efefef; margin-bottom:0px;padding:10px; color:#000">6.6.2a Visualize the Top10 <strong>Layoff Count and Location</strong></h5>

In [None]:
plt.figure(figsize=(25, 7))

# Left panel: layoff count per Top10 company.
plt.subplot(1, 2, 1)
sns.barplot(data=top10_comp, x=top10_comp['Company'], y=top10_comp['Laid_Off_Count'], palette=palette)
plt.xlabel('Company'), plt.ylabel('Count')
plt.xticks(rotation=40, fontsize=12), plt.yticks(rotation=40, fontsize=12)
plt.title("\nLayoff in Top10\n", fontsize=18)

# Right panel: how the Top10 companies are distributed over HQ locations.
plt.subplot(1, 2, 2)
top10_comp_loca = top10_comp['Location_HQ'].value_counts()
# Pull out only the first (most frequent) slice. The list is built from the
# actual number of locations because plt.pie raises ValueError when `explode`
# and `x` differ in length; a hard-coded 4-element list only worked when there
# were exactly four distinct locations.
explode = [0.08 if position == 0 else 0 for position in range(len(top10_comp_loca))]
plt.pie(data=top10_comp, x=top10_comp_loca.values, labels=top10_comp_loca.index, autopct='%.f%%', explode=explode, colors=palette)
plt.title("\nLocations of the Top10 companies\n", fontsize=18)

plt.show()

: 

<a id="v5"></a>
<h5 style="background-color:#efefef; margin-bottom:0px;padding:10px; color:#000">6.6.2b Visualize the Top10 Companies <strong>Location</strong></h5>

In [None]:
# Shared geocoder client (it was previously never created, so the helpers raised
# NameError). geopy's Nominatim expects an application-specific user_agent.
geolocator = Nominatim(user_agent="layoffs-analysis-notebook")


@lru_cache(maxsize=None)
def _geocode(place):
    """Geocode ``place`` once and remember the answer.

    find_lat and find_long both need the same lookup; caching halves the number
    of network requests and avoids repeating them for duplicate locations.
    """
    return geolocator.geocode(place)


def find_lat(i):
    """Return the latitude of place name ``i``, or NaN when it cannot be geocoded."""
    match = _geocode(i)
    # geocode() returns None when nothing is found; don't crash on `.latitude`.
    return match.latitude if match is not None else np.nan


def find_long(i):
    """Return the longitude of place name ``i``, or NaN when it cannot be geocoded."""
    match = _geocode(i)
    return match.longitude if match is not None else np.nan

: 

In [None]:
# Geocode each HQ location into separate latitude / longitude columns.
hq_locations = top10_comp['Location_HQ']
top10_comp['latitude'] = hq_locations.map(find_lat)
top10_comp['longitude'] = hq_locations.map(find_long)

: 

In [None]:
# Replace every missing value, in every column, with 0.
# NOTE(review): this also applies to latitude/longitude, so a row with missing
# coordinates would be drawn at (0, 0) on the map — confirm that is intended.
top10_comp = top10_comp.fillna(0)

: 

In [None]:
# Preview the Top10 frame with the new latitude / longitude columns.
top10_comp.head()

: 

In [None]:
# Bubble map of the Top10 companies: one bubble per row, placed at the geocoded
# HQ location, sized and coloured by layoff count.
top10_comp_fig = px.scatter_mapbox(
    top10_comp,
    lat='latitude',
    lon='longitude',
    hover_name='Location_HQ',
    hover_data=['Laid_Off_Count', "Company"],
    color='Laid_Off_Count',
    color_continuous_scale=px.colors.sequential.Cividis,
    size="Laid_Off_Count",
    size_max=50,
    opacity=0.4,
    center={'lat': 10, 'lon': 30},
    zoom=1,
    height=400,
    width=1400,
    mapbox_style="carto-positron",
)
top10_comp_fig.update_layout(
    # The old title ("Population of each cities") described a different chart.
    title_text="HQ locations of the Top10 layoff companies",
    # Leave room at the top so the title is not drawn over the map.
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
)
top10_comp_fig.show()

: 

<a id="v6"></a>
<h5 style="background-color:#efefef; margin-bottom:0px;padding:10px; color:#000">6.6.2c Visualize the Top10 Layoff Companies <strong>Funds Raised</strong></h5>

In [None]:
# Funds raised by each Top10 company, as horizontal bars.
# The column is round-tripped through str before float, as in the original code.
funds_raised = top10_comp['Funds_Raised'].astype(str).astype(float)

plt.figure(figsize=(25, 7))
sns.barplot(data=top10_comp, y=top10_comp['Company'], x=funds_raised, palette=palette)

plt.ylabel('Company')
plt.xlabel('Fund Raised')
plt.xticks(rotation=40, fontsize=12)
plt.yticks(rotation=40, fontsize=12)
plt.title("\nFund Raised in Top10\n", fontsize=18)

plt.show()

: 