In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/layoffs-data-2022/layoffs_data.csv


In [2]:
# Loading the dataset
df = pd.read_csv('/kaggle/input/layoffs-data-2022/layoffs_data.csv')

# Displaying the first few rows
df.head()


Unnamed: 0,Company,Location_HQ,Industry,Percentage,Date,Source,Funds_Raised,Stage,Date_Added,Country,Laid_Off_Count,List_of_Employees_Laid_Off
0,IRL,SF Bay Area,Consumer,1.0,2023-06-23,https://www.theinformation.com/articles/social...,197.0,Series C,2023-06-25 23:04:16,United States,,Unknown
1,Retool,SF Bay Area,Other,0.09,2023-06-22,Internal memo,141.0,Series C,2023-06-22 14:23:40,United States,,Unknown
2,Anaplan,SF Bay Area,Other,,2023-06-21,https://nypost.com/2023/06/23/software-giant-a...,300.0,Acquired,2023-06-24 03:48:00,United States,300.0,Unknown
3,Uber,SF Bay Area,Transportation,,2023-06-21,https://www.nasdaq.com/articles/uber-to-lay-of...,25200.0,Post-IPO,2023-06-22 00:19:03,United States,200.0,Unknown
4,Tackle.io,Boise,Infrastructure,,2023-06-21,https://tackle.io/blog/tackle-company-update/,148.0,Series C,2023-06-23 15:38:59,United States,75.0,Unknown


**1. Bar chart: Top Industries with the Most Layoffs**

In [3]:
import plotly.express as px

# Grouping by industry and summing the layoffs
industry_layoffs = df.groupby('Industry')['Laid_Off_Count'].sum().sort_values(ascending=False).head(10)

# Bar chart for top industries with the most layoffs
fig1 = px.bar(industry_layoffs, 
              title='Top Industries with the Most Layoffs', 
              labels={'Industry': 'Industry', 'Laid_Off_Count': 'Number of Layoffs'},
              color_discrete_sequence=px.colors.sequential.RdBu)

fig1.show()


**2. Time Series Plot: Trend of Layoffs Over Time**

In [4]:
# Convert the 'Date' column to a datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Group by date and sum the layoffs
date_layoffs = df.groupby('Date')['Laid_Off_Count'].sum()

# Time series plot for layoffs over time
fig2 = px.line(date_layoffs.reset_index(),
               x='Date',
               y='Laid_Off_Count',
               title='Trend of Layoffs Over Time',
               labels={'Date': 'Date', 'Laid_Off_Count': 'Number of Layoffs'},
               color_discrete_sequence=px.colors.sequential.RdBu)

fig2.show()


**3. Pie Chart: Distribution of Company Stages Among the Layoffs**

In [5]:
# Group by stage and sum the layoffs
stage_layoffs = df.groupby('Stage')['Laid_Off_Count'].sum()

# Pie chart for distribution of company stages among the layoffs
fig3 = px.pie(stage_layoffs.reset_index(),
              names='Stage',
              values='Laid_Off_Count',
              title='Distribution of Company Stages Among the Layoffs',
              color_discrete_sequence=px.colors.sequential.RdBu)

fig3.show()


**4. Bar Chart: Companies with the Highest Number of Layoffs**

In [6]:
# Group by company and sum the layoffs
company_layoffs = df.groupby('Company')['Laid_Off_Count'].sum().sort_values(ascending=False).head(10)

# Bar chart for companies with the highest number of layoffs
fig4 = px.bar(company_layoffs.reset_index(),
              x='Company',
              y='Laid_Off_Count',
              title='Companies with the Highest Number of Layoffs',
              labels={'Company': 'Company', 'Laid_Off_Count': 'Number of Layoffs'},
              color='Laid_Off_Count',
              color_continuous_scale=px.colors.sequential.RdBu)

fig4.show()


**5. Map Visualization: Highlighting Locations with the Highest Layoffs**

In [7]:
# Group by country and sum the layoffs
country_layoffs = df.groupby('Country')['Laid_Off_Count'].sum()

# Map visualization for countries with layoffs
fig5 = px.choropleth(country_layoffs.reset_index(), 
                     locations='Country',
                     locationmode='country names',
                     color='Laid_Off_Count',
                     hover_name='Country',
                     color_continuous_scale=px.colors.sequential.RdBu,
                     title='Number of Layoffs by Country')

fig5.show()


**6. Histogram: Distribution of Funds Raised by Companies**

In [8]:
# Histogram for distribution of funds raised by companies
fig6 = px.histogram(df,
                    x='Funds_Raised',
                    title='Distribution of Funds Raised by Companies',
                    color_discrete_sequence=px.colors.sequential.RdBu,
                    labels={'Funds_Raised': 'Amount of Funds Raised'},
                    nbins=30)

fig6.show()


**7. Bar Chart: Top Locations with Most Layoffs**

In [9]:
# Group by location and sum the layoffs
location_layoffs = df.groupby('Location_HQ')['Laid_Off_Count'].sum().sort_values(ascending=False).head(10)

# Bar chart for top locations with the most layoffs
fig7 = px.bar(location_layoffs.reset_index(), 
              x='Location_HQ', 
              y='Laid_Off_Count', 
              title='Top Locations with Most Layoffs', 
              labels={'Location_HQ': 'Location', 'Laid_Off_Count': 'Number of Layoffs'},
              color='Laid_Off_Count',
              color_continuous_scale=px.colors.sequential.RdBu)

fig7.show()


**8. Scatter Plot: Funds Raised vs. Number of Layoffs**

In [10]:
# Scatter plot for funds raised vs number of layoffs
fig8 = px.scatter(df, 
                  x='Funds_Raised', 
                  y='Laid_Off_Count', 
                  title='Funds Raised vs. Number of Layoffs', 
                  labels={'Funds_Raised': 'Funds Raised ($)', 'Laid_Off_Count': 'Number of Layoffs'},
                  color='Laid_Off_Count',
                  color_continuous_scale=px.colors.sequential.RdBu,
                  hover_name='Company')

fig8.show()


**9. Bar Chart: Layoffs Distribution by Stage**

In [11]:
# Group by stage and sum the layoffs
stage_layoffs_count = df.groupby('Stage')['Laid_Off_Count'].sum().sort_values(ascending=False)

# Bar chart for layoffs distribution by stage
fig9 = px.bar(stage_layoffs_count.reset_index(), 
              x='Stage', 
              y='Laid_Off_Count', 
              title='Layoffs Distribution by Stage', 
              labels={'Stage': 'Company Stage', 'Laid_Off_Count': 'Number of Layoffs'},
              color='Laid_Off_Count',
              color_continuous_scale=px.colors.sequential.RdBu)

fig9.show()


**10. Box Plot: Distribution of Layoffs Across Industries**

In [12]:
# Box plot for distribution of layoffs across industries
fig10 = px.box(df, 
               x='Industry', 
               y='Laid_Off_Count',
               title='Distribution of Layoffs Across Industries',
               labels={'Industry': 'Industry', 'Laid_Off_Count': 'Number of Layoffs'},
               color_discrete_sequence=px.colors.sequential.RdBu)

fig10.show()


**11. Country-wise Bar Chart: Total Layoffs**

In [13]:
# Group by country and sum the layoffs
country_total_layoffs = df.groupby('Country')['Laid_Off_Count'].sum().sort_values(ascending=False)

# Bar chart for total layoffs by country
fig11 = px.bar(country_total_layoffs.reset_index(), 
               x='Country', 
               y='Laid_Off_Count', 
               title='Total Layoffs by Country', 
               labels={'Country': 'Country', 'Laid_Off_Count': 'Total Layoffs'},
               color='Laid_Off_Count',
               color_continuous_scale=px.colors.sequential.RdBu)

fig11.show()


**12. Country-wise Box Plot: Layoffs Distribution**

In [14]:
# Box plot for distribution of layoffs across countries
fig12 = px.box(df, 
               x='Country', 
               y='Laid_Off_Count',
               title='Distribution of Layoffs Across Countries',
               labels={'Country': 'Country', 'Laid_Off_Count': 'Number of Layoffs'},
               color_discrete_sequence=px.colors.sequential.RdBu)

fig12.show()


**13. Country-wise Bar Chart: Average Funds Raised**

In [15]:
# Group by country and calculate the average funds raised
country_avg_funds = df.groupby('Country')['Funds_Raised'].mean().sort_values(ascending=False)

# Bar chart for average funds raised by country
fig14 = px.bar(country_avg_funds.reset_index(), 
               x='Country', 
               y='Funds_Raised', 
               title='Average Funds Raised by Country', 
               labels={'Country': 'Country', 'Funds_Raised': 'Average Funds Raised ($)'},
               color='Funds_Raised',
               color_continuous_scale=px.colors.sequential.RdBu)

fig14.show()


In [16]:
# Mapping countries to their respective continents
continent_mapping = {
    'North America': ['United States', 'Canada', 'Mexico'],
    'South America': ['Brazil', 'Argentina', 'Colombia', 'Chile', 'Peru', 'Venezuela', 'Ecuador', 'Bolivia', 'Paraguay', 'Uruguay', 'Guyana', 'Suriname'],
    'Asia': ['India', 'China', 'Japan', 'South Korea', 'Indonesia', 'Saudi Arabia', 'Turkey', 'Iran', 'Israel', 'Thailand', 'Philippines', 'Malaysia', 'Vietnam', 'Singapore'],
    'Europe': ['United Kingdom', 'Germany', 'France', 'Italy', 'Spain', 'Netherlands', 'Belgium', 'Poland', 'Sweden', 'Austria', 'Switzerland', 'Norway', 'Denmark', 'Greece', 'Ireland', 'Portugal', 'Finland', 'Ukraine', 'Czech Republic', 'Romania', 'Hungary', 'Bulgaria', 'Croatia', 'Luxembourg', 'Slovakia', 'Belarus', 'Estonia', 'Latvia', 'Lithuania', 'Slovenia', 'Cyprus', 'Malta', 'Iceland'],
    'Africa': ['South Africa', 'Nigeria', 'Egypt', 'Algeria', 'Morocco', 'Kenya', 'Uganda', 'Angola', 'Zimbabwe', 'Tanzania', 'Ghana', 'Ethiopia', 'Madagascar', 'Cameroon', 'Côte d\'Ivoire', 'Senegal', 'Mali', 'Zambia'],
    'Oceania': ['Australia', 'New Zealand'],
    'Antarctica': ['Antarctica']
}

# Reverse the mapping to get country to continent mapping
country_to_continent = {}
for continent, countries in continent_mapping.items():
    for country in countries:
        country_to_continent[country] = continent

# Add a new column 'Continent' to the dataframe
df['Continent'] = df['Country'].map(country_to_continent)

# Aggregate the layoff data at the continent level
continent_layoffs = df.groupby('Continent')['Laid_Off_Count'].sum()
country_layoffs = df.groupby(['Continent', 'Country'])['Laid_Off_Count'].sum()

continent_layoffs, country_layoffs.head()


(Continent
 Africa             2537.0
 Asia              64484.0
 Europe            61280.0
 North America    325658.0
 Oceania            4111.0
 South America     11781.0
 Name: Laid_Off_Count, dtype: float64,
 Continent  Country
 Africa     Egypt         0.0
            Ghana         0.0
            Kenya       355.0
            Nigeria    1882.0
            Senegal     300.0
 Name: Laid_Off_Count, dtype: float64)

In [17]:
import plotly.graph_objects as go

# Group by stage and sum the layoffs
stage_layoffs = df.groupby('Stage')['Laid_Off_Count'].sum()

# Custom color palette
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

# Enhanced pie chart
fig15 = go.Figure(data=[go.Pie(labels=stage_layoffs.index,
                               values=stage_layoffs.values,
                               textinfo='percent+label',
                               insidetextorientation='radial',
                               marker=dict(colors=colors, line=dict(color='#000000', width=2)))],
                  layout=go.Layout(title='Distribution of Company Stages Among the Layoffs'))

# Display the pie chart
fig15.show()
