In [19]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path
import numpy as np

import warnings 
warnings.filterwarnings('ignore')

# File to Load 
layoffs_file = Path("Resources/layoffs.csv")

# Read Layoffs file and store into Pandas DataFrames
layoffs_data = pd.read_csv(layoffs_file)
layoffs_data_df = layoffs_data

# Display the data table for preview
display(layoffs_data_df)


Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,FlightStats,Portland,Travel,73.0,,2024-05-30,Acquired,United States,3.0
1,Walnut,New York City,Sales,15.0,0.20,2024-05-29,Series B,United States,56.0
2,Fisker,Los Angeles,Transportation,,,2024-05-29,Post-IPO,United States,1700.0
3,Lucid Motors,SF Bay Area,Transportation,400.0,0.06,2024-05-24,Post-IPO,United States,8300.0
4,Foursquare,New York City,Marketing,105.0,0.25,2024-05-23,Series G,United States,390.0
...,...,...,...,...,...,...,...,...,...
3620,Service,Los Angeles,Travel,,1.00,2020-03-16,Seed,United States,5.1
3621,HopSkipDrive,Los Angeles,Transportation,8.0,0.10,2020-03-13,Unknown,United States,45.0
3622,Panda Squad,SF Bay Area,Consumer,6.0,0.75,2020-03-13,Seed,United States,1.0
3623,Tamara Mellon,Los Angeles,Retail,20.0,0.40,2020-03-12,Series C,United States,90.0


In [20]:
layoffs_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3625 entries, 0 to 3624
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   company              3625 non-null   object 
 1   location             3624 non-null   object 
 2   industry             3624 non-null   object 
 3   total_laid_off       2377 non-null   float64
 4   percentage_laid_off  2331 non-null   float64
 5   date                 3625 non-null   object 
 6   stage                3618 non-null   object 
 7   country              3625 non-null   object 
 8   funds_raised         3236 non-null   float64
dtypes: float64(3), object(6)
memory usage: 255.0+ KB


In [21]:
# ------------- Clean data --------------

# Fixing missing values
layoffs_data_df["total_laid_off"] = layoffs_data_df["total_laid_off"].fillna(0)
layoffs_data_df["percentage_laid_off"] = layoffs_data_df["percentage_laid_off"].fillna(0)
layoffs_data_df["funds_raised"] = layoffs_data_df["funds_raised"].fillna(0)

# Converting date to mm/yyyy
layoffs_data_df['date'] = pd.to_datetime(layoffs_data_df['date']).dt.strftime('%m-%Y')

# Selecting only the data where the total number of layoffs is different from zero 0
layoffs_data_df = layoffs_data_df[layoffs_data_df["total_laid_off"] != 0]
 

display(layoffs_data_df)


Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,FlightStats,Portland,Travel,73.0,0.00,05-2024,Acquired,United States,3.0
1,Walnut,New York City,Sales,15.0,0.20,05-2024,Series B,United States,56.0
3,Lucid Motors,SF Bay Area,Transportation,400.0,0.06,05-2024,Post-IPO,United States,8300.0
4,Foursquare,New York City,Marketing,105.0,0.25,05-2024,Series G,United States,390.0
5,Guild,Denver,Education,300.0,0.25,05-2024,Series F,United States,643.0
...,...,...,...,...,...,...,...,...,...
3619,Help.com,Austin,Support,16.0,1.00,03-2020,Seed,United States,6.0
3621,HopSkipDrive,Los Angeles,Transportation,8.0,0.10,03-2020,Unknown,United States,45.0
3622,Panda Squad,SF Bay Area,Consumer,6.0,0.75,03-2020,Seed,United States,1.0
3623,Tamara Mellon,Los Angeles,Retail,20.0,0.40,03-2020,Series C,United States,90.0


In [23]:
#  Select only the United States
usa_layoffs_df = layoffs_data_df[(layoffs_data_df["country"] == "United States")]

display(usa_layoffs_df)

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,FlightStats,Portland,Travel,73.0,0.00,05-2024,Acquired,United States,3.0
1,Walnut,New York City,Sales,15.0,0.20,05-2024,Series B,United States,56.0
3,Lucid Motors,SF Bay Area,Transportation,400.0,0.06,05-2024,Post-IPO,United States,8300.0
4,Foursquare,New York City,Marketing,105.0,0.25,05-2024,Series G,United States,390.0
5,Guild,Denver,Education,300.0,0.25,05-2024,Series F,United States,643.0
...,...,...,...,...,...,...,...,...,...
3619,Help.com,Austin,Support,16.0,1.00,03-2020,Seed,United States,6.0
3621,HopSkipDrive,Los Angeles,Transportation,8.0,0.10,03-2020,Unknown,United States,45.0
3622,Panda Squad,SF Bay Area,Consumer,6.0,0.75,03-2020,Seed,United States,1.0
3623,Tamara Mellon,Los Angeles,Retail,20.0,0.40,03-2020,Series C,United States,90.0


In [24]:
# Total of layoffs per industry in USA

usa_lay_industry = usa_layoffs_df.groupby('industry')["total_laid_off"].sum()

# Converting data to int
usa_lay_industry = usa_lay_industry.astype(int)

# # Display the data table
display(usa_lay_industry)


industry
AI                  242
Aerospace           737
Construction       3330
Consumer          57220
Crypto             6628
Data               8177
Education          2692
Energy             1647
Finance           24641
Fitness            7637
Food              14461
HR                 6913
Hardware          36210
Healthcare        22237
Infrastructure    14711
Legal               906
Logistics          3690
Manufacturing      1512
Marketing          6734
Media              4627
Other             32819
Product            1792
Real Estate       16141
Recruiting         4575
Retail            49887
Sales             14538
Security           8402
Support            5443
Transportation    42675
Travel            11691
Name: total_laid_off, dtype: int64

In [26]:
# Sumary of layoffs per industry in USA

usa_lay_industry = usa_layoffs_df.groupby('industry')

usa_lay_ind_sum = usa_lay_industry["total_laid_off"].sum()
usa_lay_ind_avg = usa_lay_industry["total_laid_off"].mean()
usa_lay_ind_median = usa_lay_industry["total_laid_off"].median()


usa_lay_ind_summary = pd.DataFrame({'Total Layoffs': usa_lay_ind_sum,
                        'Average Layoffs': usa_lay_ind_avg,
                        'Median Layoffs': usa_lay_ind_median,})


usa_lay_ind_summary


Unnamed: 0_level_0,Total Layoffs,Average Layoffs,Median Layoffs
industry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AI,242.0,60.5,40.0
Aerospace,737.0,81.888889,75.0
Construction,3330.0,555.0,210.0
Consumer,57220.0,615.268817,95.0
Crypto,6628.0,157.809524,60.0
Data,8177.0,138.59322,80.0
Education,2692.0,74.777778,46.5
Energy,1647.0,183.0,154.0
Finance,24641.0,147.550898,75.0
Fitness,7637.0,424.277778,208.0


In [34]:
# Sumary of layoff per month/ year in the USA
usa_lay_period = usa_layoffs_df.groupby(['date', 'industry'])

usa_lay_date_sum = usa_lay_period["total_laid_off"].sum()
usa_lay_date_avg = usa_lay_period["total_laid_off"].mean()
usa_lay_date_median = usa_lay_period["total_laid_off"].median()


usa_lay_date_sumary = pd.DataFrame({'Total Layoffs': usa_lay_date_sum,
                                    'Average Layoffs': usa_lay_date_avg,
                                    'Median Layoffs': usa_lay_date_median,})

usa_lay_date_sumary


Unnamed: 0_level_0,Unnamed: 1_level_0,Total Layoffs,Average Layoffs,Median Layoffs
date,industry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
01-2021,Food,2057.0,1028.500000,1028.5
01-2021,Other,315.0,315.000000,315.0
01-2021,Retail,800.0,800.000000,800.0
01-2022,Consumer,80.0,80.000000,80.0
01-2022,Finance,330.0,330.000000,330.0
...,...,...,...,...
12-2023,Media,8.0,8.000000,8.0
12-2023,Other,109.0,109.000000,109.0
12-2023,Retail,1084.0,361.333333,225.0
12-2023,Transportation,1070.0,356.666667,150.0
