# Layoffs Case Study

### Importing necessary libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
    

### Data Import

In [2]:
# Location of the raw layoffs CSV on GitHub.
url = "https://raw.githubusercontent.com/neerajcodes888/Data-Science/main/Case%20Study/Recent%20Layoffs%20Analysis/layoffs.csv"
# Read the remote CSV straight into a DataFrame.
data = pd.read_csv(filepath_or_buffer=url)

In [3]:
# Preview the first five rows to get a feel for the columns and value formats.
data.head(n=5)

Unnamed: 0,company,location,total_laid_off,date,percentage_laid_off,industry,source,stage,funds_raised,country,date_added
0,Kaltura,New York City,70.0,8/7/2025,10%,Media,https://www.calcalistech.com/ctechnews/article...,Post-IPO,$166,United States,8/10/2025
1,Peloton,New York City,,8/7/2025,6%,Fitness,https://www.reuters.com/technology/peloton-cut...,Post-IPO,$1900,United States,8/10/2025
2,Yotpo,New York City,200.0,8/5/2025,34%,Marketing,https://www.calcalistech.com/ctechnews/article...,Unknown,$436,United States,8/5/2025
3,Windsurf,SF Bay Area,30.0,8/5/2025,,AI,https://techcrunch.com/2025/08/05/three-weeks-...,Acquired,$243,United States,8/10/2025
4,Wondery,Los Angeles,100.0,8/4/2025,,Media,https://variety.com/2025/biz/news/amazon-reorg...,Acquired,$15,United States,8/5/2025


### Let's handle some missing data

In [4]:
# Summarize each column's dtype and non-null count to see where values are missing.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4127 entries, 0 to 4126
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   company              4127 non-null   object 
 1   location             4126 non-null   object 
 2   total_laid_off       2689 non-null   float64
 3   date                 4127 non-null   object 
 4   percentage_laid_off  2626 non-null   object 
 5   industry             4125 non-null   object 
 6   source               4124 non-null   object 
 7   stage                4120 non-null   object 
 8   funds_raised         3672 non-null   object 
 9   country              4125 non-null   object 
 10  date_added           4127 non-null   object 
dtypes: float64(1), object(10)
memory usage: 354.8+ KB


### We can remove unwanted columns. Here we remove the `source` column.

In [5]:
# Drop the `source` column (article links), which is not needed for the analysis.
# `errors="ignore"` keeps this cell idempotent: re-running it after the column
# has already been removed no longer raises a KeyError.
data = data.drop(columns="source", errors="ignore")

In [6]:
# Display the frame (pandas truncates the middle rows) to confirm `source` is gone.
data

Unnamed: 0,company,location,total_laid_off,date,percentage_laid_off,industry,stage,funds_raised,country,date_added
0,Kaltura,New York City,70.0,8/7/2025,10%,Media,Post-IPO,$166,United States,8/10/2025
1,Peloton,New York City,,8/7/2025,6%,Fitness,Post-IPO,$1900,United States,8/10/2025
2,Yotpo,New York City,200.0,8/5/2025,34%,Marketing,Unknown,$436,United States,8/5/2025
3,Windsurf,SF Bay Area,30.0,8/5/2025,,AI,Acquired,$243,United States,8/10/2025
4,Wondery,Los Angeles,100.0,8/4/2025,,Media,Acquired,$15,United States,8/5/2025
...,...,...,...,...,...,...,...,...,...,...
4122,Service,Los Angeles,,3/16/2020,100%,Travel,Seed,$5,United States,8/31/2020
4123,HopSkipDrive,Los Angeles,8.0,3/13/2020,10%,Transportation,Unknown,$45,United States,4/3/2020
4124,Panda Squad,SF Bay Area,6.0,3/13/2020,75%,Consumer,Seed,$1,United States,4/17/2020
4125,Tamara Mellon,Los Angeles,20.0,3/12/2020,40%,Retail,Series C,$90,United States,3/31/2020


### Now we can see that the `source` column has been removed.

#### Checking NA or null values

In [7]:
# Boolean mask with the same shape as `data`: True wherever a value is missing.
data.isnull()

Unnamed: 0,company,location,total_laid_off,date,percentage_laid_off,industry,stage,funds_raised,country,date_added
0,False,False,False,False,False,False,False,False,False,False
1,False,False,True,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False
4,False,False,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...
4122,False,False,True,False,False,False,False,False,False,False
4123,False,False,False,False,False,False,False,False,False,False
4124,False,False,False,False,False,False,False,False,False,False
4125,False,False,False,False,False,False,False,False,False,False


In [8]:
# Count the missing values in each column by summing the boolean mask column-wise.
data.isnull().sum(axis=0)

company                   0
location                  1
total_laid_off         1438
date                      0
percentage_laid_off    1501
industry                  2
stage                     7
funds_raised            455
country                   2
date_added                0
dtype: int64

In [9]:
# `isna` and `isnull` are the same check under two names, so this mask is
# identical to the `isnull` mask shown earlier.
data.isna()

Unnamed: 0,company,location,total_laid_off,date,percentage_laid_off,industry,stage,funds_raised,country,date_added
0,False,False,False,False,False,False,False,False,False,False
1,False,False,True,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False
4,False,False,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...
4122,False,False,True,False,False,False,False,False,False,False
4123,False,False,False,False,False,False,False,False,False,False
4124,False,False,False,False,False,False,False,False,False,False
4125,False,False,False,False,False,False,False,False,False,False


In [10]:
# Per-column count of missing values, using the `isna` spelling of the check.
data.isna().sum(axis=0)

company                   0
location                  1
total_laid_off         1438
date                      0
percentage_laid_off    1501
industry                  2
stage                     7
funds_raised            455
country                   2
date_added                0
dtype: int64

### We can analyse whether to drop the rows with missing values or replace them with suitable values

In [11]:
# Inspect the last 100 rows to see how missing values appear in the older records.
data.tail(n=100)

Unnamed: 0,company,location,total_laid_off,date,percentage_laid_off,industry,stage,funds_raised,country,date_added
4027,Maven,Seattle,31.0,3/30/2020,9%,Media,Post-IPO,$77,United States,4/8/2020
4028,Blume Global,SF Bay Area,30.0,3/30/2020,10%,Logistics,Unknown,,United States,4/1/2020
4029,Catalant,Boston,30.0,3/30/2020,,Other,Series E,$110,United States,3/31/2020
4030,Starship Technologies,"Tallinn,Non-U.S.",30.0,3/30/2020,,Transportation,Series A,$82,Estonia,4/8/2020
4031,Loftsmart,New York City,25.0,3/30/2020,75%,Real Estate,Series A,$18,United States,4/6/2020
...,...,...,...,...,...,...,...,...,...,...
4122,Service,Los Angeles,,3/16/2020,100%,Travel,Seed,$5,United States,8/31/2020
4123,HopSkipDrive,Los Angeles,8.0,3/13/2020,10%,Transportation,Unknown,$45,United States,4/3/2020
4124,Panda Squad,SF Bay Area,6.0,3/13/2020,75%,Consumer,Seed,$1,United States,4/17/2020
4125,Tamara Mellon,Los Angeles,20.0,3/12/2020,40%,Retail,Series C,$90,United States,3/31/2020


### We can remove NA values. First, let's check the current shape of the data.

In [12]:
# (rows, columns) of the frame; no rows have been dropped up to this point.
data.shape

(4127, 10)

## Now let's start our study

### 1. How many layoffs occurred in total across all companies?
