# Project Title: Analysis of Layoffs during Covid-19 Pandemic
## 11th March 2020 to 14th Febuary 2023

## Project Description
This notebook is 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [9]:
path = r"layoffs.csv"
df = pd.read_csv(path)
df.sort_values(by='total_laid_off', ascending=False).head(20)

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
459,Google,SF Bay Area,Consumer,12000.0,0.06,2023-01-20,Post-IPO,United States,26.0
932,Meta,SF Bay Area,Consumer,11000.0,0.13,2022-11-09,Post-IPO,United States,26000.0
141,Meta,SF Bay Area,Consumer,10000.0,,2023-03-14,Post-IPO,United States,26000.0
492,Microsoft,Seattle,Other,10000.0,0.05,2023-01-18,Post-IPO,United States,1.0
864,Amazon,Seattle,Retail,10000.0,0.03,2022-11-16,Post-IPO,United States,108.0
125,Amazon,Seattle,Retail,9000.0,,2023-03-20,Post-IPO,United States,108.0
210,Ericsson,Stockholm,Other,8500.0,0.08,2023-02-24,Post-IPO,Sweden,663.0
633,Salesforce,SF Bay Area,Sales,8000.0,0.1,2023-01-04,Post-IPO,United States,65.0
632,Amazon,Seattle,Retail,8000.0,0.02,2023-01-04,Post-IPO,United States,108.0
18,Flink,Berlin,Food,8000.0,0.4,2023-04-24,Series B,Germany,1000.0


In [10]:
df.country.unique()

array(['United States', 'Australia', 'India', 'Singapore', 'Germany',
       'Saudi Arabia', 'Poland', 'France', 'Philippines', 'Israel',
       'United Kingdom', 'Nigeria', 'Chile', 'Canada', 'New Zealand',
       'Spain', 'China', 'Sweden', 'Austria', 'Ukraine', 'Switzerland',
       'Ireland', 'Indonesia', 'Japan', 'Brazil', 'South Korea', 'Italy',
       'Estonia', 'Finland', 'Netherlands', 'Portugal', 'Colombia',
       'Argentina', 'Seychelles', 'Mexico', 'Egypt', 'Kenya',
       'Luxembourg', 'Greece', 'Norway', 'Belgium', 'Denmark',
       'Hong Kong', 'Malaysia', 'Hungary', 'Vietnam', 'Thailand',
       'Lithuania', 'Ghana', 'Senegal', 'Pakistan',
       'United Arab Emirates', 'Peru', 'Bahrain', 'Romania', 'Turkey',
       'Russia', 'Uruguay', 'Bulgaria', 'South Africa', 'Czech Republic',
       'Myanmar'], dtype=object)

### Exploring Data

In [5]:
df.shape

(2545, 9)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2545 entries, 0 to 2544
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   company              2545 non-null   object 
 1   location             2545 non-null   object 
 2   industry             2543 non-null   object 
 3   total_laid_off       1746 non-null   float64
 4   percentage_laid_off  1694 non-null   float64
 5   date                 2543 non-null   object 
 6   stage                2539 non-null   object 
 7   country              2545 non-null   object 
 8   funds_raised         2297 non-null   float64
dtypes: float64(3), object(6)
memory usage: 179.1+ KB


In [9]:
df.nunique()

company                2021
location                206
industry                 29
total_laid_off          299
percentage_laid_off      77
date                    528
stage                    16
country                  62
funds_raised            652
dtype: int64

### Observing for Missing Numbers

In [10]:
df.isna().sum()

company                  0
location                 0
industry                 2
total_laid_off         799
percentage_laid_off    851
date                     2
stage                    6
country                  0
funds_raised           248
dtype: int64

### Dealing with Null Values

In [11]:
# Percentage of Missing Values in data
(df.isnull().sum()/df.count()*100).round(2).sort_values(ascending=False).

percentage_laid_off    50.24
total_laid_off         45.76
funds_raised           10.80
stage                   0.24
date                    0.08
industry                0.08
country                 0.00
location                0.00
company                 0.00
dtype: float64

In [19]:
df = df.drop(['funds_raised','percentage_laid_off', 'stage', 'date'], axis=1)
df.total_laid_off = df.total_laid_off.fillna(1) # lets assume there must be atleast 1 reported layoff in any company in this dataset
df.industry = df.industry.fillna('Unknown')
df.total_laid_off = df.total_laid_off.astype(int) # layoffs should be whole numbers not floats

In [20]:
df.isna().sum().sum()

0

In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2543 entries, 0 to 2542
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   company         2543 non-null   object
 1   location        2543 non-null   object
 2   industry        2543 non-null   object
 3   total_laid_off  2543 non-null   int32 
 4   date            2543 non-null   object
 5   country         2543 non-null   object
dtypes: int32(1), object(5)
memory usage: 129.1+ KB


In [28]:
# Converting date column to datetype dtype
df['date'] = pd.to_datetime(df['date'])

In [30]:
# Looking at 10 random rolls in dataset
df.sample(10)

Unnamed: 0,company,location,industry,total_laid_off,date,country
779,Yapily,London,Finance,1,2022-12-01,United Kingdom
2009,Dark,SF Bay Area,Product,6,2020-06-23,United States
1365,Perceptive Automata,Boston,Transportation,1,2022-08-01,United States
222,StrongDM,SF Bay Area,Infrastructure,40,2023-02-23,United States
1874,Genius,New York City,Consumer,1,2021-09-15,United States
857,Capitolis,New York City,Finance,37,2022-11-17,United States
1909,Pulse Secure,SF Bay Area,Security,78,2020-12-23,United States
957,Zendesk,SF Bay Area,Support,350,2022-11-07,United States
1780,DataRobot,Boston,Data,70,2022-05-11,United States
1608,Bybit,Singapore,Crypto,600,2022-06-20,Singapore


In [31]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2543 entries, 0 to 2542
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   company         2543 non-null   object        
 1   location        2543 non-null   object        
 2   industry        2543 non-null   object        
 3   total_laid_off  2543 non-null   int32         
 4   date            2543 non-null   datetime64[ns]
 5   country         2543 non-null   object        
dtypes: datetime64[ns](1), int32(1), object(4)
memory usage: 129.1+ KB


In [33]:
df.to_csv('cleaned_layoffs.csv')