## 1. Import libraries

In [3]:
import quandl
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import os
import statsmodels.api as sm
import warnings


warnings.filterwarnings("ignore")

plt.style.use('fivethirtyeight')

In [4]:
# This option ensures that the graphs you create are displayed within the notebook without the need to "call" them specifically.

%matplotlib inline

## 2. Import data

In [5]:
path = r'C:\Users\Niklas Winter\Layoffs Analyse'

In [6]:
# 2.Importing data set Layoffs
df = pd.read_csv(os.path.join(path, '02 Data', 'Prepared Data', 'layoffs_clean.csv'))


In [7]:
# 2.Importing data set Layoffs_num
df_num = pd.read_csv(os.path.join(path, '02 Data', 'Prepared Data', 'layoffs_num.csv'))


In [8]:
df.head()

Unnamed: 0.1,Unnamed: 0,Company,Location,Industry,Laid_Off_Count,Date,Stage,Country,Percentage
0,0,GoTo Group,Jakarta,Transportation,600.0,2023-03-10,Post-IPO,Indonesia,0.18
1,1,Xero,Wellington,Finance,800.0,2023-03-09,Post-IPO,New Zealand,0.15
2,2,Shopee,Jakarta,Food,200.0,2023-03-09,Unknown,Indonesia,0.18
3,3,Gopuff,Philadelphia,Food,100.0,2023-03-09,Series H,United States,0.02
4,4,Wave Financial,Toronto,Finance,50.0,2023-03-09,Acquired,Canada,0.18


In [9]:
df = df.drop(columns = ['Unnamed: 0'])

In [36]:
df['Country'].nunique()

59

In [37]:
unique_values = df['Country'].unique().tolist()
print(unique_values)

['Indonesia', 'New Zealand', 'United States', 'Canada', 'India', 'Australia', 'Nigeria', 'France', 'Brazil', 'Switzerland', 'Germany', 'Israel', 'Sweden', 'United Kingdom', 'Japan', 'South Korea', 'China', 'Italy', 'Singapore', 'Estonia', 'Ireland', 'Finland', 'Netherlands', 'Spain', 'Portugal', 'Chile', 'Colombia', 'Argentina', 'Seychelles', 'Austria', 'Mexico', 'Egypt', 'Kenya', 'Luxembourg', 'Greece', 'Poland', 'Norway', 'Belgium', 'Denmark', 'Hong Kong', 'Malaysia', 'Hungary', 'Vietnam', 'Thailand', 'Lithuania', 'Ghana', 'Senegal', 'Pakistan', 'United Arab Emirates', 'Peru', 'Bahrain', 'Romania', 'Turkey', 'Russia', 'Uruguay', 'Bulgaria', 'South Africa', 'Czech Republic', 'Myanmar']


In [38]:
continents = {
    'Asia': ['Indonesia', 'India', 'China', 'Singapore', 'Estonia', 'Japan', 'South Korea', 'Israel', 'Vietnam', 'Thailand', 'Malaysia', 'Pakistan', 'United Arab Emirates', 'Myanmar'],
    'North America': ['United States', 'Canada', 'Mexico'],
    'Europe': ['New Zealand', 'France', 'Switzerland', 'Germany', 'Sweden', 'United Kingdom', 'Italy', 'Ireland', 'Finland', 'Netherlands', 'Spain', 'Portugal', 'Estonia', 'Luxembourg', 'Greece', 'Poland', 'Norway', 'Belgium', 'Denmark', 'Hungary', 'Lithuania', 'Bulgaria', 'Czech Republic'],
    'South America': ['Brazil', 'Chile', 'Colombia', 'Argentina', 'Peru', 'Uruguay'],
    'Africa': ['Nigeria', 'Seychelles', 'Egypt', 'Kenya', 'Ghana', 'Senegal', 'South Africa'],
    'Oceania': ['Australia', 'New Zealand']
}

country_to_continent = {}
for continent, countries in continents.items():
    for country in countries:
        country_to_continent[country] = continent


In [40]:
# Create a dictionary of countries and their associated continents
continents = {
    'Indonesia': 'Asia',
    'New Zealand': 'Oceania',
    'United States': 'North America',
    'Canada': 'North America',
    'India': 'Asia',
    'Australia': 'Oceania',
    'Nigeria': 'Africa',
    'France': 'Europe',
    'Brazil': 'South America',
    'Switzerland': 'Europe',
    'Germany': 'Europe',
    'Israel': 'Asia',
    'Sweden': 'Europe',
    'United Kingdom': 'Europe',
    'Japan': 'Asia',
    'South Korea': 'Asia',
    'China': 'Asia',
    'Italy': 'Europe',
    'Singapore': 'Asia',
    'Estonia': 'Europe',
    'Ireland': 'Europe',
    'Finland': 'Europe',
    'Netherlands': 'Europe',
    'Spain': 'Europe',
    'Portugal': 'Europe',
    'Chile': 'South America',
    'Colombia': 'South America',
    'Argentina': 'South America',
    'Seychelles': 'Africa',
    'Austria': 'Europe',
    'Mexico': 'North America',
    'Egypt': 'Africa',
    'Kenya': 'Africa',
    'Luxembourg': 'Europe',
    'Greece': 'Europe',
    'Poland': 'Europe',
    'Norway': 'Europe',
    'Belgium': 'Europe',
    'Denmark': 'Europe',
    'Hong Kong': 'Asia',
    'Malaysia': 'Asia',
    'Hungary': 'Europe',
    'Vietnam': 'Asia',
    'Thailand': 'Asia',
    'Lithuania': 'Europe',
    'Ghana': 'Africa',
    'Senegal': 'Africa',
    'Pakistan': 'Asia',
    'United Arab Emirates': 'Asia',
    'Peru': 'South America',
    'Bahrain': 'Asia',
    'Romania': 'Europe',
    'Turkey': 'Asia',
    'Russia': 'Europe',
    'Uruguay': 'South America',
    'Bulgaria': 'Europe',
    'South Africa': 'Africa',
    'Czech Republic': 'Europe',
    'Myanmar': 'Asia'
}

# Use the map() method to create a new column called "continent"
df['continent'] = df['Country'].map(continents)


In [41]:
df.head( )

Unnamed: 0,Company,Location,Industry,Laid_Off_Count,Date,Stage,Country,Percentage,continent
0,GoTo Group,Jakarta,Transportation,600.0,2023-03-10,Post-IPO,Indonesia,0.18,Asia
1,Xero,Wellington,Finance,800.0,2023-03-09,Post-IPO,New Zealand,0.15,Oceania
2,Shopee,Jakarta,Food,200.0,2023-03-09,Unknown,Indonesia,0.18,Asia
3,Gopuff,Philadelphia,Food,100.0,2023-03-09,Series H,United States,0.02,North America
4,Wave Financial,Toronto,Finance,50.0,2023-03-09,Acquired,Canada,0.18,North America


In [42]:
cross_1 = pd.crosstab(df['continent'], df['Laid_Off_Count'])


In [43]:
cross_1

Laid_Off_Count,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,...,4000.0,4100.0,4375.0,6000.0,6650.0,8000.0,8500.0,10000.0,11000.0,12000.0
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Africa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Asia,0,0,0,1,0,0,1,1,2,1,...,0,0,0,0,0,0,0,0,0,0
Europe,0,0,0,0,0,2,1,1,0,1,...,1,0,1,1,0,0,1,0,0,0
North America,1,2,8,5,2,3,3,12,3,10,...,0,1,0,0,1,2,0,2,1,1
Oceania,0,0,0,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
South America,0,1,0,0,0,0,0,2,1,0,...,0,0,0,0,0,0,0,0,0,0


In [45]:
pd.crosstab(index=[df['Country'], df['continent']], columns=df['Laid_Off_Count'])


Unnamed: 0_level_0,Laid_Off_Count,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,...,4000.0,4100.0,4375.0,6000.0,6650.0,8000.0,8500.0,10000.0,11000.0,12000.0
Country,continent,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Argentina,South America,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Australia,Oceania,0,0,0,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
Austria,Europe,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Bahrain,Asia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Belgium,Europe,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Brazil,South America,0,1,0,0,0,0,0,2,1,0,...,0,0,0,0,0,0,0,0,0,0
Bulgaria,Europe,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Canada,North America,0,0,0,0,0,0,0,1,2,1,...,0,0,0,0,0,0,0,0,0,0
Chile,South America,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
China,Asia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
df_country = df.groupby(['continent', 'Country'])['Laid_Off_Count'].sum().reset_index()


In [47]:
df_country

Unnamed: 0,continent,Country,Laid_Off_Count
0,Africa,Egypt,160.0
1,Africa,Ghana,80.0
2,Africa,Kenya,509.0
3,Africa,Nigeria,2282.0
4,Africa,Senegal,300.0
5,Africa,Seychelles,315.0
6,Africa,South Africa,80.0
7,Asia,Bahrain,80.0
8,Asia,China,6625.0
9,Asia,Hong Kong,890.0


In [54]:
df['Covid_flag'] = np.where(df['Industry'].isin(['Transportation', 'Retail', 'Consumer', 'Travel']), 'high-risk', '')


Unnamed: 0,Company,Location,Industry,Laid_Off_Count,Date,Stage,Country,Percentage,continent,Covid_flag
0,GoTo Group,Jakarta,Transportation,600.0,2023-03-10,Post-IPO,Indonesia,0.18,Asia,high-risk
1,Xero,Wellington,Finance,800.0,2023-03-09,Post-IPO,New Zealand,0.15,Oceania,
2,Shopee,Jakarta,Food,200.0,2023-03-09,Unknown,Indonesia,0.18,Asia,
3,Gopuff,Philadelphia,Food,100.0,2023-03-09,Series H,United States,0.02,North America,
4,Wave Financial,Toronto,Finance,50.0,2023-03-09,Acquired,Canada,0.18,North America,
...,...,...,...,...,...,...,...,...,...,...
2341,Service,Los Angeles,Travel,80.0,2020-03-16,Seed,United States,1.00,North America,high-risk
2342,HopSkipDrive,Los Angeles,Transportation,8.0,2020-03-13,Unknown,United States,0.10,North America,high-risk
2343,Panda Squad,SF Bay Area,Consumer,6.0,2020-03-13,Seed,United States,0.75,North America,high-risk
2344,Tamara Mellon,Los Angeles,Retail,20.0,2020-03-12,Series C,United States,0.40,North America,high-risk


In [49]:
df

Unnamed: 0,Company,Location,Industry,Laid_Off_Count,Date,Stage,Country,Percentage,continent
0,GoTo Group,Jakarta,Transportation,600.0,2023-03-10,Post-IPO,Indonesia,0.18,Asia
1,Xero,Wellington,Finance,800.0,2023-03-09,Post-IPO,New Zealand,0.15,Oceania
2,Shopee,Jakarta,Food,200.0,2023-03-09,Unknown,Indonesia,0.18,Asia
3,Gopuff,Philadelphia,Food,100.0,2023-03-09,Series H,United States,0.02,North America
4,Wave Financial,Toronto,Finance,50.0,2023-03-09,Acquired,Canada,0.18,North America
...,...,...,...,...,...,...,...,...,...
2341,Service,Los Angeles,Travel,80.0,2020-03-16,Seed,United States,1.00,North America
2342,HopSkipDrive,Los Angeles,Transportation,8.0,2020-03-13,Unknown,United States,0.10,North America
2343,Panda Squad,SF Bay Area,Consumer,6.0,2020-03-13,Seed,United States,0.75,North America
2344,Tamara Mellon,Los Angeles,Retail,20.0,2020-03-12,Series C,United States,0.40,North America


In [52]:
df['Covid_flag'] = np.where(df['Industry'].isin(['Hardware', 'Data', 'Media', 'Security']), 'Winner', '')

In [53]:
df

Unnamed: 0,Company,Location,Industry,Laid_Off_Count,Date,Stage,Country,Percentage,continent,Covid_flag
0,GoTo Group,Jakarta,Transportation,600.0,2023-03-10,Post-IPO,Indonesia,0.18,Asia,
1,Xero,Wellington,Finance,800.0,2023-03-09,Post-IPO,New Zealand,0.15,Oceania,
2,Shopee,Jakarta,Food,200.0,2023-03-09,Unknown,Indonesia,0.18,Asia,
3,Gopuff,Philadelphia,Food,100.0,2023-03-09,Series H,United States,0.02,North America,
4,Wave Financial,Toronto,Finance,50.0,2023-03-09,Acquired,Canada,0.18,North America,
...,...,...,...,...,...,...,...,...,...,...
2341,Service,Los Angeles,Travel,80.0,2020-03-16,Seed,United States,1.00,North America,
2342,HopSkipDrive,Los Angeles,Transportation,8.0,2020-03-13,Unknown,United States,0.10,North America,
2343,Panda Squad,SF Bay Area,Consumer,6.0,2020-03-13,Seed,United States,0.75,North America,
2344,Tamara Mellon,Los Angeles,Retail,20.0,2020-03-12,Series C,United States,0.40,North America,


In [55]:

df['Covid_flag'] = np.where(df['Industry'].isin(['Transportation', 'Retail', 'Consumer', 'Travel']), 'high-risk', 
                            np.where(df['Industry'].isin(['Hardware', 'Data', 'Media', 'Security']), 'Technology', ''))


In [56]:
df

Unnamed: 0,Company,Location,Industry,Laid_Off_Count,Date,Stage,Country,Percentage,continent,Covid_flag
0,GoTo Group,Jakarta,Transportation,600.0,2023-03-10,Post-IPO,Indonesia,0.18,Asia,high-risk
1,Xero,Wellington,Finance,800.0,2023-03-09,Post-IPO,New Zealand,0.15,Oceania,
2,Shopee,Jakarta,Food,200.0,2023-03-09,Unknown,Indonesia,0.18,Asia,
3,Gopuff,Philadelphia,Food,100.0,2023-03-09,Series H,United States,0.02,North America,
4,Wave Financial,Toronto,Finance,50.0,2023-03-09,Acquired,Canada,0.18,North America,
...,...,...,...,...,...,...,...,...,...,...
2341,Service,Los Angeles,Travel,80.0,2020-03-16,Seed,United States,1.00,North America,high-risk
2342,HopSkipDrive,Los Angeles,Transportation,8.0,2020-03-13,Unknown,United States,0.10,North America,high-risk
2343,Panda Squad,SF Bay Area,Consumer,6.0,2020-03-13,Seed,United States,0.75,North America,high-risk
2344,Tamara Mellon,Los Angeles,Retail,20.0,2020-03-12,Series C,United States,0.40,North America,high-risk


In [57]:
df['Covid_flag'].value_counts()

             1558
high-risk     526
Winner        262
Name: Covid_flag, dtype: int64

In [58]:
df.to_csv('new_dataframe.csv', index=False)

In [76]:
# Group dataframe by country and Covid_flag
grouped = df.groupby(['Country', 'Covid_flag'])['Laid_Off_Count'].sum()

# Calculate percentage of laid_off persons in each country who belong to the 'high-risk' group
result = grouped.groupby(level=0).apply(lambda x: 100 * x / float(x.sum())).unstack('Covid_flag')['high-risk']

# Sort the result in descending order
result = result.sort_values(ascending=False)

# Print the result
print(result)


Country
Uruguay                 100.000000
Greece                  100.000000
Russia                  100.000000
Myanmar                 100.000000
Czech Republic          100.000000
Malaysia                100.000000
Lithuania               100.000000
Thailand                100.000000
United Arab Emirates     94.891775
Indonesia                69.294366
Nigeria                  67.966696
Finland                  51.020408
Egypt                    50.000000
South Korea              50.000000
Singapore                42.904385
China                    41.509434
United Kingdom           37.638594
United States            36.177221
India                    36.118079
Canada                   34.770855
Hong Kong                33.707865
Israel                   29.195089
Netherlands              29.076397
Brazil                   22.098115
Luxembourg               21.951220
France                   21.904762
Germany                  19.628375
Australia                18.884220
Kenya       

In [60]:
# Count the number of laid_off people in the 'Winner' group
winner_count = df.loc[df['Covid_flag'] == 'Winner', 'Laid_Off_Count'].sum()

# Calculate the total number of laid_off people
total_count = df['Laid_Off_Count'].sum()

# Calculate the chance that a laid_off person was part of the 'Winner' group
winner_chance = winner_count / total_count

print("The chance that a laid_off person was part of the 'Winner' group is {:.2%}".format(winner_chance))


The chance that a laid_off person was part of the 'Winner' group is 8.23%


In [62]:
# Calculate total number of laid-off persons
total_laid_off = df['Laid_Off_Count'].sum()

# Calculate total number of laid-off persons in the high-risk group
high_risk_laid_off = df.loc[df['Covid_flag'] == 'high-risk', 'Laid_Off_Count'].sum()

# Calculate probability of a laid-off person being part of the high-risk group
high_risk_prob = high_risk_laid_off / total_laid_off

# Calculate total number of laid-off persons in the Winner group
winner_laid_off = df.loc[df['Covid_flag'] == 'Winner', 'Laid_Off_Count'].sum()

# Calculate probability of a laid-off person being part of the Winner group
winner_prob = winner_laid_off / total_laid_off

# Print the probabilities
print("The probability of a laid-off person being part of the high-risk group is {:.2%}.".format(high_risk_prob))
print("The probability of a laid-off person being part of the Winner group is {:.2%}.".format(winner_prob))
print("For the high-risk group, the probability of a laid-off person being from a specific country cannot be calculated without additional information.")


The probability of a laid-off person being part of the high-risk group is 34.53%.
The probability of a laid-off person being part of the Winner group is 8.23%.
For the high-risk group, the probability of a laid-off person being from a specific country cannot be calculated without additional information.


In [80]:
winner_prob

0.08234976838631893

In [63]:
# Group dataframe by country and Covid_flag
grouped = df.groupby(['Country', 'Covid_flag'])['Laid_Off_Count'].sum()

# Calculate percentage of laid_off persons in each country who belong to the 'high-risk' group
result = grouped.groupby(level=0).apply(lambda x: 100 * x / float(x.sum())).unstack('Covid_flag')['high-risk']

# Sort the result in descending order
result = result.sort_values(ascending=False)

# Print the result
print(result)

Country
Uruguay                 100.000000
Greece                  100.000000
Russia                  100.000000
Myanmar                 100.000000
Czech Republic          100.000000
Malaysia                100.000000
Lithuania               100.000000
Thailand                100.000000
United Arab Emirates     94.891775
Indonesia                69.294366
Nigeria                  67.966696
Finland                  51.020408
Egypt                    50.000000
South Korea              50.000000
Singapore                42.904385
China                    41.509434
United Kingdom           37.638594
United States            36.177221
India                    36.118079
Canada                   34.770855
Hong Kong                33.707865
Israel                   29.195089
Netherlands              29.076397
Brazil                   22.098115
Luxembourg               21.951220
France                   21.904762
Germany                  19.628375
Australia                18.884220
Kenya       

In [72]:
merged_result.to_csv('country propability.csv', index=False)

In [70]:
pip install xlwt

Collecting xlwt
  Downloading xlwt-1.3.0-py2.py3-none-any.whl (99 kB)
     -------------------------------------- 100.0/100.0 kB 1.9 MB/s eta 0:00:00
Installing collected packages: xlwt
Successfully installed xlwt-1.3.0
Note: you may need to restart the kernel to use updated packages.


In [73]:

merged_result.to_excel('country propability.xls', index=False)

In [74]:
merged_result

Unnamed: 0_level_0,high-risk,Winner
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Uruguay,100.0,
Greece,100.0,
Russia,100.0,
Myanmar,100.0,
Czech Republic,100.0,
Malaysia,100.0,
Lithuania,100.0,
Thailand,100.0,
United Arab Emirates,94.891775,
Indonesia,69.294366,


In [77]:
# Group the original dataframe by country and sum the Laid_Off_Count column
grouped_counts = df.groupby('Country')['Laid_Off_Count'].sum()

# Merge the total counts with the merged_result dataframe
merged_result = pd.merge(merged_result, grouped_counts, on='Country', how='outer')

# Rename the new column
merged_result = merged_result.rename(columns={'Laid_Off_Count': 'Total Laid Off'})

# Print the final result
print(merged_result)


                       high-risk      Winner  Total Laid Off
Country                                                     
Uruguay               100.000000         NaN            80.0
Greece                100.000000         NaN            80.0
Russia                100.000000         NaN           400.0
Myanmar               100.000000         NaN           200.0
Czech Republic        100.000000         NaN            80.0
Malaysia              100.000000         NaN           180.0
Lithuania             100.000000         NaN            60.0
Thailand              100.000000         NaN            55.0
United Arab Emirates   94.891775         NaN          1155.0
Indonesia              69.294366         NaN          5201.0
Nigeria                67.966696         NaN          2282.0
Finland                51.020408   32.653061           490.0
Egypt                  50.000000         NaN           160.0
South Korea            50.000000         NaN           160.0
Singapore              4

In [78]:
merged_result['Total Laid Off']

Country
Uruguay                     80.0
Greece                      80.0
Russia                     400.0
Myanmar                    200.0
Czech Republic              80.0
Malaysia                   180.0
Lithuania                   60.0
Thailand                    55.0
United Arab Emirates      1155.0
Indonesia                 5201.0
Nigeria                   2282.0
Finland                    490.0
Egypt                      160.0
South Korea                160.0
Singapore                 6955.0
China                     6625.0
United Kingdom            8478.0
United States           294503.0
India                    37873.0
Canada                    7768.0
Hong Kong                  890.0
Israel                    4398.0
Netherlands              17540.0
Brazil                   11191.0
Luxembourg                 205.0
France                    1155.0
Germany                   9741.0
Australia                 4284.0
Kenya                      509.0
Estonia                    333.0
De

In [79]:
merged_result['Total Laid Off'].values


array([8.00000e+01, 8.00000e+01, 4.00000e+02, 2.00000e+02, 8.00000e+01,
       1.80000e+02, 6.00000e+01, 5.50000e+01, 1.15500e+03, 5.20100e+03,
       2.28200e+03, 4.90000e+02, 1.60000e+02, 1.60000e+02, 6.95500e+03,
       6.62500e+03, 8.47800e+03, 2.94503e+05, 3.78730e+04, 7.76800e+03,
       8.90000e+02, 4.39800e+03, 1.75400e+04, 1.11910e+04, 2.05000e+02,
       1.15500e+03, 9.74100e+03, 4.28400e+03, 5.09000e+02, 3.33000e+02,
       2.40000e+02, 1.15840e+04, 4.03000e+02, 5.70000e+02, 8.00000e+01,
       8.00000e+01, 1.20000e+02, 1.90000e+02, 3.70000e+02, 8.00000e+01,
       8.00000e+01, 3.37000e+02, 8.00000e+01, 8.50000e+01, 3.50000e+02,
       1.00500e+03, 3.00000e+02, 2.40000e+02, 8.00000e+01, 2.50000e+01,
       1.95000e+02, 8.00000e+01, 3.00000e+02, 3.15000e+02, 8.00000e+01,
       4.10000e+02, 2.22000e+02, 8.00000e+01, 2.40000e+02])

In [81]:
grouped_df = df[df['Laid_Off_Count'] == 100].groupby(['Country', 'continent']).size().reset_index(name='count')
grouped_df['percentage'] = grouped_df['count'] / grouped_df['count'].sum() * 100

In [83]:
grouped_df

Unnamed: 0,Country,continent,count,percentage
0,Argentina,South America,1,1.052632
1,Australia,Oceania,3,3.157895
2,Austria,Europe,1,1.052632
3,Brazil,South America,4,4.210526
4,Canada,North America,1,1.052632
5,Colombia,South America,1,1.052632
6,Germany,Europe,10,10.526316
7,India,Asia,10,10.526316
8,Indonesia,Asia,2,2.105263
9,Israel,Asia,6,6.315789


In [88]:
grouped_df = df[df['Laid_Off_Count'] == 100].groupby(['Industry']).size().reset_index(name='count')
grouped

In [89]:
grouped_df


Unnamed: 0,Industry,count,percentage
0,Aerospace,1,1.052632
1,Construction,1,1.052632
2,Consumer,2,2.105263
3,Crypto,5,5.263158
4,Data,1,1.052632
5,Education,5,5.263158
6,Finance,14,14.736842
7,Fitness,1,1.052632
8,Food,9,9.473684
9,HR,1,1.052632


In [95]:
# Subset the dataframe to only include rows with 'high-risk' in the 'Covid_flag' column
winner = df[df['Covid_flag'] == 'Winner']

# Aggregate the number of layoffs by Industry
Winner = winner.groupby('Industry')['Laid_Off_Count'].sum().reset_index()

In [96]:
Winner

Unnamed: 0,Industry,Laid_Off_Count
0,Data,7085.0
1,Hardware,14388.0
2,Media,7714.0
3,Security,7150.0


In [None]:
Winner

In [93]:
agg_df

Unnamed: 0,Industry,Laid_Off_Count
0,Consumer,47649.0
1,Retail,48573.0
2,Transportation,37468.0
3,Travel,18679.0


In [98]:
df['Percentage']

0       0.18
1       0.15
2       0.18
3       0.02
4       0.18
        ... 
2341    1.00
2342    0.10
2343    0.75
2344    0.40
2345    0.18
Name: Percentage, Length: 2346, dtype: float64

In [99]:
df['Laid_Off_Count'].sum()

441252.0

In [None]:
high_risk_100_percent = df[(df['Covid_flag'] == 'high-risk') & (df['Percentage'] == 1)]


In [100]:
high_risk_100_percent_by_country = df[(df['Covid_flag'] == 'high-risk') & (df['Percentage'] == 1)].groupby('Country').sum()
high_risk_100_percent_by_country

Unnamed: 0_level_0,Laid_Off_Count,Percentage
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Canada,125.0,2.0
India,80.0,1.0
Indonesia,160.0,2.0
Singapore,250.0,1.0
United Arab Emirates,160.0,2.0
United Kingdom,206.0,1.0
United States,2707.0,20.0


In [101]:
high_risk_100_percent_by_country = df[(df['Covid_flag'] == 'high-risk') & (df['Percentage'] == 1)].groupby('Country').sum()

winner_by_country = df[(df['Covid_flag'] == 'Winner')].groupby('Country').sum()


In [103]:
result = pd.concat([high_risk_100_percent_by_country, winner_by_country], axis=1)
result.columns = ['high_risk_100_percent', 'winner_by_country']
result.fillna(0, inplace=True)


ValueError: Length mismatch: Expected axis has 4 elements, new values have 2 elements

In [104]:
high_risk_100_percent_by_country

Unnamed: 0_level_0,Laid_Off_Count,Percentage
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Canada,125.0,2.0
India,80.0,1.0
Indonesia,160.0,2.0
Singapore,250.0,1.0
United Arab Emirates,160.0,2.0
United Kingdom,206.0,1.0
United States,2707.0,20.0


In [105]:
high_risk_countries = df[df['Covid_flag'] == 'high-risk']['Country'].unique()
high_risk_countries

array(['Indonesia', 'United States', 'Nigeria', 'France', 'India',
       'Germany', 'Australia', 'China', 'Brazil', 'Israel', 'Singapore',
       'South Korea', 'United Kingdom', 'Canada', 'Egypt', 'Luxembourg',
       'Greece', 'Malaysia', 'Thailand', 'Lithuania', 'Kenya', 'Finland',
       'Denmark', 'Sweden', 'Netherlands', 'United Arab Emirates',
       'Russia', 'Uruguay', 'Hong Kong', 'Czech Republic', 'Myanmar',
       'Estonia'], dtype=object)

In [106]:
df['Covid_flag'] = np.where(df['Industry'].isin(['Transportation', 'Retail', 'Consumer', 'Travel']), 'high-risk', 
                            np.where(df['Industry'].isin(['Hardware', 'Data', 'Media', 'Security']), 'Technology', 
                                    'Other'))


In [107]:
df.to_excel('spatial analyse.xls', index=False)

In [109]:
corr = df['Country'].corr(df['Laid_Off_Count'])

print("Correlation between Country and Laid Off Count:", corr)

TypeError: unsupported operand type(s) for /: 'str' and 'int'