In [2]:
import pandas as pd
import numpy as np

In [3]:
# Short, code-friendly labels applied to the raw file's columns, in file order
columns = ['Industry', 'Gender_ethnicity', 'Ecommerce_sales', 'Total_firms', 'Total_sales']

# Load the raw extract and relabel its columns
df = pd.read_csv('Files/ecommerce.csv')
df.columns = columns

# Drop the Total_sales column; the preview below shows the remaining ones
df = df.drop('Total_sales', axis = 1)
df.head()

Unnamed: 0,Industry,Gender_ethnicity,Ecommerce_sales,Total_firms
0,Total for all sectors,All firms,All firms,27626360
1,Total for all sectors,All firms,No e-commerce sales,15787853
2,Total for all sectors,All firms,Less than 1%,175745
3,Total for all sectors,All firms,1% to 4%,177818
4,Total for all sectors,All firms,5% to 9%,122155


In [4]:
# Check data types: inspect the dtype pandas inferred for each column of df
df.dtypes

Industry            object
Gender_ethnicity    object
Ecommerce_sales     object
Total_firms          int64
dtype: object

In [10]:
# Build new_df: economy-wide rows ("Total for all sectors") for the
# "All firms" ownership group, one row per e-commerce bracket.
all_sectors = df['Industry'] == 'Total for all sectors'
all_owners = df['Gender_ethnicity'] == 'All firms'
# The row whose bracket is itself "All firms" is the grand total. Exclude it
# by label instead of slicing off the first row, so the result does not
# depend on the row order of the input file.
not_grand_total = df['Ecommerce_sales'] != 'All firms'
new_df = df.loc[all_sectors & all_owners & not_grand_total]
new_df

Unnamed: 0,Industry,Gender_ethnicity,Ecommerce_sales,Total_firms
1,Total for all sectors,All firms,No e-commerce sales,15787853
2,Total for all sectors,All firms,Less than 1%,175745
3,Total for all sectors,All firms,1% to 4%,177818
4,Total for all sectors,All firms,5% to 9%,122155
5,Total for all sectors,All firms,10% to 19%,149996
6,Total for all sectors,All firms,20% to 49%,177630
7,Total for all sectors,All firms,50% to 99%,289550
8,Total for all sectors,All firms,100%,215826
9,Total for all sectors,All firms,Don't know,249092
10,Total for all sectors,All firms,Total reporting,17347429


In [11]:
# Firms per e-commerce bracket, largest first, plus each bracket's share of
# the "Total reporting" count.
# NOTE: new_df is expected to no longer contain the "All firms" grand-total row.
#
# Total_firms is selected explicitly before summing: from pandas 2.0 on,
# groupby().sum() keeps non-numeric columns, so a positional iloc[0, 0]
# lookup would no longer be guaranteed to hit Total_firms.
df_grouped = new_df.groupby('Ecommerce_sales')[['Total_firms']].sum()
df_grouped = df_grouped.sort_values('Total_firms', ascending = False)
# Look the denominator up by label rather than trusting that it sorts first
total_reporting = df_grouped.loc['Total reporting', 'Total_firms']
df_grouped['Percentage'] = (df_grouped['Total_firms']/total_reporting*100).round(2)
df_grouped

Unnamed: 0_level_0,Total_firms,Percentage
Ecommerce_sales,Unnamed: 1_level_1,Unnamed: 2_level_1
Total reporting,17347429,100.0
No e-commerce sales,15787853,91.01
50% to 99%,289550,1.67
Don't know,249092,1.44
100%,215826,1.24
1% to 4%,177818,1.03
20% to 49%,177630,1.02
E-commerce not reported,176157,1.02
Less than 1%,175745,1.01
10% to 19%,149996,0.86


In [12]:
# TODO: add a percentage column after Total_firms
# Index by (ownership group, e-commerce bracket), keep only the
# "Total for all sectors" rows, then discard the now-constant Industry column.
df_multi_gender = (
    df.set_index(['Gender_ethnicity', 'Ecommerce_sales'])
      .loc[lambda frame: frame['Industry'] == 'Total for all sectors']
      .drop(columns = 'Industry')
)
df_multi_gender

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_firms
Gender_ethnicity,Ecommerce_sales,Unnamed: 2_level_1
All firms,All firms,27626360
All firms,No e-commerce sales,15787853
All firms,Less than 1%,175745
All firms,1% to 4%,177818
All firms,5% to 9%,122155
All firms,10% to 19%,149996
All firms,20% to 49%,177630
All firms,50% to 99%,289550
All firms,100%,215826
All firms,Don't know,249092


In [14]:
#Female-owned firms: count per e-commerce bracket and share of "Total reporting"
df_female = df_multi_gender.loc[pd.IndexSlice['Female-owned',:],:]
# Remove the group's grand-total row ("All firms") by label instead of
# assuming it is the first row of the slice.
df_female = df_female.drop(index = 'All firms', level = 'Ecommerce_sales')
df_female = df_female.sort_values('Total_firms', ascending = False)
# Take the denominator from the "Total reporting" row explicitly rather than
# from whichever row happens to sort first.
female_reporting = df_female.xs('Total reporting', level = 'Ecommerce_sales')['Total_firms'].iloc[0]
df_female['Percentage'] = (df_female['Total_firms']/female_reporting*100).round(2)
df_female

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_firms,Percentage
Gender_ethnicity,Ecommerce_sales,Unnamed: 2_level_1,Unnamed: 3_level_1
Female-owned,Total reporting,5680360,100.0
Female-owned,No e-commerce sales,5113985,90.03
Female-owned,50% to 99%,97952,1.72
Female-owned,Don't know,93659,1.65
Female-owned,100%,79388,1.4
Female-owned,Less than 1%,71059,1.25
Female-owned,1% to 4%,68996,1.21
Female-owned,E-commerce not reported,66285,1.17
Female-owned,20% to 49%,61982,1.09
Female-owned,10% to 19%,50475,0.89


In [15]:
#Male-owned firms: count per e-commerce bracket and share of "Total reporting"
df_male = df_multi_gender.loc[pd.IndexSlice['Male-owned',:],:]
# Remove the group's grand-total row ("All firms") by label instead of
# assuming it is the first row of the slice.
df_male = df_male.drop(index = 'All firms', level = 'Ecommerce_sales')
df_male = df_male.sort_values('Total_firms', ascending = False)
# Take the denominator from the "Total reporting" row explicitly rather than
# from whichever row happens to sort first.
male_reporting = df_male.xs('Total reporting', level = 'Ecommerce_sales')['Total_firms'].iloc[0]
df_male['Percentage'] = (df_male['Total_firms']/male_reporting*100).round(2)
df_male

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_firms,Percentage
Gender_ethnicity,Ecommerce_sales,Unnamed: 2_level_1,Unnamed: 3_level_1
Male-owned,Total reporting,9460736,100.0
Male-owned,No e-commerce sales,8711340,92.08
Male-owned,50% to 99%,142919,1.51
Male-owned,Don't know,118200,1.25
Male-owned,100%,105083,1.11
Male-owned,E-commerce not reported,92833,0.98
Male-owned,20% to 49%,87356,0.92
Male-owned,1% to 4%,79810,0.84
Male-owned,Less than 1%,78133,0.83
Male-owned,10% to 19%,75863,0.8


In [16]:
#White-owned firms: count per e-commerce bracket and share of "Total reporting"
df_white = df_multi_gender.loc[pd.IndexSlice['White',:],:]
# Remove the group's grand-total row ("All firms") by label instead of
# assuming it is the first row of the slice.
df_white = df_white.drop(index = 'All firms', level = 'Ecommerce_sales')
df_white = df_white.sort_values('Total_firms', ascending = False)
# Take the denominator from the "Total reporting" row explicitly rather than
# from whichever row happens to sort first.
white_reporting = df_white.xs('Total reporting', level = 'Ecommerce_sales')['Total_firms'].iloc[0]
df_white['Percentage'] = (df_white['Total_firms']/white_reporting*100).round(2)
df_white

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_firms,Percentage
Gender_ethnicity,Ecommerce_sales,Unnamed: 2_level_1,Unnamed: 3_level_1
White,Total reporting,14663863,100.0
White,No e-commerce sales,13290967,90.64
White,50% to 99%,258089,1.76
White,Don't know,210758,1.44
White,100%,187686,1.28
White,1% to 4%,158926,1.08
White,20% to 49%,157264,1.07
White,Less than 1%,155357,1.06
White,10% to 19%,134036,0.91
White,E-commerce not reported,121453,0.83


In [17]:
#Black or African American-owned firms: count per e-commerce bracket and share of "Total reporting"
df_black = df_multi_gender.loc[pd.IndexSlice['Black or African American',:],:]
# Remove the group's grand-total row ("All firms") by label instead of
# assuming it is the first row of the slice.
df_black = df_black.drop(index = 'All firms', level = 'Ecommerce_sales')
df_black = df_black.sort_values('Total_firms', ascending = False)
# Take the denominator from the "Total reporting" row explicitly rather than
# from whichever row happens to sort first.
black_reporting = df_black.xs('Total reporting', level = 'Ecommerce_sales')['Total_firms'].iloc[0]
df_black['Percentage'] = (df_black['Total_firms']/black_reporting*100).round(2)
df_black

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_firms,Percentage
Gender_ethnicity,Ecommerce_sales,Unnamed: 2_level_1,Unnamed: 3_level_1
Black or African American,Total reporting,840359,100.0
Black or African American,No e-commerce sales,784874,93.4
Black or African American,E-commerce not reported,22883,2.72
Black or African American,Don't know,9752,1.16
Black or African American,50% to 99%,9462,1.13
Black or African American,Less than 1%,8520,1.01
Black or African American,1% to 4%,6690,0.8
Black or African American,100%,6662,0.79
Black or African American,20% to 49%,6102,0.73
Black or African American,10% to 19%,4081,0.49


In [18]:
#Asian-owned firms: count per e-commerce bracket and share of "Total reporting"
df_asian = df_multi_gender.loc[pd.IndexSlice['Asian',:],:]
# Remove the group's grand-total row ("All firms") by label instead of
# assuming it is the first row of the slice.
df_asian = df_asian.drop(index = 'All firms', level = 'Ecommerce_sales')
df_asian = df_asian.sort_values('Total_firms', ascending = False)
# Take the denominator from the "Total reporting" row explicitly rather than
# from whichever row happens to sort first.
asian_reporting = df_asian.xs('Total reporting', level = 'Ecommerce_sales')['Total_firms'].iloc[0]
df_asian['Percentage'] = (df_asian['Total_firms']/asian_reporting*100).round(2)
df_asian

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_firms,Percentage
Gender_ethnicity,Ecommerce_sales,Unnamed: 2_level_1,Unnamed: 3_level_1
Asian,Total reporting,1111430,100.0
Asian,No e-commerce sales,1028258,92.52
Asian,100%,17881,1.61
Asian,50% to 99%,15843,1.43
Asian,Don't know,12841,1.16
Asian,E-commerce not reported,12535,1.13
Asian,20% to 49%,9920,0.89
Asian,10% to 19%,7596,0.68
Asian,Less than 1%,6708,0.6
Asian,1% to 4%,6698,0.6


In [19]:
#Hispanic-owned firms: count per e-commerce bracket and share of "Total reporting"
df_hispanic = df_multi_gender.loc[pd.IndexSlice['Hispanic',:],:]
# Remove the group's grand-total row ("All firms") by label instead of
# assuming it is the first row of the slice.
df_hispanic = df_hispanic.drop(index = 'All firms', level = 'Ecommerce_sales')
df_hispanic = df_hispanic.sort_values('Total_firms', ascending = False)
# Take the denominator from the "Total reporting" row explicitly rather than
# from whichever row happens to sort first.
hispanic_reporting = df_hispanic.xs('Total reporting', level = 'Ecommerce_sales')['Total_firms'].iloc[0]
df_hispanic['Percentage'] = (df_hispanic['Total_firms']/hispanic_reporting*100).round(2)
df_hispanic

Unnamed: 0_level_0,Unnamed: 1_level_0,Total_firms,Percentage
Gender_ethnicity,Ecommerce_sales,Unnamed: 2_level_1,Unnamed: 3_level_1
Hispanic,Total reporting,1262033,100.0
Hispanic,No e-commerce sales,1191056,94.38
Hispanic,E-commerce not reported,24632,1.95
Hispanic,50% to 99%,13481,1.07
Hispanic,Don't know,13000,1.03
Hispanic,100%,11384,0.9
Hispanic,20% to 49%,7858,0.62
Hispanic,1% to 4%,7090,0.56
Hispanic,Less than 1%,6772,0.54
Hispanic,10% to 19%,5925,0.47


In [13]:
# TODO: add a percentage column after Total_firms
# TODO: find out why and how different companies work with e-commerce
#df_multi_industry = df.set_index(['Industry', 'Ecommerce_sales'])
#df_multi_industry