<a href="https://colab.research.google.com/github/stefaniamocan/European-Young-Engineer-Survey/blob/main/Future_of_work.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Future of Work Analysis**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import plotly.graph_objects as go


Load the data set

In [None]:
# Read the survey export into a DataFrame (the path is relative to the working directory)
table = pd.read_csv(filepath_or_buffer='Future-of-work.csv')

# Preview the first rows to confirm the load worked
table.head()

Unnamed: 0,Number,How old are you?,Gender,Which country are you living in?,Do you regard yourself as an engineer?,What is your study/working field?,What is your current employment status?,What industry are you working in?,What university do you study at?,How many days per week did you do remote working *BEFORE* COVID-19 on average?,How many days per week did you do remote working *AFTER* COVID-19 on average?,What is the biggest *advantage* of Remote Working for you? \n\nChoose one:,What is the biggest *disadvantage* of Remote Working for you? \n\nChoose one:,*Rank the importance of the following working conditions/offers for you.*\n\nSelect your number 1 choice_ (1/3):_,Your number 2 choice _(2/3)_:,"Finally, your number 3 choice _(3/3)_:","Where do you feel more productive (home, office or a hybrid version) ?\n",Submitted At,Token
0,1,58,Female,Bulgaria,True,Chemical Engineering,Employed,Dd,,4.0,3.0,Positive environmental impact,No separation between work and home life,Impact,Working Environment,Reputation,Both,9/8/20 20:04,78k96kjbml49k02y0578k3rd5r05apne
1,2,24,Female,Germany,True,Chemical Engineering,Student,,TUM,0.0,5.0,Flexible working hours,No separation between work and home life,Working Environment,Impact,Flexible working time models,Office,9/9/20 7:35,qetkrmy3knd3awmnq3lqetkrmmvkutdh
2,3,35,Male,Belgium,True,Electrical Engineering,Employed,Government administration,,0.0,4.0,Flexible working hours,Lack of human interaction,Working Environment,Impact,Amount of salary,Both,9/9/20 7:36,7ifsnyk7jlmmme0784t2i7ifsnyeyvhd
3,4,25,Male,Germany,True,Mechanical Engineering,Self-employed,,,1.0,3.0,Flexible working hours,Lack of human interaction,Working Environment,Amount of salary,Reputation,Both,9/9/20 7:43,vykka4gcua0h9ba5wtdvykkaz1hvltw7
4,5,25,Female,Ireland,True,Electrical Engineering,Employed,Power sector,,1.0,3.0,Flexible working hours,Lack of human interaction,Flexible working time models,Working Environment,Amount of salary,Both,9/9/20 7:43,ctqu9dyje9cltxtctqu9jbrhzs9jdcpg


Arrange the dataset

In [None]:
# Rename the verbose survey questions to short column names and remove the
# 'Number' column.
# rename() returns a new frame instead of mutating in place, and
# drop(..., errors='ignore') keeps this cell safe to re-run after 'Number'
# has already been removed (list.remove() would raise ValueError on a re-run).
table = (
    table
    .rename(columns={
        '*Rank the importance of the following working conditions/offers for you.*\n\nSelect your number 1 choice_ (1/3):_': 'First Choice',
        'Your number 2 choice _(2/3)_:': 'Second Choice',
        'Finally, your number 3 choice _(3/3)_:': 'Third Choice',
        'What is the biggest *advantage* of Remote Working for you? \n\nChoose one:': 'Advantage',
        'What is the biggest *disadvantage* of Remote Working for you? \n\nChoose one:': 'Disadvantage',
        'How old are you?': 'Age',
        "Which country are you living in?": 'Country',
        "Do you regard yourself as an engineer?": "Engineer",
        'Where do you feel more productive (home, office or a hybrid version) ?\n': 'Office',
        'What is your current employment status?': 'Employment',
        'What is your study/working field?': 'Working field',
        'What industry are you working in?': 'Working Industry',
        'How many days per week did you do remote working *BEFORE* COVID-19 on average?': 'Remote days pre-covid',
        'How many days per week did you do remote working *AFTER* COVID-19 on average?': 'Remote days during-covid',
    })
    .drop(columns='Number', errors='ignore')
)

table.head()


Unnamed: 0,Age,Gender,Country,Engineer,Working field,Employment,Working Industry,What university do you study at?,Remote days pre-covid,Remote days during-covid,Advantage,Disadvantage,First Choice,Second Choice,Third Choice,Office,Submitted At,Token
0,58,Female,Bulgaria,True,Chemical Engineering,Employed,Dd,,4.0,3.0,Positive environmental impact,No separation between work and home life,Impact,Working Environment,Reputation,Both,9/8/20 20:04,78k96kjbml49k02y0578k3rd5r05apne
1,24,Female,Germany,True,Chemical Engineering,Student,,TUM,0.0,5.0,Flexible working hours,No separation between work and home life,Working Environment,Impact,Flexible working time models,Office,9/9/20 7:35,qetkrmy3knd3awmnq3lqetkrmmvkutdh
2,35,Male,Belgium,True,Electrical Engineering,Employed,Government administration,,0.0,4.0,Flexible working hours,Lack of human interaction,Working Environment,Impact,Amount of salary,Both,9/9/20 7:36,7ifsnyk7jlmmme0784t2i7ifsnyeyvhd
3,25,Male,Germany,True,Mechanical Engineering,Self-employed,,,1.0,3.0,Flexible working hours,Lack of human interaction,Working Environment,Amount of salary,Reputation,Both,9/9/20 7:43,vykka4gcua0h9ba5wtdvykkaz1hvltw7
4,25,Female,Ireland,True,Electrical Engineering,Employed,Power sector,,1.0,3.0,Flexible working hours,Lack of human interaction,Flexible working time models,Working Environment,Amount of salary,Both,9/9/20 7:43,ctqu9dyje9cltxtctqu9jbrhzs9jdcpg


Get a table with only engineers

In [None]:
# Keep only the respondents whose 'Engineer' answer is True
df_engineer = table.loc[table['Engineer'].eq(True)]

# Number of non-null 'Engineer' entries among those rows, i.e. the number of engineers
total_engineers = df_engineer['Engineer'].count()

Function to convert values to percentages:

In [None]:
def percentage(df, total=None):
  """Scale a Series of counts in place so each value is a percentage of a total.

  Parameters
  ----------
  df : pandas.Series
      Counts to convert. The Series is modified in place. Prefer passing a
      float Series (e.g. ``counts.astype(float)``): writing fractional
      percentages into an integer Series relies on pandas upcasting during
      assignment, which recent pandas versions warn about.
  total : number, optional
      Denominator of the percentage. Defaults to the notebook-level
      ``total_engineers`` so existing ``percentage(df)`` calls keep working.

  Returns
  -------
  pandas.Series
      The same object that was passed in, returned for convenience.
  """
  if total is None:
    # Backward-compatible default: the notebook-level number of engineers
    total = total_engineers

  # One vectorized computation instead of a Python loop over index labels;
  # every row is scaled exactly once, even if index labels repeat.
  df.iloc[:] = ((df / total) * 100).to_numpy()
  return df

Function to display a stacked bar chart:

In [None]:
def showfig(df,title, texttemplate, width, height,x_title, y_title):
  """Show a horizontal stacked bar chart with one trace per column of ``df``.

  df           -- table to plot; its index is used for the y axis and each
                  column becomes one stacked bar trace named after the column
  title        -- figure title
  texttemplate -- plotly text template applied to the value drawn inside each bar
  width/height -- figure size passed to the layout
  x_title      -- x axis title
  y_title      -- y axis title
  """
  background = 'rgb(248, 248, 255)'
  fig = go.Figure()

  # iterate over the dataframe and display one bar trace per column
  for column in df:
    values = df[column]
    bar = go.Bar(
        y=df.index,
        x=values,
        name=column,
        orientation='h',
        text=values,
        textposition='inside',
        texttemplate=texttemplate,
        textfont_color="white",
    )
    fig.add_trace(bar)

  # layout: stacking, axis titles, figure title, spacing, colours and size
  fig.update_layout(
      barmode='stack',
      xaxis={'categoryorder':'total descending'},
      xaxis_title=x_title,
      yaxis_title=y_title,
      title=title,
      bargap=0.5,
      plot_bgcolor=background,
      paper_bgcolor=background,
      width=width,
      height=height,
  )

  fig.show()

Function to display a bar chart:


In [None]:
def showfig_bar(df,title, width, height,x_title, y_title):
  """Show a horizontal bar chart of ``df`` with the value printed next to each bar.

  df           -- values to plot; its index is used for the y axis
  title        -- figure title
  width/height -- figure size passed to the layout
  x_title      -- label used for the x axis
  y_title      -- label used for the y axis
  """
  background = 'rgb(248, 248, 255)'

  # replace the default "x"/"y" axis labels with the given titles
  axis_labels = {"y": y_title, "x": x_title}
  fig = px.bar(x=df, y=df.index, orientation='h', labels=axis_labels)

  # print each value outside its bar, followed by a percent sign
  fig.update_traces(text=df, texttemplate='%{text:.2s}%', textposition='outside')

  fig.update_layout(
      title=title,
      bargap=0.5,
      plot_bgcolor=background,
      paper_bgcolor=background,
      width=width,
      height=height,
  )

  fig.show()

## **Ranking Question**: "*Rank the importance of the following working conditions/offers for you.*"

### General Overview of the Question

In [None]:
#take only the columns related to the ranking question
df = df_engineer.loc[:,['First Choice','Second Choice','Third Choice']]

#count how often each condition was picked per rank
#(Series.value_counts replaces the deprecated top-level pd.value_counts)
df = df.apply(pd.Series.value_counts)

#add a total column, which allows df to be sorted by overall popularity
df['total'] = df.sum(axis=1)
df = df.sort_values('total', ascending=True)

#show the graphic (first three columns only, i.e. without the helper 'total' column)
showfig(df.iloc[:, :3],"Rank the importance of the following working conditions/offers for you.", '%{text:.2s}', 1500,500, "Resp.", "Conditions")



## **Advantages of Remote Working**: "*What is the biggest advantage of remote Working for you?*"

### General Overview

In [None]:
#group by advantages, count the occurrences and sort values
df = df_engineer.groupby("Advantage").size().sort_values(ascending=True)

#get total number of entries
#(kept for reference only: percentage() divides by total_engineers, not by this value)
total=df.sum()

#cast to float first so the in-place conversion can store fractional values
#without relying on pandas upcasting an integer Series during assignment
df = df.astype(float)

#convert to %
percentage(df)

#show plot, skipping the first six rows of the ascending counts (the six least frequent answers)
showfig_bar(df.iloc[6:,],'What is the biggest advantage of Remote Working for you?', 1000, 450, "Resp.", "Advantages")





## **Disadvantage of Remote Working**

### General Overview

In [None]:
#group by disadvantages, count the occurrences and sort values
df = df_engineer.groupby("Disadvantage").size().sort_values(ascending=True)

#skip the first six rows of the ascending counts (the six least frequent answers);
#astype(float) returns a new float Series, so the in-place conversion below
#neither touches the sliced counts nor needs to upcast integers
df = df.iloc[6:,].astype(float)

#convert to %
percentage(df)

#show plot
showfig_bar(df,'What is the biggest disadvantage of Remote Working for you?', 1000, 450, "Resp.", "Disadvantages")


### Average Age of the Respondents for Each Choice

In [None]:
# mean age per chosen disadvantage, computed over all respondents in `table`
table.groupby("Disadvantage")[["Age"]].mean()


Unnamed: 0_level_0,Age
Disadvantage,Unnamed: 1_level_1
Bad technical conditions,29.761905
Have not worked from home,27.0
I don t have a job,21.0
Interruption at home,28.16
Lack of human interaction,26.727273
No disadvantage for me,38.0
No separation between work and home life,26.008
Older colleagues’ lack of trust,24.0
Time zone difference,27.666667
Unemployed,24.0


### Which countries chose bad technical conditions?

In [None]:
#keep only the respondents whose biggest disadvantage is bad technical conditions
#(the earlier two-column selection was overwritten immediately, so it is removed)
df = table[table["Disadvantage"] == 'Bad technical conditions']

#total answers per country, most frequent first
df.groupby('Country').size().sort_values(ascending=False)

Country
Spain             8
Germany           5
Romania           3
Italy             2
United Kingdom    1
Estonia           1
Austria           1
dtype: int64

## **Increased Productivity**: "*Where do you feel more productive (home, office or a hybrid version)?*"

### General Overview

In [None]:
#group by Office and count the answers;
#cast to float so the in-place percentage conversion can store fractional values
#without relying on pandas upcasting an integer Series during assignment
df = df_engineer.groupby("Office").size().sort_values(ascending=True).astype(float)

#convert to %
percentage(df)

#show plot
showfig_bar(df,'Where do you feel more productive (home, office or a hybrid version)?', 1000, 400, "Resp.", "Place")


                  

### Employed vs Student Comparison

In [None]:
# groupby Employment and Office and get the counts
df = df_engineer.groupby(['Employment', 'Office']).Office.count().reset_index(name='counts')

# pivot the df: one row per employment status, one column per answer
df = df.pivot(index='Employment', columns='Office', values='counts')

# add a total column, which allows df to be sorted
df['total'] = df.sum(axis=1)

# sort by group size; `ascending` must be a real boolean, not the string 'True'
df = df.sort_values('total', ascending=True)

# skip the first eight rows of the ascending order (the smallest groups) and copy,
# so the assignments below write to an independent frame rather than a slice
# NOTE(review): presumably this leaves only the Employed and Student rows - confirm against the data
df = df.iloc[8:,].copy()

# convert the counts to % of each row's total
df[['Both', 'Home', 'Office']] = df[['Both', 'Home', 'Office']].div(df['total'], axis=0) * 100

# show plot (first three columns only, i.e. without the helper 'total' column)
showfig(df.iloc[:, :3],'Where do you feel more productive (Employed vs. Student)?', '%{text:.2s}'+ '%', 1000, 400, "Resp.", "Work Place")



### Engineers vs. Non-engineers Comparison

In [None]:
# keep the engineer flag and the productivity answer of every respondent
df = table.loc[:,['Engineer','Office']]

# turn the boolean flag into readable group labels
df = df.assign(Engineer=df['Engineer'].map({True:'Engineers' ,False:'Non-engineers'}))

# count the answers per group, then spread them into one column per answer
df = (
    df.groupby(['Engineer', 'Office'])
      .Office.count()
      .reset_index(name='counts')
      .pivot(index='Engineer', columns='Office', values='counts')
)

# row totals are the denominators for the percentages
df['total'] = df.sum(axis=1)

# convert the counts to % of each row's total
df[['Both', 'Home', 'Office']] = df[['Both', 'Home', 'Office']].div(df['total'], axis=0) * 100

# show plot (first three columns only, i.e. without the helper 'total' column)
showfig(df.iloc[:, :3],'Where do you feel more productive (Engineers vs. Non-engineers)?', '%{text:.2s}'+ '%', 1000, 400,"Resp.", "Work Place")




### Young Engineers (<30) vs. *old* Engineers (>=30)

In [None]:
# only the productivity answer and the age are needed for this comparison
df = df_engineer.loc[:,['Office','Age']]

# replace all the ages >= 30 with the "old" label and all the others with the "young" label
# NOTE(review): a missing age compares as False and therefore lands in 'Young Engineers'
df['Age'] = np.where(df['Age']>=30, 'Old Engineers', 'Young Engineers')

# groupby Age and Office and get the counts
df = df.groupby(['Age', 'Office']).Office.count().reset_index(name='counts')

# pivot the df: one row per age group, one column per answer
df = df.pivot(index='Age', columns='Office', values='counts')

# add a total column (number of answers per age group)
df['total'] = df.sum(axis=1)

# grand total of answers across both age groups (not used by the plot below)
total=df['total'].sum(axis=0)

# convert the counts to % of each row's total
df[['Both', 'Home', 'Office']] = df[['Both', 'Home', 'Office']].div(df['total'], axis=0) * 100

# show plot (first three columns only, i.e. without the helper 'total' column);
# the title states >=30 to match the threshold used above
showfig(df.iloc[:, :3],'Where do you feel more productive (Old Engineers (>=30) vs. Young Engineers (<30))?', '%{text:.2s}'+ '%', 1000, 400, "Resp.", "Work Place")
