# Insights
- Location of the jobs
- Top hiring companies
- True proportion of data analysts vs. scientists
- Percentage of overlapped job positions
- Proportion of seniority

# Importing libraries

In [210]:
import re
import sys
import nltk
import unidecode
import numpy as np
import pandas as pd
import seaborn as sns
from nltk import Text
from textblob import TextBlob
from langdetect import detect
from collections import Counter
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nltk.corpus import stopwords
from googletrans import Translator
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, RegexpTokenizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
%matplotlib inline
pd.set_option('display.max_colwidth', None)

In [231]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 210 entries, 0 to 209
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   title        210 non-null    object
 1   company      210 non-null    object
 2   modality     210 non-null    object
 3   description  210 non-null    object
 4   position     210 non-null    object
dtypes: object(5)
memory usage: 8.3+ KB


# Importing the data

In [211]:
df = pd.read_csv('../data/analyst_scientist_bcn_last_month.csv')

# Removing duplicate job offers (different locations only)

In [212]:
df.drop([159, 163, 207], inplace=True)
df.reset_index(drop=True, inplace=True)

In [213]:
df.head(2)

Unnamed: 0,title,company,location,modality,description
0,Language Data Analyst Norwegian Speakers (Barcelona),TransPerfect,"Barcelona, Catalonia, Spain",On-site,"\nJob description\nDataForce is part of the TransPerfect family of companies, the world’s largest provider of language and technology solutions for global business, with offices in more than 100 cities worldwide.\nWe are currently hiring Norwegian speakers in Barcelona to join us in an innovative and interesting project to improve Artificial Intelligence and technology (i.e., speech or text recognition, input methods, keyboard/swipe technology, or other areas of human-machine interaction related to languages).As a Language Data Analyst in DataForce, your main task will be to classify, sort, label, and annotate data that are used to train AI.\nJob requirements\nNo previous experience or training in the field is required - we will teach you all you need to know!Native level proficiency in Norwegian is a must.Great comprehension of English is also required (tests and training materials are in English.\nYou also need to be:\neligible to work in Spain able to work from our offices in Barcelona - this position is onsitedetail-oriented and not afraid of repetitive tasksa team player!\nWhat we offer\nFull-time work in a fast-growing multinational company.Friendly and international environment in the office located in the center of sunny Barcelona.Stable work schedule (we work standard office hours - no shifts or work on weekends!).Competitive salary.\nDoes it sound interesting? Then do not hesitate and hit the Apply button!\n"
1,GO! Graduate IT & Data Analysis,Essity,"Barcelona, Catalonia, Spain",On-site,"\nGO! Graduate - IT Data & Analytics\nDo you want to kick-start a career with purpose and break barriers to improved well-being? Are you thriving in a fast-paced environment where you can make an impact and be part of a global team? The Essity GO! Program offers you a real job with real responsibilities from day one.\nWithin the Global Business Services BU we are looking for candidates to join as GO! Graduate GO! Graduate Technology based in San Joan Despi, in Barcellona to help shaping the future at Essity.\nThe GO! Program is a great way to start to use those skills you have learned and put them to the test in a real job with real responsibilities, while you take the driver’s seat in your development. If you are an ambitious person looking to make an impact, this is the role for you! Working at Essity is not just a career; it is a chance to directly make the world a healthier, more hygienic and safer place.\nAt Essity, we believe every career is as unique as the individual and empower employees to reach their full potential in a winning culture motivated by a powerful purpose. Our commitment is supported by our beliefs: We are committed, we care, we collaborate and we have courage.\n What You Will Do\nAt Essity, we see graduates as playing an important role in challenging the status quo, driving change, and making a positive impact on the business and on hygiene and health globally.\nAs GO! Graduate IT & Data Analysis, you will be involved in various Technology projects to gain experience within projects and implementation of technology through partnership and cooperation with external suppliers and internal customers.\nWork in different areas within and around IT Data & Analytics, preparing you to start a promising career within this areaDevelop skills like Data Modelling / Information Architecture / Dashboarding / Business Requirement Analysis / Project Management / Data Science (ML)Work with and learn latest technologies like SAP Business Warehouse / SAP HANA / SAP Application Cloud / Azure Data Lake / MS PowerBiWork in direct collaboration with both IT & business colleaguesSupport experienced colleagues in daily tasksParticipate into project activities to deliver profitable growthPerform standard technical tasksMonitor technical processes and work methodsCollect and analyze technical data to identify improvement opportunities and propose corrective actionsEnsure collaboration and information flow across multiple businesses and stakeholdersNetwork with other graduates and build networks across Essity\n Who You Are\nYou have a minimum Bachelor degree in Physics / Mathematics / Information Technology / Computer Science or similar completed at program start.You have maximum 24 months of work experience after graduation. Work experience is not required, but appropriate internships in industry is an advantage.You have an interest in IT and can show this through (extra-) curricular activities.You have an excellent level of English is required and also good skills in local language (mention the country specific)You have International mindset from working and/or studying abroad and an interest in collaborating with people from different cultures.You have willingness to travel and relocate if needed.You have an interest in developing and leading yourself, a curiosity to learn new things and an ability to inspire people to achieve things together. With your solution-oriented approach you are eager to make a difference and have the courage to act on own ideas.You are a great networker and communicator who thrives in a changing environment. You have a strong learning agility and ability to adapt to change.\n What We Can Offer You\nAt Essity, we believe everyone's learning and professional development is unique and want to empower employees to reach their full potential in a winning culture motivated by a powerful purpose.\nCollaborative and Inclusive Culture | Empowering & Engaged Leaders | Working with Powerful Purpose & Sustainable Impact | Learning and Growing in your Career | Supporting Well-being & Sustainable Working Life | Life-changing Innovations | Competitive Total rewards\nTogether, we are improving lives, every day.\nWorking at Essity is not just a career; it is a chance to directly make the world a healthier, more hygienic and safer place. With impactful innovations coupled with sustainable solutions, we strive to reach more people every year with the necessary and essential solutions for well-being.\n\n"


# Categorization of positions

## Proportion of job positions raw

In [214]:
df['title'] = df['title'].str.lower()
analysts = len(df[df['title'].str.contains("anal")])
scientists = len(df[df['title'].str.contains("scien")])
engineers = len(df[df['title'].str.contains("engine")])
print(f'Analysts: {round(analysts/df.shape[0]*100)}%')
print(f'Scientists: {round(scientists/df.shape[0]*100)}%')
print(f'Engineers: {round(engineers/df.shape[0]*100)}%')

Analysts: 35%
Scientists: 25%
Engineers: 39%


## Overlapped job offers (cointaining analyst + scientist)

In [215]:
n = 0
overlapped = []
for index, job in df['title'].iteritems():
    if 'scien' in job and 'anal' in job:
        n += 1
        overlapped.append(index)
per = round(n/(analysts+scientists)*100)
print(f'{n} job positions are overlapped, {per}% of all Data Analyst & Data Scientist positions.')

12 job positions are overlapped, 5% of all Data Analyst & Data Scientist positions.


In [216]:
df.iloc[overlapped, 0]

17                                     game data analyst/scientist
24                  data scientist – marketing & network analytics
37                                   senior data analyst/scientist
72            game data scientist/analyst - barcelona gaming giant
86                 global analytics, data and automation scientist
108                  digital marketing data analyst/data scientist
150                 senior data analyst / data scientist : pricing
162                      senior data analyst / data scientist : ua
176                 senior data analyst / data scientist : courses
296    large format marketing data science & business analyst lead
373                      senior data scientist - pricing analytics
382                                      data scientist, analytics
Name: title, dtype: object

### Manual categorization of overlapped job position titles

Manual categorization after reading the descriptions:  
17: analyst  
37: analyst  
72: analyst  
86: scientist  
108: analyst  
150: analyst  
162: analyst  
176: analyst  

In [217]:
# Changing titles for categorization
df.iloc[17, 0] = 'game data analyst'
df.iloc[37, 0] = 'senior data analyst'
df.iloc[72, 0] = 'game data analyst - barcelona gaming giant'
df.iloc[86, 0] = 'data and automation scientist'
df.iloc[108, 0] = 'digital marketing data analyst'
df.iloc[150, 0] = 'senior data analyst : pricing'
df.iloc[162, 0] = 'senior data analyst : ua'
df.iloc[176, 0] = 'senior data analyst : courses'

## Categorizing job position and dropping irrelevant ones

In [218]:
category = []
dropped = []
for index, position in df['title'].iteritems():
    if "data scien" in position:
        category.append('data scientist')
    elif "data anal" in position:
        category.append('data analyst')
    else:
        dropped.append(index)

df.drop(dropped, inplace=True)
df.reset_index(inplace=True, drop=True)
df['position'] = category

# Exploring the data before cleaning it

## Proportion of job positions

In [219]:
df['position'].value_counts()

data analyst      119
data scientist     91
Name: position, dtype: int64

In [220]:
print(f"Analysts: {round(len(df[df['position'] == 'data analyst'])/df.shape[0]*100)}%")
print(f"Scientists: {round(len(df[df['position'] == 'data scientist'])/df.shape[0]*100)}%")

Analysts: 57%
Scientists: 43%


## Location

In [221]:
df['modality'].value_counts()

Unknown    59
On-site    57
Hybrid     49
Remote     45
Name: modality, dtype: int64

## Top hiring companies

In [222]:
df['company'].value_counts()

Glovo                                 19
eDreams ODIGEO                         7
Accenture España                       6
Zurich Insurance                       6
HP                                     6
                                      ..
Vertex Professional Services (VPS)     1
Cofidis España                         1
PPG                                    1
ZF Group                               1
Premier Research                       1
Name: company, Length: 127, dtype: int64

## Average length of descriptions

In [223]:
total = round(df['description'].str.split().apply(len).mean())
analysts = round(df[df['title'].str.contains("anal")]['description'].str.split().apply(len).mean())
scientists = round(df[df['title'].str.contains("scien")]['description'].str.split().apply(len).mean())
print(f'Average: {total}')
print(f'Analysts: {analysts}')
print(f'Scientists: {scientists}')

Average: 574
Analysts: 575
Scientists: 577


# Cleaning

## Dropping the location column

In [224]:
df.drop('location', axis=1, inplace=True)

## Descriptions

In [225]:
# Replacing line jumps with spaces
df['description'] = df['description'].str.replace('\n', ' ')
# Deleting hyphens & ¿
df['description'] = df['description'].map(lambda x: x.replace('-', '').replace('¿', ''))
# Normalising words (no accents, etc.)
df['description'] = df['description'].map(lambda x: unidecode.unidecode(x))
# Adding a space in a word if it contains a capitalised letter in between
df['description'] = df['description'].map(lambda x: re.sub(r"(?<![A-Z])(?<!^)([A-Z])",r" \1", x))
# Adding spaces before and after numbers
df['description'] = df['description'].map(lambda x: re.sub(r"([0-9]+(\.[0-9]+)?)", r" \1", x))
# Substitute n number of spaces by just one space
df['description'] = df['description'].map(lambda x: ' '.join(x.split()))
# Remove spaces before dots and after opening parenthesis
df['description'] = df['description'].map(lambda x: x.replace('( ', '(').replace(' .', '.').replace('/ ', '/'))
# Fixing some problems with the normalisation
df['description'] = df['description'].map(lambda x: x.replace('ano', 'año').replace('anos', 'años'))
# Translating the job offers in spanish
#df['description'] = df['description'].map(lambda x: str(TextBlob(x).translate(to='en'))if detect(x) == 'es' else x)

In [226]:
df.head(2)

Unnamed: 0,title,company,modality,description,position
0,language data analyst norwegian speakers (barcelona),TransPerfect,On-site,"Job description Data Force is part of the Trans Perfect family of companies, the world's largest provider of language and technology solutions for global business, with offices in more than 100 cities worldwide. We are currently hiring Norwegian speakers in Barcelona to join us in an innovative and interesting project to improve Artificial Intelligence and technology (i.e., speech or text recognition, input methods, keyboard/swipe technology, or other areas of humanmachine interaction related to languages). As a Language Data Analyst in Data Force, your main task will be to classify, sort, label, and annotate data that are used to train AI. Job requirements No previous experience or training in the field is required we will teach you all you need to know! Native level proficiency in Norwegian is a must. Great comprehension of English is also required (tests and training materials are in English. You also need to be: eligible to work in Spain able to work from our offices in Barcelona this position is onsitedetailoriented and not afraid of repetitive tasksa team player! What we offer Fulltime work in a fastgrowing multinational company. Friendly and international environment in the office located in the center of sunny Barcelona. Stable work schedule (we work standard office hours no shifts or work on weekends!). Competitive salary. Does it sound interesting? Then do not hesitate and hit the Apply button!",data analyst
1,go! graduate it & data analysis,Essity,On-site,"GO! Graduate IT Data & Analytics Do you want to kickstart a career with purpose and break barriers to improved wellbeing? Are you thriving in a fastpaced environment where you can make an impact and be part of a global team? The Essity GO! Program offers you a real job with real responsibilities from day one. Within the Global Business Services BU we are looking for candidates to join as GO! Graduate GO! Graduate Technology based in San Joan Despi, in Barcellona to help shaping the future at Essity. The GO! Program is a great way to start to use those skills you have learned and put them to the test in a real job with real responsibilities, while you take the driver's seat in your development. If you are an ambitious person looking to make an impact, this is the role for you! Working at Essity is not just a career; it is a chance to directly make the world a healthier, more hygienic and safer place. At Essity, we believe every career is as unique as the individual and empower employees to reach their full potential in a winning culture motivated by a powerful purpose. Our commitment is supported by our beliefs: We are committed, we care, we collaborate and we have courage. What You Will Do At Essity, we see graduates as playing an important role in challenging the status quo, driving change, and making a positive impact on the business and on hygiene and health globally. As GO! Graduate IT & Data Analysis, you will be involved in various Technology projects to gain experience within projects and implementation of technology through partnership and cooperation with external suppliers and internal customers. Work in different areas within and around IT Data & Analytics, preparing you to start a promising career within this area Develop skills like Data Modelling /Information Architecture /Dashboarding /Business Requirement Analysis /Project Management /Data Science (ML) Work with and learn latest technologies like SAP Business Warehouse /SAP HANA /SAP Application Cloud /Azure Data Lake /MS Power Bi Work in direct collaboration with both IT & business colleagues Support experienced colleagues in daily tasks Participate into project activities to deliver profitable growth Perform standard technical tasks Monitor technical processes and work methods Collect and analyze technical data to identify improvement opportunities and propose corrective actions Ensure collaboration and information flow across multiple businesses and stakeholders Network with other graduates and build networks across Essity Who You Are You have a minimum Bachelor degree in Physics /Mathematics /Information Technology /Computer Science or similar completed at program start. You have maximum 24 months of work experience after graduation. Work experience is not required, but appropriate internships in industry is an advantage. You have an interest in IT and can show this through (extra) curricular activities. You have an excellent level of English is required and also good skills in local language (mention the country specific) You have International mindset from working and/or studying abroad and an interest in collaborating with people from different cultures. You have willingness to travel and relocate if needed. You have an interest in developing and leading yourself, a curiosity to learn new things and an ability to inspire people to achieve things together. With your solutionoriented approach you are eager to make a difference and have the courage to act on own ideas. You are a great networker and communicator who thrives in a changing environment. You have a strong learning agility and ability to adapt to change. What We Can Offer You At Essity, we believe everyone's learning and professional development is unique and want to empower employees to reach their full potential in a winning culture motivated by a powerful purpose. Collaborative and Inclusive Culture | Empowering & Engaged Leaders | Working with Powerful Purpose & Sustainable Impact | Learning and Growing in your Career | Supporting Wellbeing & Sustainable Working Life | Lifechanging Innovations | Competitive Total rewards Together, we are improving lives, every day. Working at Essity is not just a career; it is a chance to directly make the world a healthier, more hygienic and safer place. With impactful innovations coupled with sustainable solutions, we strive to reach more people every year with the necessary and essential solutions for wellbeing.",data analyst


# Tokenizing the descriptions + removing stopwords & other things

In [227]:
# Tokenising + lowercase + removing punctuation
tokenizer = RegexpTokenizer(r'\w+')
df['description'] = df['description'].map(lambda x: tokenizer.tokenize(x.lower()))
# Removing stopwords
stop_words = stopwords.words('english')
#for index, des in df['description'].iteritems():
    #df.iloc[index, 3] =  [w for w in des if not w in stop_words]
# Lemmatization
def lemma(text):
    return [WordNetLemmatizer().lemmatize(w) for w in text]
df['description'] = df['description'].apply(lemma)

In [229]:
df.head(2)

Unnamed: 0,title,company,modality,description,position
0,language data analyst norwegian speakers (barcelona),TransPerfect,On-site,"[job, description, data, force, is, part, of, the, trans, perfect, family, of, company, the, world, s, largest, provider, of, language, and, technology, solution, for, global, business, with, office, in, more, than, 100, city, worldwide, we, are, currently, hiring, norwegian, speaker, in, barcelona, to, join, u, in, an, innovative, and, interesting, project, to, improve, artificial, intelligence, and, technology, i, e, speech, or, text, recognition, input, method, keyboard, swipe, technology, or, other, area, of, humanmachine, interaction, related, to, language, a, a, language, data, analyst, in, data, force, your, main, task, will, be, to, classify, sort, label, and, annotate, data, that, are, used, ...]",data analyst
1,go! graduate it & data analysis,Essity,On-site,"[go, graduate, it, data, analytics, do, you, want, to, kickstart, a, career, with, purpose, and, break, barrier, to, improved, wellbeing, are, you, thriving, in, a, fastpaced, environment, where, you, can, make, an, impact, and, be, part, of, a, global, team, the, essity, go, program, offer, you, a, real, job, with, real, responsibility, from, day, one, within, the, global, business, service, bu, we, are, looking, for, candidate, to, join, a, go, graduate, go, graduate, technology, based, in, san, joan, despi, in, barcellona, to, help, shaping, the, future, at, essity, the, go, program, is, a, great, way, to, start, to, use, those, ...]",data analyst


# Tokenizing the titles + removing stopwords & other things

In [101]:
# Tokenising + lowercase + removing punctuation
df['title'] = df['title'].map(lambda x: tokenizer.tokenize(x.lower()))
# Removing stopwords
for index, title in df['title'].iteritems():
    df.iloc[index, 0] = [w for w in title if not w in stop_words]

AttributeError: 'list' object has no attribute 'lower'

# NLTK objects for titles & descriptions

In [19]:
# titles
all_titles = []
for title in df['title']:
    for word in title:
        all_titles.append(word)
titles = Text(all_titles)

In [49]:
for index, d in df['description'].iteritems():
    d = [w.replace('visualisation', 'visualization') for w in d]
    d = [w.replace('modelling', 'modeling') for w in d]
    d = [w.replace('artificial intelligence', 'ai') for w in d]
    d = [w.replace('dashboarding', 'dashboard') for w in d]
    d = [w.replace('kpis', 'kpi') for w in d]
    d = [w.replace('report', 'reporting') for w in d]
    d = [w.replace('predict', 'prediction') for w in d]
    d = [w.replace('creative', 'creativity') for w in d]
    df['description'][index] = d

In [52]:
# descriptions
all_des = []
all_wrds = []
for des in df['description']:
    all_des.append(des)
    for word in des:
        all_wrds.append(word)
all_wrds = [w.replace('visualisation', 'visualization') for w in all_wrds]
all_wrds = [w.replace('modelling', 'modeling') for w in all_wrds]
all_wrds = [w.replace('artificial intelligence', 'ai') for w in all_wrds]
all_wrds = [w.replace('dashboarding', 'dashboard') for w in all_wrds]
all_wrds = [w.replace('kpis', 'kpi') for w in all_wrds]
all_wrds = [w.replace('report', 'reporting') for w in all_wrds]
all_wrds = [w.replace('predict', 'prediction') for w in all_wrds]
all_wrds = [w.replace('creative', 'creativity') for w in all_wrds]
for l in all_wrds:
    if l == 'team' and all_wrds[all_wrds.index(l) + 1] == 'working':
        all_wrds[all_wrds.index(l) + 1] = 'work'
descriptions = Text(all_wrds)

In [21]:
# descriptions seggregated by job position
all_des_a = []
all_des_s = []
for des in df[df['position'] == 'data analyst']['description']:
    for word in des:
        all_des_a.append(word)
for des in df[df['position'] == 'data scientist']['description']:
    for word in des:
        all_des_s.append(word)

descriptions_a = Text(all_des_a)
descriptions_s = Text(all_des_s)

In [101]:
df['company'].to_csv('companies.csv', index=None, header=True)

# Seniority

In [23]:
# Simple search in the titles
srt = 0
jrt = 0
for title in df['title']:
    if 'senior' in title or 'sr' in title:
        srt += 1
    elif 'junior' in title or 'jr' in title:
        jrt += 1

print(srt)
print(jrt)

44
7


In [24]:
# Simple search in the description
sr = 0
jr = 0

for des in df['description']:
    if 'senior' in des or 'sr' in des:
        sr += 1
    elif 'junior' in des:
        jr += 1

print(sr)
print(jr)

53
13


## Segmenting by seniority

In [25]:
seniority = []
for des in df['description']:
    if 'senior' in des or 'sr' in des:
        seniority.append('senior')
    elif 'junior' in des:
        seniority.append('junior')
    else:
        try:
            s = Text(des).concordance_list('year')[0]
            try:
                y = int(s[0][-1])
                if y < 4:
                    seniority.append('junior')
                else:
                    seniority.append('senior')
            except ValueError:
                seniority.append('unknown')
        except IndexError:
            seniority.append('unknown')

In [26]:
df['seniority'] = seniority
df['seniority'].value_counts()

senior     83
junior     64
unknown    63
Name: seniority, dtype: int64

# Keywords

## Education level

In [27]:
ed_level = ['bachelor', 'master', 'ph']

## Major

In [28]:
major = ['quantitative', 'computer science', 'engineering', 'mathematics','statistic','economics']

## Tools

In [29]:
tools = ['python', 'r', 'sql', 'excel', 'tableau', 'power bi', 'qlik', 'aws', 'azure', 'looker',
        'agile']

## Hard skills

In [3]:
hard_skills = ['programming', 'machine learning','visualization',
               'modeling','research','deep learning', 'analytical skill',
               'optimization', 'automation', 'ab', 'ai', 'dashboard',
               'kpi', 'big data', 'data mining', 'etl', 'reporting', 'segmentation',
               'business intelligence', 'natural language', 'prediction', 'algorithm']

## Soft skills

In [1]:
soft_skills = ['communication', 'team work', 'presentation', 'storytelling', 'decision making',
              'creativity', 'curiosity', 'leadership', 'problem solving', 'attention detail',
              'motivation', 'proactive']

## Keyword extraction

In [88]:
tfidf = TfidfVectorizer(ngram_range=(1,2), token_pattern='(?u)\\b\\w+\\b')
text = [' '.join(doc) for doc in df['description']]
tfidf.fit(text)
dtm = tfidf.transform(text)
dtm = pd.DataFrame(dtm.todense(), columns=tfidf.get_feature_names_out())

In [91]:
key_words = dtm.loc[:, dtm.columns.isin(ed_level + major + tools + hard_skills + soft_skills)]

# DF with keywords

In [92]:
df_key = pd.concat([df, key_words], axis=1)

In [93]:
df_key.drop(['description', 'title'], axis=1, inplace=True)

In [94]:
df_key.groupby('position').agg('mean')

Unnamed: 0_level_0,ab,agile,ai,algorithm,analytical skill,attention detail,automation,aws,azure,bachelor,...,r,reporting,research,segmentation,sql,statistic,storytelling,tableau,team work,visualization
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
data analyst,0.002983,0.003096,0.004362,0.000815,0.004535,0.004938,0.003958,0.001023,0.002404,0.004313,...,0.007112,0.011977,0.004218,0.002054,0.011595,0.0056,0.001729,0.009503,0.002891,0.011211
data scientist,0.002318,0.005574,0.008394,0.018985,0.001737,0.002043,0.002825,0.008828,0.005426,0.004143,...,0.008649,0.00375,0.010673,0.00539,0.008997,0.011822,0.000615,0.003671,0.002685,0.005245


In [96]:
df_key.to_csv('data/df_keys.csv', index=None, header=True)

In [97]:
test = pd.read_csv('../data/keywords.csv')
test

Unnamed: 0,company,modality,position,seniority,ab,agile,ai,algorithm,analytical skill,attention detail,...,r,reporting,research,segmentation,sql,statistic,storytelling,tableau,team work,visualization
0,TransPerfect,On-site,data analyst,unknown,0.0,0.000000,0.042066,0.000000,0.0,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.000000
1,Essity,On-site,data analyst,unknown,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.000000
2,Glovo,On-site,data analyst,junior,0.0,0.000000,0.000000,0.000000,0.0,0.038231,...,0.00000,0.000000,0.000000,0.000000,0.019574,0.011530,0.0,0.013602,0.00000,0.012944
3,Essity,On-site,data analyst,unknown,0.0,0.017084,0.000000,0.000000,0.0,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.000000
4,Preply,Unknown,data analyst,senior,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.011321,0.013337,0.0,0.015734,0.02569,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,Babel,Remote,data scientist,senior,0.0,0.043456,0.000000,0.037606,0.0,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.022025,0.051893,0.0,0.000000,0.00000,0.000000
206,Accenture España,On-site,data scientist,senior,0.0,0.000000,0.073148,0.000000,0.0,0.000000,...,0.01157,0.000000,0.000000,0.000000,0.000000,0.019241,0.0,0.000000,0.00000,0.000000
207,"Solera, Inc.",Remote,data scientist,unknown,0.0,0.000000,0.042967,0.016381,0.0,0.000000,...,0.00000,0.000000,0.000000,0.021772,0.000000,0.000000,0.0,0.000000,0.00000,0.000000
208,Premier Research,Remote,data scientist,senior,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.00000,0.016663,0.095499,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.000000


## To Tableau with that!

# Industries
 Extracted from Glassdoor in another notebook