In [1]:
# Import initial dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [3]:
#Read in CSV
jobs_csv = "Resources/Google_job_skills.csv"

In [8]:
# Read in data and store in Pandas dataframe
jobs_df = pd.read_csv(jobs_csv)
jobs_df.columns = ["Company", "Title", "Category", "Location", "Responsibilities", "Min_qual", "Pref_qual"]
jobs_df.head(10)

Unnamed: 0,Company,Title,Category,Location,Responsibilities,Min_qual,Pref_qual
0,Google,Google Cloud Program Manager,Program Management,Singapore,"Shape, shepherd, ship, and show technical prog...",BA/BS degree or equivalent practical experienc...,Experience in the business technology market a...
1,Google,"Supplier Development Engineer (SDE), Cable/Con...",Manufacturing & Supply Chain,"Shanghai, China",Drive cross-functional activities in the suppl...,BS degree in an Engineering discipline or equi...,"BSEE, BSME or BSIE degree.\nExperience of usin..."
2,Google,"Data Analyst, Product and Tools Operations, Go...",Technical Solutions,"New York, NY, United States",Collect and analyze data to draw insight and i...,"Bachelor’s degree in Business, Economics, Stat...",Experience partnering or consulting cross-func...
3,Google,"Developer Advocate, Partner Engineering",Developer Relations,"Mountain View, CA, United States","Work one-on-one with the top Android, iOS, and...",BA/BS degree in Computer Science or equivalent...,"Experience as a software developer, architect,..."
4,Google,"Program Manager, Audio Visual (AV) Deployments",Program Management,"Sunnyvale, CA, United States",Plan requirements with internal customers.\nPr...,BA/BS degree or equivalent practical experienc...,CTS Certification.\nExperience in the construc...
5,Google,"Associate Account Strategist (Czech/Slovak), G...",Technical Solutions,"Dublin, Ireland",Communicate with customers via phone and email...,Bachelor's degree or equivalent practical expe...,"Experience in sales, customer service, account..."
6,Google,"Supplier Development Engineer, Camera, Consume...",Hardware Engineering,"Mountain View, CA, United States",Manage cross-functional activities in the supp...,BS degree in Engineering or equivalent practic...,Master's degree.\nExperience in the developmen...
7,Google,"Strategic Technology Partner Manager, Healthca...",Partnerships,"Sunnyvale, CA, United States",Lead the development and strategy with partner...,BA/BS degree or equivalent practical experienc...,"BA/BS degree in a technical, life sciences or ..."
8,Google,"Manufacturing Business Manager, Google Hardware",Manufacturing & Supply Chain,"Xinyi District, Taiwan",Develop CM/ODM strategy and implement supplier...,"BA/BS degree in Engineering, Supply Chain or e...",MBA degree.\nExperience in procurement and sup...
9,Google,"Solutions Architect, Healthcare and Life Scien...",Technical Solutions,"New York, NY, United States",Help compile customer requirements as well as ...,"BA/BS degree in Computer Science, related Soft...","Master's degree in Computer Science, related E..."


In [11]:
# Determine which skills are most desired based on word frequency
# Get word count per word in minimum qualifications and preferred qualifications columns using Pandas
pd.Series(np.concatenate([x.split() for x in jobs_df.Min_qual.astype(str)])).value_counts()

in             2357
or             2209
and            2043
a              1153
experience.    1113
               ... 
Statistics)       1
ドライバや、Linux       1
Hardware          1
natively          1
form              1
Length: 3245, dtype: int64

In [12]:
pd.Series(np.concatenate([x.split() for x in jobs_df.Pref_qual.astype(str)])).value_counts()

and         6244
to          2965
in          2456
with        2411
of          1639
            ... 
uses           1
OEMs,          1
rapport.       1
Broad,         1
surround       1
Length: 5590, dtype: int64

In [18]:
# Changing method to use qualifcation strings and list of programming languages to view only relevant results
# Creating a list of programming languages to abbreviated as pl
# List is created using the top 20 as provided by towardsdatascience.com
pl = ["Java", "C", "Python", "C++", "C#", "Visual Basic .NET", "JavaScript", "PHP", "Swift", "SQL", "Ruby", "Delphi", "Objective-C", "Go", 
      "Assembly language", "Visual Basic", "D", "R", "Perl", "MATLAB"]

In [20]:
# importing regular expression dependency
import re

In [30]:
# converting column to list of strings for regular expression
Min_qual_text = jobs_df.Min_qual.tolist()
Min_qual_text = ''.join(map(str, Min_qual_text))

In [31]:
# chekcing list
print(Min_qual_text)

BA/BS degree or equivalent practical experience.
3 years of experience in program and/or project management in cloud computing, enterprise software and/or marketing technologies.BS degree in an Engineering discipline or equivalent practical experience.
7 years of experience in Cable/Connector Design or Manufacturing in an NPI role.
Experience working with Interconnect Engineering and Product Design (PD)/Mechanical Engineer in developing, manufacturing and testing.
Ability to speak and write in English and Mandarin fluently and idiomatically.Bachelor’s degree in Business, Economics, Statistics, Operations Research or related analytical field, or equivalent practical experience.
2 years of work experience in business analysis.
1 year of experience with statistical modeling, forecasting or machine learning. Experience with R, SAS or Python.
1 year of experience developing and manipulating large datasets.BA/BS degree in Computer Science or equivalent practical experience.
Experience workin

In [32]:
# Using regular expression to count the value of each word in pl for min qual and pref qual columns
pl_count = dict((x,0) for x in pl)
for p in re.findall(r"[\w'+#-]+|[.!?;’]", Min_qual_text):
    if p in pl_count:
        pl_count[p] += 1
# print
print(pl_count)

{'Java': 76, 'C': 18, 'Python': 97, 'C++': 54, 'C#': 15, 'Visual Basic .NET': 0, 'JavaScript': 74, 'PHP': 7, 'Swift': 0, 'SQL': 73, 'Ruby': 14, 'Delphi': 0, 'Objective-C': 3, 'Go': 26, 'Assembly language': 0, 'Visual Basic': 0, 'D': 6, 'R': 37, 'Perl': 6, 'MATLAB': 16}


In [33]:
# repeating process for preferred qualifications
# converting column to list of strings for regular expression
Pref_qual_text = jobs_df.Pref_qual.tolist()
Pref_qual_text = ''.join(map(str, Pref_qual_text))

In [34]:
pl_count = dict((x,0) for x in pl)
for p in re.findall(r"[\w'+#-]+|[.!?;’]", Pref_qual_text):
    if p in pl_count:
        pl_count[p] += 1
# print
print(pl_count)

{'Java': 50, 'C': 20, 'Python': 79, 'C++': 35, 'C#': 6, 'Visual Basic .NET': 0, 'JavaScript': 57, 'PHP': 21, 'Swift': 2, 'SQL': 64, 'Ruby': 6, 'Delphi': 0, 'Objective-C': 5, 'Go': 9, 'Assembly language': 0, 'Visual Basic': 0, 'D': 9, 'R': 20, 'Perl': 11, 'MATLAB': 6}
