# **Introduction**
> In this notebook, I analyze the 'Google Job Skills' dataset. First, I look at which programming languages are most popular in Google job postings.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 
# pandas for handling our dataset
import pandas as pd
# numpy for numeric operations
import numpy as np
# matplotlib for plotting
import matplotlib.pyplot as plt
# use ggplot style
plt.style.use('ggplot')
# seaborn for beautiful visualizations
import seaborn as sns
# regular expressions
import re
# print inline in this notebook
%matplotlib inline

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output

# Show the contents of the input directory by shelling out to `ls`
input_listing = check_output(["ls", "../input"])
print(input_listing.decode("utf8"))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the job postings CSV into a DataFrame
job_skills_csv = '../input/job_skills.csv'
df_job_skills = pd.read_csv(job_skills_csv)
# Preview the first five rows
df_job_skills.head(5)

In [None]:
# Languages to look for in the job postings.
# Every entry is lower case because the qualifications text is lower-cased
# before it is searched; a mixed-case entry such as 'objective-C' can never
# be equal to any piece of lower-cased text.
programing_language_list = [
    'python', 'java', 'c++', 'php', 'javascript', 'objective-c', 'ruby',
    'perl', 'c', 'c#', 'sql', 'swift', 'scala',
]

In [None]:
# Pull the 'Minimum Qualifications' column out as a plain Python list.
# Rows with no value are dropped first: str() would otherwise turn each
# missing value into the literal word "nan", which would then be part of
# the text that gets searched.
minimum_qualifications = df_job_skills['Minimum Qualifications'].dropna().tolist()
# Join every posting into one lower-cased string so that matching is
# case-insensitive; str() guards against any remaining non-string values.
# (The variable name's spelling is kept because other cells reference it.)
miniumum_qualifications_string = " ".join(str(v) for v in minimum_qualifications).lower()

In [None]:
# Count how many times each language is mentioned in the minimum qualifications.
#
# Splitting the text with r"\w+" does not work for names that contain
# punctuation: "c++" and "c#" are both tokenised as "c" (so they are never
# counted themselves and inflate the count for C), and "objective-c" is split
# in two. Each language is therefore searched for as a whole, escaped phrase
# with explicit boundaries.
def count_language_mentions(text, languages):
    """Return a dict mapping each entry of `languages` to its mention count in `text`.

    Matching is case-insensitive (both the text and the names are lower-cased).
    A mention must not be directly preceded by a word character, '+', '#' or
    '-', and must not be directly followed by a word character, '+' or '#'.
    This keeps 'c' from matching inside 'c++', 'c#' or 'objective-c', and
    'java' from matching inside 'javascript'.

    The returned dict is keyed by the names exactly as given and preserves
    their order.
    """
    text = text.lower()
    counts = {}
    for language in languages:
        pattern = r"(?<![\w+#-])" + re.escape(language.lower()) + r"(?![\w+#])"
        counts[language] = len(re.findall(pattern, text))
    return counts


wordcount = count_language_mentions(miniumum_qualifications_string, programing_language_list)
# print
print(wordcount)

In [None]:
# Order the (language, count) pairs from most to least mentioned
programming_language_popularity = list(wordcount.items())
programming_language_popularity.sort(key=lambda pair: pair[1], reverse=True)

In [None]:
# Build a two-column frame from the ranked (language, count) pairs
df_popular_programming_lang = pd.DataFrame(
    programming_language_popularity, columns=['Language', 'Popularity']
)
# Upper-case the first letter of every language name for display
df_popular_programming_lang = df_popular_programming_lang.assign(
    Language=df_popular_programming_lang['Language'].str.capitalize()
)

In [None]:
# Vertical bar chart: one bar per language, bar height = number of mentions
ax = df_popular_programming_lang.plot.bar(x='Language', y='Popularity', figsize=(10,5))
# Figure-level title
ax.figure.suptitle("Programming Languages at Google Jobs", fontsize=18)
# Blank out the x-axis label
ax.set_xlabel("")
# Tick labels on the x axis at font size 14
plt.xticks(fontsize=14)
# Render the figure
plt.show()

**Wow! It looks like Python holds the first position.**

In [None]:
# Rebuild the combined qualifications text with its original casing (no .lower()),
# replacing the lower-cased string stored under the same name
miniumum_qualifications_string = " ".join(map(str, minimum_qualifications))

In [None]:
# Degree keywords to look for in the qualifications text (case-sensitive)
degree_list = [
    "BA",
    "BS",
    "Bachelor's",
    "PhD",
]

In [None]:
# Count how often each degree keyword appears in the minimum qualifications.
#
# The text may spell the possessive with a typographic apostrophe
# ("Bachelor’s"). A tokeniser that only treats the straight "'" as part of a
# word splits that into "Bachelor" and "s", so it is never counted under the
# "Bachelor's" key. Typographic apostrophes are therefore normalised to "'"
# before tokenising. Matching is case-sensitive.
degree_text = miniumum_qualifications_string.replace("’", "'")
wordcount = dict.fromkeys(degree_list, 0)
# Tokens are runs of word characters and apostrophes; stand-alone punctuation
# is skipped because it can never equal a degree keyword.
for w in re.findall(r"[\w']+", degree_text):
    if w in wordcount:
        wordcount[w] += 1
# print
print(wordcount)


In [None]:
# Rank the (degree, count) pairs, highest count first
degree_popularity = list(wordcount.items())
degree_popularity.sort(key=lambda pair: pair[1], reverse=True)

In [None]:
# Tabulate the ranked degree counts as a two-column frame
df_degree_popular = pd.DataFrame(
    data=degree_popularity,
    columns=['Degree', 'Popularity'],
)

In [None]:
# Reverse the row order so the least popular degree comes first.
# The frame was built from a list sorted by descending count, so its default
# index 0..n-1 follows that order; sorting the index in descending order gives
# the same result as [::-1] on the first run but, unlike [::-1], does not flip
# the rows back when this cell is executed again.
df_degree_popular = df_degree_popular.sort_index(ascending=False)
# plot
df_degree_popular.plot.barh(x='Degree',y='Popularity',figsize=(15,10), stacked=True)
# add a suptitle
plt.suptitle("Popularity of academic degree at Google Jobs ", fontsize=18)
# set xlabel to ""
plt.xlabel("")
# change yticks (the degree labels) fontsize to 18
plt.yticks(fontsize=18)
# finally show the plot
plt.show()

# It looks like BA and BS degrees are the most popular at Google Jobs

*Thank you for reading this notebook. Please upvote if it helped you, and if you have any suggestions, please post them in the comments.*
*To be continued*