In [1]:
#
###### https://github.com/LucaCanali/Miscellaneous/blob/master/Pyspark_SQL_Magic_Jupyter/IPython_Pyspark_SQL_Magic.py
#
#
# IPython magic functions to use with Pyspark and Spark SQL
# The following code is intended as a set of example shortcuts that simplify the use of SQL in pyspark
# The defined functions are:
#
# %sql <statement>          - return a Spark DataFrame for lazy evaluation of the SQL
# %sql_show <statement>     - run the SQL statement and show max_show_lines (50) lines
# %sql_display <statement>  - run the SQL statement and display the results using an HTML table
#                           - implemented by converting the result to a Pandas DataFrame; displays up to max_show_lines (50) rows
# %sql_explain <statement>  - display the execution plan of the SQL statement
#
# Use: %<magic> for line magic or %%<magic> for cell magic.
#
# Author: Luca.Canali@cern.ch
# September 2016
#

from IPython.core.magic import register_line_cell_magic

# Configuration parameters
max_show_lines = 50         # Limit on the number of lines to show with %sql_show and %sql_display
detailed_explain = True     # Set to False if you want to see only the physical plan when running explain


@register_line_cell_magic
def sql(line, cell=None):
    """Build a Spark DataFrame from a SQL statement without triggering execution.

    Usable as ``%sql <statement>`` (the statement arrives in ``line``) or as
    ``%%sql`` (the statement arrives in ``cell``). When a cell body is
    present it takes precedence over the line text.

    Returns the object produced by ``sqlContext.sql`` for the statement.
    """
    if cell is None:
        statement = line
    else:
        statement = cell
    return sqlContext.sql(statement)

@register_line_cell_magic
def sql_show(line, cell=None):
    """Run a SQL statement and print its first ``max_show_lines`` rows.

    Usable as ``%sql_show <statement>`` or ``%%sql_show``; the cell body,
    when given, is used instead of the line text.

    Returns whatever ``DataFrame.show`` returns.
    """
    if cell is None:
        statement = line
    else:
        statement = cell
    result_df = sqlContext.sql(statement)
    return result_df.show(max_show_lines)

@register_line_cell_magic
def sql_display(line, cell=None):
    """Run a SQL statement and hand back the result as a Pandas DataFrame.

    Usable as ``%sql_display <statement>`` or ``%%sql_display``; the cell
    body, when given, is used instead of the line text. The Spark result is
    limited to ``max_show_lines`` rows before it is converted with
    ``toPandas()``, so the returned frame can be displayed or processed further.
    """
    if cell is None:
        statement = line
    else:
        statement = cell
    limited_df = sqlContext.sql(statement).limit(max_show_lines)
    return limited_df.toPandas()

@register_line_cell_magic
def sql_explain(line, cell=None):
    """Print the execution plan of a SQL statement.

    Usable as ``%sql_explain <statement>`` or ``%%sql_explain``; the cell
    body, when given, is used instead of the line text. The module-level
    ``detailed_explain`` flag is forwarded to ``DataFrame.explain``.

    Returns whatever ``DataFrame.explain`` returns.
    """
    if cell is None:
        statement = line
    else:
        statement = cell
    planned_df = sqlContext.sql(statement)
    return planned_df.explain(detailed_explain)


In [2]:
# ------------------------------------------------------------------
# Load the Delta: input locations and Spark SQL table names
# ------------------------------------------------------------------
# Every Delta input is addressed relative to job_dir (note the trailing
# slash: the paths below are built by plain string concatenation).
job_dir = "/home/notebookuser/notebooks/cv-ngrams-classifier/job-dir/"

# (Delta path, temp-table name) pairs, one per input dataset.
json_cv_file, json_cv_table = job_dir + "data/delta/json-cv-pdf", "pdf_cv"
ngrams_cv_file, ngrams_cv_table = job_dir + "data/delta/cv-files-ngrams", "ngrams_cv"
skills_file, skills_table = job_dir + "data/delta/role_skills", "role_skills"

# ------------------------------ Execution -------------------------
import findspark
findspark.init()
#
#
import pyspark
from pyspark.sql import functions as pfunc
from pyspark.sql import SQLContext
from pyspark.sql import Window, types
import re
import pandas as pd
import numpy as np
from pandas import DataFrame
from pyspark.sql.types import IntegerType
from pyspark.sql.types import FloatType
from pyspark.sql.functions import udf
from pyspark.sql.functions import *
from scipy.stats import kstest
from scipy import stats
#
import subprocess
#
# Reuse an already running SparkContext when this cell is executed again:
# constructing a second SparkContext in the same kernel raises
# "Cannot run multiple SparkContexts at once". The app name only takes
# effect when a new context is actually created.
sc = pyspark.SparkContext.getOrCreate(
    pyspark.SparkConf().setAppName("Daily_CV_Analysis-Delta")
)
sqlContext = SQLContext(sc)

from pyspark.sql import functions as F


def _load_delta_as_view(path, view_name):
    """Load a Delta table, persist it, print its schema and register it for SQL.

    Parameters
    ----------
    path : str
        Location of the Delta table to read.
    view_name : str
        Name under which the DataFrame becomes queryable through
        ``sqlContext.sql``. An existing temporary view with the same name
        is replaced, so the cell can be re-run safely.

    Returns
    -------
    The persisted Spark DataFrame.
    """
    loaded_df = (
        sqlContext.read.format("delta")
        .load(path)
        .persist(pyspark.StorageLevel.MEMORY_AND_DISK_2)
    )
    loaded_df.printSchema()
    # createOrReplaceTempView supersedes registerTempTable, which has been
    # deprecated since Spark 2.0.
    loaded_df.createOrReplaceTempView(view_name)
    return loaded_df


# Delta time travel is currently NOT applied: `version` is defined but never
# handed to the reader. To pin a snapshot, add
#     .option("versionAsOf", version)
# before .load() in _load_delta_as_view.
version = 1

# Load the three Delta inputs and expose them as temp views for the SQL cells.
delta_df1 = _load_delta_as_view(json_cv_file, json_cv_table)
delta_df2 = _load_delta_as_view(ngrams_cv_file, ngrams_cv_table)
delta_df3 = _load_delta_as_view(skills_file, skills_table)

print("Table Loading Done")
#

root
 |-- filename: string (nullable = true)
 |-- pages: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- p_content: string (nullable = true)
 |    |    |-- page_n: string (nullable = true)

root
 |-- Filename: string (nullable = true)
 |-- pagei: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 1_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 2_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 3_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 4_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 5_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 6_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 1_counts: vector (nullable = true)
 |-- 2_counts: vector (nullable = true)
 |-- 3_counts: vector (nullable = true)
 |-- 4

In [3]:
#
####
#### Expose most frequent Terms in CV pages
####
# NOTE(review): these wildcard imports shadow Python built-ins such as
# sum/min/max/filter; they are kept because other cells may rely on the
# names they bring in.
from pyspark.sql.functions import *
from pyspark.sql.types import *
#
# All CVs that have a filename (a single file could be selected with e.g. ='cv-x1').
cv_pages = sqlContext.sql("select filename,pages from pdf_cv where filename IS NOT NULL ")

# One row per page content, then one row per whitespace-separated term.
# The pattern is a raw string: "\s" is not a valid Python escape sequence and
# triggers a DeprecationWarning / SyntaxWarning in a plain string literal.
mywords = (
    cv_pages
    .select("filename", explode("pages.p_content").alias("p_cont"))
    .select("filename", explode(split(col("p_cont"), r"\s+")).alias("terms_in_pages"))
)
###
mywords.printSchema()
####
# Terms excluded from the count. Matching is exact and case-sensitive.
filler_words_list = [
    'the','a','of','to','is','or','in','on','for','by','an','The','and','A','at',
    'your','as','that','when','their','it','be','with','you','are','It','from','can','usually',
    '--','-',':','•','|','●','§','&','–','.','_',';',',','(',')','/',
    '1','2','3','4','5','6','7','8','9','0',
    'a','b','c','d','e','f','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',
    # Splitting on whitespace yields an empty token when a page starts or
    # ends with whitespace; the whitespace-only entries below can never match
    # such a token, so the empty string is listed explicitly.
    '',
    ' ','\n','\n ','  ','\n  ','   ','\n   ','    ','     ','      ','       ','        ','         ','          ','           ',
]
####
# Occurrences of every remaining term per CV file, most frequent first.
wordCountDF = (
    mywords
    .filter(~col("terms_in_pages").isin(filler_words_list))
    .groupBy("filename", "terms_in_pages")
    .count()
    .orderBy(col('count').desc())
)
####
wordCountDF.show(138)
#

root
 |-- filename: string (nullable = true)
 |-- terms_in_pages: string (nullable = true)

+-------------------+-----------------+-----+
|           filename|   terms_in_pages|count|
+-------------------+-----------------+-----+
|Candidate2-AWS_Data|             data|   99|
|Candidate4-AWS_Data|             data|   73|
|Candidate4-AWS_Data|             Data|   70|
|Candidate1-AWS_Data|             data|   70|
|Candidate1-AWS_Data|           Oracle|   61|
|Candidate1-AWS_Data|             Data|   38|
|Candidate2-AWS_Data|             Data|   35|
|Candidate4-AWS_Data|           Oracle|   32|
|Candidate1-AWS_Data|            using|   31|
|Candidate4-AWS_Data|            using|   30|
|Candidate2-AWS_Data|           Oracle|   28|
|Candidate2-AWS_Data|         business|   27|
|Candidate1-AWS_Data|               BI|   22|
|Candidate2-AWS_Data|            using|   21|
|Candidate3-AWS_Data|           system|   20|
|Candidate2-AWS_Data|             team|   18|
|Candidate4-AWS_Data|             

In [4]:
#
def _preview(query):
    """Run `query` through sqlContext, print the result schema, show up to 5 rows
    and return the resulting DataFrame."""
    frame = sqlContext.sql(query)
    frame.printSchema()
    frame.show(5)
    return frame


# Small samples of each registered table.
data_analytics_df1 = _preview("select * from pdf_cv limit 5")
data_analytics_df2 = _preview("select * from ngrams_cv limit 5")
data_analytics_df3 = _preview("select * from role_skills limit 5")

# Files whose 1-grams contain the skill selected from role_skills.
data_analytics_df4 = _preview(
    " select distinct(a.filename) from ngrams_cv as a, role_skills as b where b.role = 'devops engineer' AND b.level='5' AND b.skill = 'terraform' AND (array_contains(a.1_grams,b.skill)) limit 10 "
)
#

root
 |-- filename: string (nullable = true)
 |-- pages: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- p_content: string (nullable = true)
 |    |    |-- page_n: string (nullable = true)

+-------------------+--------------------+
|           filename|               pages|
+-------------------+--------------------+
|Candidate4-AWS_Data|[[PAUL VENABLES T...|
|Candidate3-AWS_Data|[[ Daniel C. Dora...|
|Candidate1-AWS_Data|[[ Abhijit Jadhav...|
|Candidate2-AWS_Data|[[ Masood Ahmad  ...|
+-------------------+--------------------+

root
 |-- Filename: string (nullable = true)
 |-- pagei: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 1_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 2_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 3_grams: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- 4_grams: array (nullable = true)
 

In [5]:
%%sql_display
SELECT DISTINCT a.filename
  FROM ngrams_cv AS a
  JOIN role_skills AS b
    ON array_contains(a.1_grams, b.skill)
 WHERE b.role = 'devops engineer'
   AND b.level = '5'
   AND b.skill = 'terraform'
 LIMIT 10


Unnamed: 0,filename
0,Candidate3-AWS_Data
1,Candidate1-AWS_Data


In [8]:
%%sql_display
SELECT *
  FROM role_skills
 LIMIT 50

Unnamed: 0,skill,role,level
0,python,data engineer,2
1,python,data engineer,3
2,python,data engineer,4
3,python,data engineer,5
4,scala,data engineer,3
5,scala,data engineer,4
6,scala,data engineer,5
7,java,data engineer,2
8,java,data engineer,3
9,java,data engineer,4


In [9]:
%%sql_display
SELECT *
  FROM ngrams_cv

Unnamed: 0,Filename,pagei,1_grams,2_grams,3_grams,4_grams,5_grams,6_grams,1_counts,2_counts,3_counts,4_counts,5_counts,6_counts,features
0,Candidate4-AWS_Data,"[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul venables, venables tel, tel 44, 44 0, 0 ...","[paul venables tel, venables tel 44, tel 44 0,...","[paul venables tel 44, venables tel 44 0, tel ...","[paul venables tel 44 0, venables tel 44 0 795...","[paul venables tel 44 0 7957, venables tel 44 ...","(45.0, 43.0, 14.0, 14.0, 12.0, 4.0, 9.0, 3.0, ...","(3.0, 2.0, 2.0, 2.0, 0.0, 4.0, 4.0, 1.0, 2.0, ...","(1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, ...","(0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","(1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, ...","(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","(45.0, 43.0, 14.0, 14.0, 12.0, 4.0, 9.0, 3.0, ..."
1,Candidate4-AWS_Data,"[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul venables, venables tel, tel 44, 44 0, 0 ...","[paul venables tel, venables tel 44, tel 44 0,...","[paul venables tel 44, venables tel 44 0, tel ...","[paul venables tel 44 0, venables tel 44 0 795...","[paul venables tel 44 0 7957, venables tel 44 ...","(66.0, 52.0, 8.0, 11.0, 6.0, 7.0, 4.0, 15.0, 1...","(2.0, 6.0, 2.0, 2.0, 0.0, 5.0, 2.0, 3.0, 0.0, ...","(0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 1.0, 0.0, ...","(0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","(1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, ...","(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","(66.0, 52.0, 8.0, 11.0, 6.0, 7.0, 4.0, 15.0, 1..."
2,Candidate4-AWS_Data,"[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul venables, venables tel, tel 44, 44 0, 0 ...","[paul venables tel, venables tel 44, tel 44 0,...","[paul venables tel 44, venables tel 44 0, tel ...","[paul venables tel 44 0, venables tel 44 0 795...","[paul venables tel 44 0 7957, venables tel 44 ...","(49.0, 27.0, 16.0, 12.0, 15.0, 8.0, 7.0, 22.0,...","(2.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, ...","(0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, ...","(0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","(1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, ...","(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","(49.0, 27.0, 16.0, 12.0, 15.0, 8.0, 7.0, 22.0,..."
3,Candidate4-AWS_Data,"[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul venables, venables tel, tel 44, 44 0, 0 ...","[paul venables tel, venables tel 44, tel 44 0,...","[paul venables tel 44, venables tel 44 0, tel ...","[paul venables tel 44 0, venables tel 44 0 795...","[paul venables tel 44 0 7957, venables tel 44 ...","(30.0, 25.0, 15.0, 5.0, 4.0, 24.0, 4.0, 3.0, 9...","(8.0, 5.0, 0.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, ...","(0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","(1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, ...","(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","(30.0, 25.0, 15.0, 5.0, 4.0, 24.0, 4.0, 3.0, 9..."
4,Candidate4-AWS_Data,"[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul, venables, tel, 44, 0, 7957, 109123, ema...","[paul venables, venables tel, tel 44, 44 0, 0 ...","[paul venables tel, venables tel 44, tel 44 0,...","[paul venables tel 44, venables tel 44 0, tel ...","[paul venables tel 44 0, venables tel 44 0 795...","[paul venables tel 44 0 7957, venables tel 44 ...","(9.0, 13.0, 0.0, 0.0, 0.0, 1.0, 5.0, 0.0, 0.0,...","(0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, ...","(0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0, 0.0, ...","(0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...","(1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, ...","(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","(9.0, 13.0, 0.0, 0.0, 0.0, 1.0, 5.0, 0.0, 0.0,..."
5,Candidate1-AWS_Data,"[abhijit, jadhav, big, data, engineer, archite...","[abhijit, jadhav, big, data, engineer, archite...","[abhijit jadhav, jadhav big, big data, data en...","[abhijit jadhav big, jadhav big data, big data...","[abhijit jadhav big data, jadhav big data engi...","[abhijit jadhav big data engineer, jadhav big ...","[abhijit jadhav big data engineer architect, j...","(24.0, 20.0, 2.0, 9.0, 10.0, 13.0, 6.0, 3.0, 5...","(0.0, 0.0, 5.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, ...","(1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(24.0, 20.0, 2.0, 9.0, 10.0, 13.0, 6.0, 3.0, 5..."
6,Candidate1-AWS_Data,"[professional, experience, 1, company, name, i...","[professional, experience, 1, company, name, i...","[professional experience, experience 1, 1 comp...","[professional experience 1, experience 1 compa...","[professional experience 1 company, experience...","[professional experience 1 company name, exper...",[professional experience 1 company name inawis...,"(25.0, 48.0, 37.0, 4.0, 9.0, 2.0, 15.0, 9.0, 1...","(1.0, 0.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, ...","(1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, ...","(3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","(0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(25.0, 48.0, 37.0, 4.0, 9.0, 2.0, 15.0, 9.0, 1..."
7,Candidate1-AWS_Data,"[responsibilities, architect, build, aws, envi...","[responsibilities, architect, build, aws, envi...","[responsibilities architect, architect build, ...","[responsibilities architect build, architect b...","[responsibilities architect build aws, archite...",[responsibilities architect build aws environm...,[responsibilities architect build aws environm...,"(37.0, 32.0, 21.0, 10.0, 20.0, 16.0, 13.0, 3.0...","(2.0, 0.0, 6.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, ...","(0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, ...","(0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(37.0, 32.0, 21.0, 10.0, 20.0, 16.0, 13.0, 3.0..."
8,Candidate1-AWS_Data,"[my, work, involves, gathering, business, requ...","[my, work, involves, gathering, business, requ...","[my work, work involves, involves gathering, g...","[my work involves, work involves gathering, in...","[my work involves gathering, work involves gat...","[my work involves gathering business, work inv...",[my work involves gathering business requireme...,"(37.0, 17.0, 10.0, 9.0, 12.0, 19.0, 11.0, 3.0,...","(2.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, ...","(1.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...","(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","(0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(37.0, 17.0, 10.0, 9.0, 12.0, 19.0, 11.0, 3.0,..."
9,Candidate1-AWS_Data,"[oracle, apex, xml, dba, activities, like, des...","[oracle, apex, xml, dba, activities, like, des...","[oracle apex, apex xml, xml dba, dba activitie...","[oracle apex xml, apex xml dba, xml dba activi...","[oracle apex xml dba, apex xml dba activities,...","[oracle apex xml dba activities, apex xml dba ...","[oracle apex xml dba activities like, apex xml...","(15.0, 4.0, 9.0, 13.0, 11.0, 20.0, 10.0, 5.0, ...","(2.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, ...","(2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(15.0, 4.0, 9.0, 13.0, 11.0, 20.0, 10.0, 5.0, ..."


In [16]:
%%sql_display
SELECT *
  FROM ngrams_cv AS a
  JOIN role_skills AS b
    ON array_contains(a.1_grams, b.skill)
 WHERE b.role = 'devops engineer'
   AND b.level = '3'
   AND b.skill = 'terraform'
 LIMIT 10

Unnamed: 0,Filename,pagei,1_grams,2_grams,3_grams,4_grams,5_grams,6_grams,1_counts,2_counts,3_counts,4_counts,5_counts,6_counts,features,skill,role,level
0,Candidate1-AWS_Data,"[abhijit, jadhav, big, data, engineer, archite...","[abhijit, jadhav, big, data, engineer, archite...","[abhijit jadhav, jadhav big, big data, data en...","[abhijit jadhav big, jadhav big data, big data...","[abhijit jadhav big data, jadhav big data engi...","[abhijit jadhav big data engineer, jadhav big ...","[abhijit jadhav big data engineer architect, j...","(24.0, 20.0, 2.0, 9.0, 10.0, 13.0, 6.0, 3.0, 5...","(0.0, 0.0, 5.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, ...","(1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(24.0, 20.0, 2.0, 9.0, 10.0, 13.0, 6.0, 3.0, 5...",terraform,devops engineer,3
1,Candidate1-AWS_Data,"[professional, experience, 1, company, name, i...","[professional, experience, 1, company, name, i...","[professional experience, experience 1, 1 comp...","[professional experience 1, experience 1 compa...","[professional experience 1 company, experience...","[professional experience 1 company name, exper...",[professional experience 1 company name inawis...,"(25.0, 48.0, 37.0, 4.0, 9.0, 2.0, 15.0, 9.0, 1...","(1.0, 0.0, 4.0, 1.0, 0.0, 2.0, 1.0, 1.0, 3.0, ...","(1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, ...","(3.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","(0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(25.0, 48.0, 37.0, 4.0, 9.0, 2.0, 15.0, 9.0, 1...",terraform,devops engineer,3
2,Candidate3-AWS_Data,"[daniel, c, doran, mobile, 07980894462, page, ...","[daniel, c, doran, mobile, 07980894462, page, ...","[daniel c, c doran, doran mobile, mobile 07980...","[daniel c doran, c doran mobile, doran mobile ...","[daniel c doran mobile, c doran mobile 0798089...","[daniel c doran mobile 07980894462, c doran mo...","[daniel c doran mobile 07980894462 page, c dor...","(0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",terraform,devops engineer,3
3,Candidate3-AWS_Data,"[daniel, c, doran, mobile, 07980894462, page, ...","[daniel, c, doran, mobile, 07980894462, page, ...","[daniel c, c doran, doran mobile, mobile 07980...","[daniel c doran, c doran mobile, doran mobile ...","[daniel c doran mobile, c doran mobile 0798089...","[daniel c doran mobile 07980894462, c doran mo...","[daniel c doran mobile 07980894462 page, c dor...","(26.0, 4.0, 13.0, 9.0, 12.0, 0.0, 6.0, 2.0, 9....","(0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...","(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","(26.0, 4.0, 13.0, 9.0, 12.0, 0.0, 6.0, 2.0, 9....",terraform,devops engineer,3


In [6]:
# Shut down the SparkContext held in `sc`. Run this last: the Spark SQL cells
# need a live context.
sc.stop()