In [1]:
import json
import pandas as pd
import numpy as np
import random

In [2]:
def load_json(provider_name, data_dir='../skill_recognition/raw_data'):
    """Load one provider's course dump, stored as one JSON value per line.

    Parameters
    ----------
    provider_name : str
        Middle part of the file name; the file read is
        ``<data_dir>/provider.<provider_name>.json``.
    data_dir : str, optional
        Directory holding the provider dumps. Defaults to the location this
        notebook has always read from.

    Returns
    -------
    list
        The decoded JSON value of every non-blank line, in file order.

    Raises
    ------
    FileNotFoundError
        If the provider file does not exist.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8: course titles contain non-ASCII text, and the platform
    # default encoding is not UTF-8 everywhere.
    with open(f'{data_dir}/provider.{provider_name}.json', encoding='utf-8') as json_file:
        # Skip blank lines (e.g. an extra newline at end of file), which
        # json.loads would reject.
        return [json.loads(line) for line in json_file if line.strip()]

# credential_engine

In [3]:
# Read every record of the 'credential_engine' provider dump into a list.
cg_json = load_json('credential_engine')

In [4]:
cg_json[0]

{'_id': {'$oid': '5ccc9f44edc27e33d16cdc2b'},
 'ce_id': 'ce-438f33f1-c0be-4c22-a8ba-67b7f0bf172e',
 'description': 'The purpose of the recertification program is to support continuous competence and professional development of BCSP certificants.  The recertification cycle is every five years, which is based on the rate at which practice changes.  BCSP has determined that five years is a reasonable period to measure the activities in which a certificant engages to remain up-to-date with current trends in practice.',
 'description_short': '',
 'effort_min': '',
 'effort_max': '',
 'delivery_type': 'Blended Delivery',
 'place': '',
 'entitlements': [{'price': '', 'currency': ''}],
 'duration': '',
 'weeks_to_complete': None,
 'instructor': [],
 'prerequisites': 'CSP Renewal Requirements',
 'url': 'https://www.bcsp.org/CSP',
 'language': 'en',
 'subjects': '',
 'owner': 'Board of Certified Safety Professionals (BCSP)',
 'outcome': '',
 'learning_type': 'Certificate',
 'skill': [''],
 'titl

In [5]:
# Keep only the three fields used downstream, one column per field.
cg = pd.DataFrame({
    field: [record[field] for record in cg_json]
    for field in ('title', 'description', 'skill')
})

In [6]:
cg.shape

(363, 3)

In [7]:
cg.head(10)

Unnamed: 0,title,description,skill
0,Certified Safety Professional (CSP),The purpose of the recertification program is ...,[]
1,"HOSP 215, Front Office",This course presents a systematic approach to ...,[]
2,Computer Science Program at Texas Luthern Univ...,"We believe the analysis of problems, the desig...",[]
3,Bachelor of Science in Information Technology ...,Brandman University's Bachelor of Science in I...,[]
4,Pharmacy Technician Certification Board Certif...,Certificants must complete a minimum of twenty...,[]
5,Energy Industry Fundamentals Certificate,"The Energy Industry Fundamentals course, for w...",[Energy Industry]
6,Insurance Studies Certificate,The Certificate of Completion (CCL) in Insuran...,[Insurance Industry Knowledge]
7,Bachelor of Science in Information Technology ...,This program consists of the following courses...,[]
8,Respiratory Care Associate of Science,Students are admitted to the Respiratory Care ...,[]
9,BOC Level II Training Certificate of Completion,To earn a Level II Training Certificate of Com...,[]


In [8]:
# The raw 'skill' field is a list; only its first entry is kept as the label.
# Non-list values pass through untouched, so re-running this cell cannot slice
# an already-extracted label down to its first character, and an empty list
# becomes NaN instead of raising IndexError.
cg['skill'] = cg['skill'].map(
    lambda skills: (skills[0] if skills else np.nan) if isinstance(skills, list) else skills
)

In [9]:
def empty_to_null(x):
    """Return NaN when `x` equals the empty string; otherwise return `x` as is."""
    return np.nan if x == '' else x

In [10]:
# Turn empty-string labels into NaN so the null checks below can count them.
cg['skill'] = cg['skill'].map(empty_to_null)

In [11]:
cg['skill'].isnull().sum()

273

In [12]:
cg.shape

(363, 3)

In [13]:
# Rows that carry a skill label; the original row index is preserved.
cg_labeled = cg.dropna(subset=['skill'])

In [14]:
cg_labeled.head(10)

Unnamed: 0,title,description,skill
5,Energy Industry Fundamentals Certificate,"The Energy Industry Fundamentals course, for w...",Energy Industry
6,Insurance Studies Certificate,The Certificate of Completion (CCL) in Insuran...,Insurance Industry Knowledge
10,BOC Level I Training Certificate of Completion,Level I classes support the BOC operator skill...,HVAC
11,Official (ISC)² CISSP Training,Our training helps you fully prepare for this ...,Cybersecurity Strategy
12,NSF Certificate in Food Safety Programs,"NSF International, NSF Training & Education Se...",Food Safety
13,Certified Food Safety Manager Training,Certified Managers have a responsibility to sh...,Food Safety
42,Professional Development Certificate in SQL Se...,An 11-week in-depth program designed to provid...,Microsoft SQL
44,.Net Web Developer Program,A 10-week comprehensive program covering web d...,.NET
47,Graduate Certificate in Cybersecurity - Critic...,The Cybersecurity area of study blends both in...,Infrastructure Security Fundamentals
49,Welding & Fabrication Industry Certification P...,The Monroe Community College Career Welding & ...,MIG (PULSE) Welding


# edx-courses

In [15]:
# Read every record of the 'edx-courses' provider dump into a list.
ec_json = load_json('edx-courses')

In [16]:
ec_json[0]

{'_id': {'$oid': '5c638595edc27e865ed53801'},
 'title': 'Evolution of the Human Sociality: A Quest for the Origin of Our Social Behavior',
 'description_short': 'Learn about the origins of human beings and primatology, the scientific study of primates.',
 'description_full': '<p>Through the process of evolution, animals have developed their biological features and their cultures based on their surrounding environments. How we live our lives today is a direct result of features developed from our primate ancestors as they adapted to new environments.  </p>\n<p>In primatology, it is essential to think about how cultural development and biological natures are inseparable.  </p>\n<p>This course will help you rediscover the process of evolution and will introduce primatological studies conducted by researchers at Kyoto University, Japan. Based on carefully conducted research on primate species, we will explore the origins of human beings and provide you with examples of common similarities 

In [17]:
# Map this provider's field names onto the common column names
# (description_full -> description, skills -> skill).
ec = pd.DataFrame({
    column: [record[source_key] for record in ec_json]
    for column, source_key in (
        ('title', 'title'),
        ('description', 'description_full'),
        ('skill', 'skills'),
    )
})

In [18]:
ec.shape

(1406, 3)

In [19]:
ec.head(10)

Unnamed: 0,title,description,skill
0,Evolution of the Human Sociality: A Quest for ...,"<p>Through the process of evolution, animals h...",
1,The Chemistry of Life,<p>Chemistry and biology are traditionally tau...,
2,The Extremes of Life: Microbes and Their Diver...,<p>Life on our planet is diverse. While we can...,
3,应对气候变化的中国视角 | China’s Perspective on Climate C...,<p>This course will provide China’s perspectiv...,
4,Introduction to Statistical Methods for Gene M...,<p>This data course is a primer to statistical...,
5,Chinese History From Warring States to the Tan...,<p><em>Zizhi Tongjian _is one of the earliest ...,
6,文物精品与文化中国：农业与制造业 | Relics in Chinese History -...,<p>Chinese archaeology is one of the fastest g...,
7,文物精品与文化中国：天文与医药 | Relics of Chinese History - ...,<p>Chinese archaeology is one of the fastest-g...,
8,文物精品与文化中国：文字与乐礼 | Relics of Chinese History - ...,<p>Chinese archaeology is one of the fastest-g...,
9,Introduction to Mao Zedong Thought | 毛泽东思想概论,<p>With the world’s biggest population and sec...,


In [20]:
ec['skill'].isnull().sum()

0

In [21]:
# Turn empty-string labels into NaN; any other value is left unchanged.
ec['skill'] = ec['skill'].map(empty_to_null)

In [22]:
ec.head(10)

Unnamed: 0,title,description,skill
0,Evolution of the Human Sociality: A Quest for ...,"<p>Through the process of evolution, animals h...",
1,The Chemistry of Life,<p>Chemistry and biology are traditionally tau...,
2,The Extremes of Life: Microbes and Their Diver...,<p>Life on our planet is diverse. While we can...,
3,应对气候变化的中国视角 | China’s Perspective on Climate C...,<p>This course will provide China’s perspectiv...,
4,Introduction to Statistical Methods for Gene M...,<p>This data course is a primer to statistical...,
5,Chinese History From Warring States to the Tan...,<p><em>Zizhi Tongjian _is one of the earliest ...,
6,文物精品与文化中国：农业与制造业 | Relics in Chinese History -...,<p>Chinese archaeology is one of the fastest g...,
7,文物精品与文化中国：天文与医药 | Relics of Chinese History - ...,<p>Chinese archaeology is one of the fastest-g...,
8,文物精品与文化中国：文字与乐礼 | Relics of Chinese History - ...,<p>Chinese archaeology is one of the fastest-g...,
9,Introduction to Mao Zedong Thought | 毛泽东思想概论,<p>With the world’s biggest population and sec...,


In [23]:
ec['skill'].isnull().sum()

1062

In [24]:
ec.shape

(1406, 3)

In [25]:
# Take the first skill out of the list for labeled entries.
# Only list values are unwrapped: NaN (unlabeled) and already-extracted strings
# pass through unchanged, so re-running this cell cannot slice a label down to
# its first character, and an empty list becomes NaN instead of raising.
ec['skill'] = ec['skill'].map(
    lambda skills: (skills[0] if skills else np.nan) if isinstance(skills, list) else skills
)

In [26]:
ec.head(10)

Unnamed: 0,title,description,skill
0,Evolution of the Human Sociality: A Quest for ...,"<p>Through the process of evolution, animals h...",
1,The Chemistry of Life,<p>Chemistry and biology are traditionally tau...,
2,The Extremes of Life: Microbes and Their Diver...,<p>Life on our planet is diverse. While we can...,
3,应对气候变化的中国视角 | China’s Perspective on Climate C...,<p>This course will provide China’s perspectiv...,
4,Introduction to Statistical Methods for Gene M...,<p>This data course is a primer to statistical...,
5,Chinese History From Warring States to the Tan...,<p><em>Zizhi Tongjian _is one of the earliest ...,
6,文物精品与文化中国：农业与制造业 | Relics in Chinese History -...,<p>Chinese archaeology is one of the fastest g...,
7,文物精品与文化中国：天文与医药 | Relics of Chinese History - ...,<p>Chinese archaeology is one of the fastest-g...,
8,文物精品与文化中国：文字与乐礼 | Relics of Chinese History - ...,<p>Chinese archaeology is one of the fastest-g...,
9,Introduction to Mao Zedong Thought | 毛泽东思想概论,<p>With the world’s biggest population and sec...,


In [27]:
# Missing descriptions become empty strings so the string clean-up below
# can be applied to every row.
ec['description'] = ec['description'].fillna('')

In [28]:
ec['description'].isnull().sum()

0

In [29]:
# Strip every HTML tag (opening or closing, with or without attributes), not
# only the opening <p>, <em> and <strong> tags: closing tags such as </p> and
# anchors such as <a href="..."> were previously left in the text.
# The pattern requires '<' (optionally followed by '/') and then a letter, so a
# comparison written like "x < 5" is not mistaken for markup.
ec['description'] = ec['description'].str.replace(r'</?[A-Za-z][^<>]*>', '', regex=True)

In [30]:
# Rows that carry a skill label; the original row index is preserved.
ec_labeled = ec.dropna(subset=['skill'])

In [31]:
ec_labeled.shape

(344, 3)

In [32]:
ec_labeled.head(10)

Unnamed: 0,title,description,skill
14,Calculus 1B: Integration,How long should the handle of your spoon be so...,Calculus
15,Calculus 1C: Coordinate Systems & Infinite Series,How did Newton describe the orbits of the plan...,Calculus
54,Behavioural Economics in Action,"How can we get people to save more money, eat ...",Economics
68,Marketing Analytics: Marketing Measurement Str...,Begin your journey in a new career in marketin...,Marketing Analytics
69,Marketing Analytics: Price and Promotion Analy...,"In this marketing course, you will learn how t...",Marketing Analytics
70,Marketing Analytics: Competitive Analysis and ...,"In this marketing course, you will learn about...",Marketing Analytics
71,"Marketing Analytics: Products, Distribution an...","In this marketing course, you will learn how t...",Marketing Analytics
90,Supply Chain Analytics,Supply chains are complex systems involving mu...,Supply Chain Data Analysis
91,Supply Chain Fundamentals,This Supply Chain Fundamentals course is part ...,Supply Chain Knowledge
92,Supply Chain Design,This Supply Chain Design course is part of the...,Supply Chain Model Design


# futurelearn-courses

In [33]:
# Read every record of the 'futurelearn-courses' provider dump into a list.
fl_json = load_json('futurelearn-courses')

In [34]:
fl_json[0]

{'_id': {'$oid': '5c8c4bb0edc27e807c68528c'},
 'uuid': 'baab3813-31a9-4e25-9d97-21ecffca2f97',
 'url': 'https://www.futurelearn.com/courses/good-brain-bad-brain-basics?utm_campaign=Courses+feed&utm_medium=courses-feed&utm_source=courses-feed',
 'image_url': 'https://ugc.futurelearn.com/uploads/images/69/6c/regular_696ca98e-c62a-4be3-8654-adb47abb2167.jpg',
 'introduction': 'What do we know and what are we discovering about the form and function of the human brain? Find out with this online course.',
 'description': 'Understand your own amazing brainWe will look at what your brain is made of, how these components are organised and how they function.This information is helping neuroscientists across the globe understand the brain - and how it is able to do everything from stopping you falling off your bike, to making you feel sad that your football team lost their game, to helping you learn how your brain works.This course is aimed at anyone with an interest in finding out the fundamenta

In [35]:
# Keep only the three fields used downstream, one column per field.
fl = pd.DataFrame({
    field: [record[field] for record in fl_json]
    for field in ('title', 'description', 'skill')
})

In [36]:
fl.shape

(931, 3)

In [37]:
fl['skill'].isnull().sum()

0

In [38]:
fl.head(10)

Unnamed: 0,title,description,skill
0,"Good Brain, Bad Brain: Basics",Understand your own amazing brainWe will look ...,[]
1,Forensic Science and Criminal Justice,"Over the past two decades, the criminal justic...",[]
2,Exploring Our Ocean,Discover how you can play a part in the safe k...,[]
3,Physical Theatre: Meyerhold and Biomechanics,Discover Meyerhold’s technique of theatrical b...,[]
4,Begin Programming: Build Your First Mobile Game,Take your first steps into the world of comput...,"[Java, Mobile Development]"
5,Improving Your Image: Dental Photography in Pr...,This free course will cover all aspects of the...,[Dental Technology]
6,Medicines Adherence: Supporting Patients with ...,It is estimated that 30-50% of patients do not...,[]
7,"Corpus Linguistics: Method, Analysis, Interpre...",Learn how to build and query corpora in this p...,[Computational Linguistics]
8,Community Journalism: Digital and Social Media,Community journalism is a growing area of inte...,"[Journalism, Digital Media, Social Media]"
9,Caring for Vulnerable Children,"In times of shrinking public services, the tas...",[]


In [39]:
def _first_skill_or_none(value):
    """Return the label for one raw `skill` value, or None when it has none."""
    if isinstance(value, list):
        # An empty list has no first element; treat it as unlabeled.
        return value[0] if value else None
    if isinstance(value, str) and value:
        # Already a bare label (e.g. this cell was run before): keep it
        # instead of wiping it to None.
        return value
    return None


# Some records hold an empty string instead of a list of skills, so the value
# cannot be indexed unconditionally. Only the first listed skill is kept.
fl['skill'] = fl['skill'].map(_first_skill_or_none)

In [40]:
# Turn empty-string labels into NaN; any other value is left unchanged.
fl['skill'] = fl['skill'].map(empty_to_null)

In [41]:
# Rows that carry a skill label; the original row index is preserved.
fl_labeled = fl.dropna(subset=['skill'])

In [42]:
fl_labeled.shape

(123, 3)

# skillsoft-courses

In [43]:
# Read every record of the 'skillsoft-courses' provider dump into a list.
ss_json = load_json('skillsoft-courses')

In [44]:
ss_json[0]

{'_id': {'$oid': '5ca04e69edc27eb97ce53f85'},
 'asset_id': 'oper_36_a01_bs_enus',
 'collection': 'BUSINESS STRATEGY and OPERATIONS',
 'curriculum': 'Operations Curriculum',
 'series': 'Six Sigma Black Belt (2015 BOK): Organization-wide Planning and Deployment',
 'link': 'https://www.skillsoft.com/courses/5411926-fundamentals-of-lean-and-six-sigma-and-their-applications/',
 'description': 'Six Sigma is a data-driven improvement strategy that views all activities within an organization as processes. Process inputs can be controlled and adjusted to effect significant improvements in process outputs. Six Sigma uses a rigorous and systematic methodology known as DMAIC (define, measure, analyze, improve, and control) and a number of qualitative and quantitative tools. Its goal is to drive process, product, and service improvements for reducing variation and defects. Lean is also an improvement methodology, but with a different focus. It aims to enhance process flow, reduce cycle time, and el

In [45]:
# Map this provider's field names onto the common column names (skills -> skill).
ss = pd.DataFrame({
    column: [record[source_key] for record in ss_json]
    for column, source_key in (
        ('title', 'title'),
        ('description', 'description'),
        ('skill', 'skills'),
    )
})

In [46]:
ss.shape

(464, 3)

In [47]:
ss['skill'].isnull().sum()

0

In [48]:
ss.head(10)

Unnamed: 0,title,description,skill
0,Fundamentals of Lean and Six Sigma and their A...,Six Sigma is a data-driven improvement strateg...,"[Six Sigma, Six Sigma Black Belt]"
1,"Six Sigma Project Selection, Roles, and Respon...","Deployment of Six Sigma, Lean, or another cont...","[Six Sigma, Six Sigma Black Belt]"
2,Six Sigma Strategic Planning and Deployment,Strategic planning of Six Sigma projects and L...,"[Six Sigma, Six Sigma Black Belt]"
3,Impact on Stakeholders and Benchmarking for Si...,The success of Six Sigma deployment in an orga...,"[Six Sigma, Six Sigma Black Belt]"
4,Using Business and Financial Measures in Six S...,Six Sigma improvement begins with assessing th...,"[Six Sigma, Six Sigma Black Belt]"
5,"Six Sigma Team Dynamics, Roles, and Success Fa...",Forming an effective Six Sigma team for drivin...,"[Six Sigma, Six Sigma Black Belt]"
6,Six Sigma Team Facilitation and Leadership,Six Sigma Black Belts must possess specific qu...,"[Six Sigma, Six Sigma Black Belt]"
7,Six Sigma Team Dynamics and Training,Black Belts have the challenging task of manag...,"[Six Sigma, Six Sigma Black Belt]"
8,Determining Requirements by Listening to the V...,Customers are at the heart of all Six Sigma in...,"[Six Sigma, Six Sigma Black Belt]"
9,"Six Sigma Business Case, Project Charter, and ...",A project charter is the most important docume...,"[Six Sigma, Six Sigma Black Belt]"


In [49]:
# Keep only the first listed skill as the label.
# Non-list values pass through untouched, so re-running this cell cannot slice
# an already-extracted label down to its first character, and an empty list
# becomes NaN instead of raising IndexError.
ss['skill'] = ss['skill'].map(
    lambda skills: (skills[0] if skills else np.nan) if isinstance(skills, list) else skills
)

In [50]:
ss.head(10)

Unnamed: 0,title,description,skill
0,Fundamentals of Lean and Six Sigma and their A...,Six Sigma is a data-driven improvement strateg...,Six Sigma
1,"Six Sigma Project Selection, Roles, and Respon...","Deployment of Six Sigma, Lean, or another cont...",Six Sigma
2,Six Sigma Strategic Planning and Deployment,Strategic planning of Six Sigma projects and L...,Six Sigma
3,Impact on Stakeholders and Benchmarking for Si...,The success of Six Sigma deployment in an orga...,Six Sigma
4,Using Business and Financial Measures in Six S...,Six Sigma improvement begins with assessing th...,Six Sigma
5,"Six Sigma Team Dynamics, Roles, and Success Fa...",Forming an effective Six Sigma team for drivin...,Six Sigma
6,Six Sigma Team Facilitation and Leadership,Six Sigma Black Belts must possess specific qu...,Six Sigma
7,Six Sigma Team Dynamics and Training,Black Belts have the challenging task of manag...,Six Sigma
8,Determining Requirements by Listening to the V...,Customers are at the heart of all Six Sigma in...,Six Sigma
9,"Six Sigma Business Case, Project Charter, and ...",A project charter is the most important docume...,Six Sigma


In [51]:
# Turn empty-string labels into NaN so the null count below includes them.
ss['skill'] = ss['skill'].map(empty_to_null)

In [52]:
ss['skill'].isnull().sum()

168

In [53]:
# Rows that carry a skill label; the original row index is preserved.
ss_labeled = ss.dropna(subset=['skill'])

In [54]:
ss_labeled.shape

(296, 3)

# combine all courses

In [55]:
# Stack the labeled rows of every provider. The dict keys become the outer
# index level, so each row stays traceable to its source provider.
courses_labeled = pd.concat({
    'cg': cg_labeled,
    'ec': ec_labeled,
    'fl': fl_labeled,
    'ss': ss_labeled,
})

In [56]:
courses_labeled.shape

(853, 3)

In [57]:
courses_labeled.head(30)

Unnamed: 0,Unnamed: 1,title,description,skill
cg,5,Energy Industry Fundamentals Certificate,"The Energy Industry Fundamentals course, for w...",Energy Industry
cg,6,Insurance Studies Certificate,The Certificate of Completion (CCL) in Insuran...,Insurance Industry Knowledge
cg,10,BOC Level I Training Certificate of Completion,Level I classes support the BOC operator skill...,HVAC
cg,11,Official (ISC)² CISSP Training,Our training helps you fully prepare for this ...,Cybersecurity Strategy
cg,12,NSF Certificate in Food Safety Programs,"NSF International, NSF Training & Education Se...",Food Safety
cg,13,Certified Food Safety Manager Training,Certified Managers have a responsibility to sh...,Food Safety
cg,42,Professional Development Certificate in SQL Se...,An 11-week in-depth program designed to provid...,Microsoft SQL
cg,44,.Net Web Developer Program,A 10-week comprehensive program covering web d...,.NET
cg,47,Graduate Certificate in Cybersecurity - Critic...,The Cybersecurity area of study blends both in...,Infrastructure Security Fundamentals
cg,49,Welding & Fabrication Industry Certification P...,The Monroe Community College Career Welding & ...,MIG (PULSE) Welding


In [58]:
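# Export disabled. NOTE: the train-test split section loads './data/courses_labeled.csv',
# so re-enable this line (writing to that path) whenever the labeled set must be regenerated.
#courses_labeled.to_csv('courses_labeled.csv')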

In [59]:
# Stack all rows of every provider, labeled or not. The dict keys become the
# outer index level identifying the source provider.
courses = pd.concat({
    'cg': cg,
    'ec': ec,
    'fl': fl,
    'ss': ss,
})

In [60]:
courses.head(30)

Unnamed: 0,Unnamed: 1,title,description,skill
cg,0,Certified Safety Professional (CSP),The purpose of the recertification program is ...,
cg,1,"HOSP 215, Front Office",This course presents a systematic approach to ...,
cg,2,Computer Science Program at Texas Luthern Univ...,"We believe the analysis of problems, the desig...",
cg,3,Bachelor of Science in Information Technology ...,Brandman University's Bachelor of Science in I...,
cg,4,Pharmacy Technician Certification Board Certif...,Certificants must complete a minimum of twenty...,
cg,5,Energy Industry Fundamentals Certificate,"The Energy Industry Fundamentals course, for w...",Energy Industry
cg,6,Insurance Studies Certificate,The Certificate of Completion (CCL) in Insuran...,Insurance Industry Knowledge
cg,7,Bachelor of Science in Information Technology ...,This program consists of the following courses...,
cg,8,Respiratory Care Associate of Science,Students are admitted to the Respiratory Care ...,
cg,9,BOC Level II Training Certificate of Completion,To earn a Level II Training Certificate of Com...,


In [61]:
# Persist every provider's rows (labeled or not) to the working directory.
# The two index levels (provider key, original row number) are written as the
# leading columns because to_csv includes the index by default.
courses.to_csv('courses_all.csv')

# train-test split

In [62]:
# Load the labeled subset from disk for the split.
# NOTE: this rebinds `courses`, which until this cell held the concatenation of
# all providers (labeled and unlabeled rows).
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 1' columns in the output below are
# presumably the saved index levels read back as data — confirm, and consider
# index_col=[0, 1] if they should be the index again.
courses = pd.read_csv('./data/courses_labeled.csv')

In [63]:
courses.shape

(853, 5)

In [64]:
courses.head(10)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,title,description,skill
0,cg,5,Energy Industry Fundamentals Certificate,"The Energy Industry Fundamentals course, for w...",Energy Industry
1,cg,6,Insurance Studies Certificate,The Certificate of Completion (CCL) in Insuran...,Insurance Industry Knowledge
2,cg,10,BOC Level I Training Certificate of Completion,Level I classes support the BOC operator skill...,HVAC
3,cg,11,Official (ISC)² CISSP Training,Our training helps you fully prepare for this ...,Cybersecurity Strategy
4,cg,12,NSF Certificate in Food Safety Programs,"NSF International, NSF Training & Education Se...",Food Safety
5,cg,13,Certified Food Safety Manager Training,Certified Managers have a responsibility to sh...,Food Safety
6,cg,42,Professional Development Certificate in SQL Se...,An 11-week in-depth program designed to provid...,Microsoft SQL
7,cg,44,.Net Web Developer Program,A 10-week comprehensive program covering web d...,.NET
8,cg,47,Graduate Certificate in Cybersecurity - Critic...,The Cybersecurity area of study blends both in...,Infrastructure Security Fundamentals
9,cg,49,Welding & Fabrication Industry Certification P...,The Monroe Community College Career Welding & ...,MIG (PULSE) Welding


In [65]:
# Seeds Python's built-in `random` module only.
# NOTE(review): DataFrame.sample draws from NumPy's generator, which this call
# does not seed — confirm the shuffle is seeded where it is performed.
random.seed(0)

In [66]:
# Shuffle every row. pandas draws from NumPy's random generator, which
# random.seed() does not touch, so the seed is passed explicitly to make the
# train/test split reproducible across kernel restarts.
courses_shuffled = courses.sample(frac=1, random_state=0)

In [67]:
courses_shuffled.head(10)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,title,description,skill
291,ec,820,Business Communications,"This course, part of the Business Fundamentals...",Business Communications
365,ec,1075,Machine Learning with Python: from Linear Mode...,Machine learning methods are commonly used acr...,Machine Learning
485,fl,386,Big Data and the Environment,Use current examples to explore the potential ...,Big Data
201,ec,488,Machine Learning,Machine Learning is the basis for the most exc...,Machine Learning
425,ec,1361,Programming Reactive Systems,Reactive programming is a set of techniques fo...,Scala
370,ec,1115,Computing in Python I: Fundamentals and Proced...,"This course starts from the beginning, coverin...",Python
102,ec,98,Pre-University Calculus,"Mathematics is the language of Science, Engine...",Calculus
205,ec,507,Best Practices for Project Management Success,Project management plays a key role in support...,Project Management
80,cg,294,PLUMBING,This series of courses is designed for plumber...,Plumbing
382,ec,1149,Microsoft Professional Capstone : Cybersecurity,The Enterprise Security Hands-On Lab provides ...,Cybersecurity Assessment


In [68]:
# Hold out one fifth of the rows; integer division rounds down.
test_size = len(courses) // 5

In [69]:
test_size

170

In [70]:
# The first `test_size` shuffled rows form the held-out test set.
test = courses_shuffled.iloc[:test_size]

In [71]:
# Every shuffled row after the first `test_size` goes to the training set.
train = courses_shuffled.iloc[test_size:]

In [72]:
train.head(10)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,title,description,skill
789,ss,340,Develop the Project Schedule (PMBOK® Guide Six...,This course covers the Estimate Activity Durat...,Project Planning and Development Skills
837,ss,403,The Value Proposition: Getting Your Pitch Right,The value a company offers to customers can co...,Sales
337,ec,954,Microsoft Azure Security Services,"This course is part of the <a href=""https://ww...",Microsoft Azure
572,ss,15,Determining Process Performance and Capability...,"In any improvement initiative, organizations m...",Six Sigma
221,ec,559,Orchestrating Big Data with Azure Data Factory,"This course is part of the <a href=""https://ww...",Microsoft Azure
213,ec,541,Financial Accounting Made Fun: Eliminating You...,Are financial statements a mystery to you? Do ...,Accounting
840,ss,421,Interacting with Customers,Failing to realize the importance of customer ...,Customer Service
802,ss,353,Analyzing Risk (PMBOK® Guide Sixth Edition),This course covers the Perform Qualitative Ris...,Risk Management
463,fl,211,Introduction to Dutch,Start learning Dutch todayAre you planning to ...,Dutch
581,ss,24,Statistical Process Control (SPC) and Control ...,Ensuring a process is in control is critical t...,Six Sigma


In [73]:
train.shape

(683, 5)

In [74]:
test.shape

(170, 5)

In [75]:
# Write the training rows to the working directory. The DataFrame index is
# written as the first column because to_csv includes it by default.
train.to_csv('train.csv')

In [76]:
# Write the held-out rows to the working directory. The DataFrame index is
# written as the first column because to_csv includes it by default.
test.to_csv('test.csv')