# 1. Imports

In [1]:
import re

#import nltk
#Uncomment for first run to download datasets.
#nltk.download()

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from io import StringIO

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC

from sklearn import metrics

# 2. Utilty and Common Functions (Please document sufficiently)

In [2]:
# Given an input string, this method performs the folowing operations:
# 1. Mask string fragments like \x<nn>
# 2. Mask string fragments for escape sequences like \a \b \t \n
# 3. Mask all numbers
# 4. Create a list of all the words containing letters from a-z and A-Z
# 5. Return the final filtered list of words.

def clean(text):
    # Mask string fragments like \x<nn>
    filtered_string = re.sub(r'\\x[a-z]{0,2}[0-9]{0,2}', "", text)
    
    # Mask string fragments for escape sequences like \a \b \t \n
    filtered_string = re.sub(r'\\a+', "", filtered_string)
    filtered_string = re.sub(r'\\b+', "", filtered_string)
    filtered_string = re.sub(r'\\t+', "", filtered_string)
    filtered_string = re.sub(r'\\n+', "", filtered_string)
    
    # Mask all numbers
    filtered_string = re.sub(r'[0-9]+', "", filtered_string)
    
    # Create a list of all the words containing letters from a-z and A-Z
    filtered_list = re.findall(r"\b([a-zA-Z]+)\b", filtered_string, re.M)
    
    # Return the final filtered List
    return filtered_list

In [3]:
# Give an input string this method replaces all the single letter words with empty string,
# basically removing them from the resultant string.

def mask_single_letters(text):
    filtered_string = re.sub(r'r"\b([a-zA-Z]{1})\b"', "", text)
    return filtered_string

In [4]:
# Provided a string, this method creates and returns a set containing all the unique words
# in the passed string.

def unique_words_str(text):
    result = set([])
    for word in text:
        result.add(word)
    
    return result

In [5]:
# Provided a list of strings, this method creates and returns a set containing all the unique words
# considering all the strings in the passed list.

def unique_words_list(data):
    result = set([])
    for text in data:
        result.update(unique_words_str(text))
    
    return result

# 3. Read Dataset and Clean

In [6]:
df = pd.read_csv("resume_data.csv")

In [7]:
df.sample(10)

Unnamed: 0,ID,Category,Resume
336,337,Education,b'JOB SEEKER\n128 Pleasant Street\nMinneapolis...
172,173,Information Technology,b'Jan Stolarek\nInformatics Forum 5.10\n10 Cri...
485,486,Health & Fitness,"b'CURRICULUM VITAE\nEmily S. Pugh, MA, OTR/L, ..."
481,482,Health & Fitness,"b""CV-Zeng, 10/20/2015\n\nCURRICULUM VITAE\nXia..."
942,943,Engineering,b'Sample Resume \xe2\x80\x93 Engineering\nDO N...
174,175,Information Technology,b'SOFTWARE ENGINEER\nDEVELOPER RESUME\nDEVELOP...
1210,1211,Aviation,b'Entry Level Flight Attendant Resume \xe2\x80...
642,643,Sales,"b""Retail Sales Representative Resume\nAdam J. ..."
637,638,Sales,"b'JANE DOE\nFrederick, MD | C: (301) 222-5487 ..."
27,28,HR,"b'RESUME WORLD INC.\n1200 Markham Road, Suite ..."


> ### Drop all attributes except Resume

In [8]:
df = df['Resume']

In [9]:
print("Dataset Size: %d" %(df.shape[0]))

Dataset Size: 1219


In [10]:
df.sample(5)

4       b"HUMAN RESOURCES DIRECTOR\n\xef\x82\xb7Expert...
167     b'Mason\t\r \xc2\xa0Silber\t\r \xc2\xa0\n6595\...
1162    b'Sample Debt Collector Resume Template\nAppli...
414     b'R IC H AR D A. C OUR TN EY,\n\nCELA*\n\n*Cer...
509     b'Resume Example 1 \xe2\x80\x93 Recent Graduat...
Name: Resume, dtype: object

> ### Pick one sample record

In [11]:
sample = df.iloc[10]
print(sample)

b'RESUME WORLD INC.\n1200 Markham Road, Suite 108, Toronto, Ontario M1H 3C3\nTel: (416) 438.3606 / E-mail: info@resumworld.ca\nCAREER PROFILE\nA Human Resources & Payroll Manager with over 15 years of progressive experience augmented by a\nstrong post-secondary background in Human Resources, Accounting and Business Administration.\nExperienced in Payroll Administration, Benefits/Compensation Administration, Talent/Performance\nManagement, Recruitment/Selection, Training/Development, HR Policy and Organizational Design,\nContract Negotiation, Job Costing Analysis, People Management, Progress Improvement, and\nStrategic Planning. Sound knowledge of Labour Relations, Occupational Health & Safety, Pay Equity\nand other related labour laws. Proven ability to function as a Strategic HR Business Partner and develop\nand implement successful human resources management strategies to support corporate mandate.\nCreative and innovative thinker with effective human resources management and goal se

> ### Cleanup

In [12]:
cleaned = clean(sample)
print(cleaned)

['b', 'RESUME', 'WORLD', 'INC', 'Markham', 'Road', 'Suite', 'Toronto', 'Ontario', 'MH', 'CTel', 'E', 'mail', 'info', 'resumworld', 'caCAREER', 'PROFILEA', 'Human', 'Resources', 'Payroll', 'Manager', 'with', 'over', 'years', 'of', 'progressive', 'experience', 'augmented', 'by', 'astrong', 'post', 'secondary', 'background', 'in', 'Human', 'Resources', 'Accounting', 'and', 'Business', 'Administration', 'Experienced', 'in', 'Payroll', 'Administration', 'Benefits', 'Compensation', 'Administration', 'Talent', 'PerformanceManagement', 'Recruitment', 'Selection', 'Training', 'Development', 'HR', 'Policy', 'and', 'Organizational', 'Design', 'Contract', 'Negotiation', 'Job', 'Costing', 'Analysis', 'People', 'Management', 'Progress', 'Improvement', 'andStrategic', 'Planning', 'Sound', 'knowledge', 'of', 'Labour', 'Relations', 'Occupational', 'Health', 'Safety', 'Pay', 'Equityand', 'other', 'related', 'labour', 'laws', 'Proven', 'ability', 'to', 'function', 'as', 'a', 'Strategic', 'HR', 'Business'

In [13]:
print('Original Record Length: %d, Unique Count: %d' %(len(sample), len(unique_words_str(sample))))

Original Record Length: 8202, Unique Count: 76


In [14]:
print('Cleaned Record Length: %d, Unique Count: %d' %(len(cleaned), len(unique_words_list(cleaned))))

Cleaned Record Length: 923, Unique Count: 51


# 4. Analysis (Need to start on this now as of Mar 14, 2019)

In [None]:
col = ['Category', 'Resume']
df = df[col]
df = df[pd.notnull(df['Resume'])]
df.columns = ['Category', 'Resume']
df['category_id'] = df['Category'].factorize()[0]
category_id_df = df[['Category', 'category_id']].drop_duplicates().sort_values('category_id')
category_to_id = dict(category_id_df.values)
id_to_category = dict(category_id_df[['category_id', 'Category']].values)
df

In [None]:
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(10,8))
df.groupby('Category').Resume.count().plot.bar(ylim=0)
plt.show()

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer(sublinear_tf=True, min_df=5, norm='l2', encoding='latin-1', ngram_range=(1, 2), stop_words='english')
features = tfidf.fit_transform(df.Resume).toarray()
labels = df.category_id
features.shape

In [None]:
from sklearn.feature_selection import chi2
import numpy as np
N = 2
for Category, category_id in sorted(category_to_id.items()):
  features_chi2 = chi2(features, labels == category_id)
  indices = np.argsort(features_chi2[0])
  feature_names = np.array(tfidf.get_feature_names())[indices]
  unigrams = [v for v in feature_names if len(v.split(' ')) == 1]
  bigrams = [v for v in feature_names if len(v.split(' ')) == 2]
  print("# '{}':".format(Category))
  print("  . Most correlated unigrams:\n. {}".format('\n. '.join(unigrams[-N:])))
  print("  . Most correlated bigrams:\n. {}".format('\n. '.join(bigrams[-N:])))

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
X_train, X_test, y_train, y_test = train_test_split(df['Resume'], df['Category'], random_state = 0)
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
clf = MultinomialNB().fit(X_train_tfidf, y_train)

In [None]:
software_engineer = '''﻿Irshad Ali
Email Address: irshadali18@gmail.com,irshadali@outlook.com 
Cell: 0321-7588568

Objective
I am looking forward to join a progressive organization. I am Strong team builder and leader. I have high level of personal morals and integrity. I am Goal oriented, self-motivated and committed to the successful outcome of the project. I am willing to work hard and have a great desire to learn.
Summary
    • Since March 2007, have 6 years plus of extensive hands on experience of website development.
    • An experienced team lead and team player with excellent communication and interpersonal skills who has the ability to work independently under pressure.
    • Currently working as Senior Software Engineer/Team Lead at Hashe Computer Solutions.
    • Masters in 2007 from the University College of Information Technology, Lahore, Pakistan.
Skills
Languages/Web Development
PHP, C# .Net, JavaScript, HTML, CSS, Java, XML, SQL
Frameworks
AJAX, Zend, Symfony2,  CodeIgniter
Open Source
Wordpress, Joomla, XCart, CSCart
Databases
MySQL, Oracle
Tools
Netbeans, Dreamweaver, SqlYog, NavicoSoft, MicroOlap


Experience
Hashe Computer Solution, Lahore, Pakistan 
Senior Software Engineer / Team Lead
(March 2008 – To date)
Responsibilities include team management, direct client communication and software development.
Mechtechnologies, Lahore, Pakistan 
Software Engineer
(March 2007 – February 2008)

Projects
Freight Ordering System – Hashe Computer Solutions
Role:
    • Development Lead
Tools:
    • PHP, MySQL, Ajax, JQuery , Web Services
Details:
    • This is a web based system, which provides an online competitive freight quotes within zip code range from best courier & transport companies of the region with favorable discounts and transit days. Later shipment can be booked out of these quotes and tracked though website. Companies can add / manage their locations, product catalog for swift use of the system.
    • This application works with SMC3 to acquire shipment rates for given locations and then apply different accessorial and fuel charges added by admin to calculate final shipment rates.
    • Using the back office application, admin can set different accessorial, discounts, fuel charges, and FAK classes for each company and carrier. Manage the Sales Representatives & this commission for different companies & Carriers.
    • Comprehensive report system provides reports about the shipment, carrier, customer, sale representative commission and billing reports. 
    • Complete Accounting System.

http://www.freightanywhere.com
http://www.tech-logistics.com

Online Golf Course Booking System – Hashe Computer Solutions
Role:
    • Application Developer
Tools:
    • PHP, MySQL, Ajax, JQuery, Web Services
Details:
    • This is a web based system, which provides golfers an easy way to use website to search, compare, and book golf tee times for free. With numerous golf courses available for play in most areas.

http://www.back9booking.com

Ecommerce Shopping System – Hashe Computer Solutions
Role:
    • Application Developer/Team Lead
Tools:
    • PHP, MySQL, JQuery, Zend
Details:
Complete ecommerce shopping system with following features
    • Administration system to easily update all product details, prices, pictures, stock details and other information online.
    • Manage Customer Accounts
    • Manage Wish list
    • Customer Reviews & Rating
    • Manage categories and products
    • Manage Product options and related products
    • Advanced pricing algorithms
    • Order and Invoice history
    • Take payments online using PayPal
    • Shopping cart system to allow easy purchase of products
    • Automatic email notification of orders
    • Full checkout procedure
    • Fast and friendly quick search and advanced search features
    • Reports of site visits, pages viewed, most viewed products, most ordered products and most viewed categories
http://www.tcig.co.uk

Free Home Listing – Hashe Computer Solutions
Role:
    • Application Developer
Tools:
    • PHP, MySQL, JQuery, Codeigniter
Details:
This is a property portal with three access level
    • Customer Login
        ◦ Search Properties by State And City, Key words and Zip Code with option in different miles radius i.e. search all properties having zip code 03055 and within 10 miles radius around it.  
        ◦ Register as Customer
        ◦ Manage their Listings
        ◦ Add/Edit property
        ◦ Add/Edit/Delete Properties Images
        ◦ Delete Properties
    • Agent Login
        ◦ Add/Edit property
        ◦ Add/Edit/Delete Properties Images
        ◦ Delete Properties
    • Admin login
        ◦ Manage Customers (Add/Edit/Delete/Active/Inactive)
        ◦ Manage Customer Packages
        ◦ Manage Agents (Add/Edit/Delete/Active/Inactive)
        ◦ Manage Listings (Add/Edit/Delete/Active/Inactive)
http://demo.hashe.com/freehomelistings/

Rockingham Acres – Hashe Computer Solutions
Role:
    • Application Developer
Tools:
    • PHP, MySQL, JQuery
Details:
This is an Online Flower Store has
    • Online Shopping Cart
    • Word Press Blog
http://www.rockinghamacres.com/


Third Coast Collection – Hashe Computer Solutions
Role:
    • Application Developer
Tools:
    • PHP, MySQL, JQuery
Details:
This website has
    • Online Shopping Cart
    • Authorized .Net Payment Integration
    • Word Press Blog
http://www.thirdcoastcollection.com/

PPA-Office Management System – Hashe Computer Solutions
Role:
    • Application Developer
Tools:
    • PHP, MySQL, JQuery
Details:
PPA (Pakistan Progressive Associate) is  licensed  by  Ministry   of  Labor,  Manpower  and  Overseas   Employment ,   Government of  Pakistan for recruitment  of  manpower.  So PPA-Office Management System is developed to manage & integrate all PPA internal processes (i.e. client, contracts, jobs, job seeker registration, resume bank, recruitment process, and visa & departure process). We split this big system into following modules.
    • Office Workflow Management System Administration: This application will allow the administration to
        ◦ Manage Companies, Contracts
        ◦ Application Configurations
        ◦ Manage invoices
        ◦ Manage administrative expenses
            ▪ Advertisement costs
            ▪ Courier charges
            ▪ Misc. charges to be posted
    • Office Workflow Management System: This application will automate the recruitment process of PPA administration and will implement all the business processes hence allowing straight through processing of jobs. This application will have three separate work flows
        ◦ Pre Processing – Jobs management, Resume management and data entry, short listing, interview scheduling and execution, selection of candidates and forwarding for post-processing. 
        ◦ Post Processing
        ◦ Archiving
    • Online Client / Candidate Portal: This portal will allow
        ◦ PPA administration to manage advertisement jobs
        ◦ PPA affiliated companies to:
            ▪ Login into the system
            ▪ Add jobs
            ▪ View list of candidates forwarded by PPA administration, short list them, add notes
            ▪ Browse/Search (if allowed) resume database, create resume lists, add notes on resumes
        ◦ Potential candidates to:
            ▪ Register
            ▪ Add resumes
            ▪ Search for jobs
    • System will allow the printing of all documents required during the execution of a case. System will allow three types of print
        ◦ Printing with PPA logo
        ◦ Printing without PPA logo – to be printed on PPA letter head
        ◦ Custom printing

NetSignNews.com – Hashe Computer Solutions
Role:
    • Development Lead
Tools:
    • PHP, MySQL
Details:
    • Net Sign News is a specialized news channel for with hearing disabilities. NetSignNews.com is an online news portal for NetSignNews. News videos are streamed on demand using FLV format files. This application has a power administration utility using which administrator can manage the contents being published on the website.

VegaPrint.co.uk – VegaSoft Technologies
Role:
    • Development Lead (Freelance)
Tools:
    • PHP, MySQL
Details:
    • This is print media service provider’s website. Here user can order print media products by paying online payment through PayPal, users can also track there orders online. 
    • Using the back office application, admin can add different products, services, special offers, shipment charges, manage users and orders. 

Bug Tracking – Mechtechnologies
Role:
    • Development Team Member
Tools:
    • PHP, MySQL
Details:
This is a web based application which allows software developers to track new bugs, prioritize and assign bugs to team members, generate bug reports, send email messages between users, attach files, customize the account according to their special needs and more.

Academic Projects
Student Information System - MIT Final Project
    • Student Information System superior University Lahore is a web based application developed in PHP and MySQL as database.


Education
Punjab University College of Information Technology, Lahore, Pakistan 
MSC Information Technology 
Year: 2007

Certifications
Microsoft Technologies (Exam: 70-480)
Microsoft Certified Professional 
Year: 2013
Microsoft Technologies (C# .Net)
EVS Lahore 
Year: 2013

Interests
Computer Gaming
References
References can be provided on request.'''
print(clf.predict(count_vect.transform([software_engineer])))

In [None]:
job_profile = '''﻿
Jyoti Jha Domicile State: Jharkhand, India
Female,  Languages Known: English, Hindi, Bengali and Maithili
CAREER OBJECTIVE
To work with an organization that provides a competitive environment so that I can explore my potentials and make a difference.
PROFILE SUMMARY
· Competent professional with 30 months of experience in Project advisory & consulting and Sales and marketing
· Comprehensive understanding of various government schemes and policies related to agriculture and rural development
· Working experience in developing project feasibilities and DPR’s, field survey, evaluation studies
· Working experience of handling a regional office and organizing stakeholders workshops
· Comprehensive understanding of agricultural production and marketing system, value chain of the different agricultural commodities in India
· Designing & implementing marketing plans for augmenting the business volume by enhancing brand visibility
· Driving sales initiatives to achieve business goals & conducting competitor analysis to track the market trends
PROFESSIONAL EXPERIENCE
Global AgriSystem Pvt Ltd
Manager July 2015-Present (5 months)

 • Data Analysis • Feasibility and Market Studies
Key Expertise • Stakeholder Mapping & Interaction • Project Report preparation
 • Project management  
1. Asian Development Bank’s Agribusiness Infrastructure Development Investment Programme: Transaction Advisory Services (April, 2015 onwards)
This is project is to assist the Government structure the project through a bid process for selection and engagement of private sector partner for investment in the 2 Integrated Value Chains in Bihar.
· Stakeholders mapping and interaction for investment & organized investors meet covering 100 participants from across India.
· Development of various models for PPP investment
2. Asian Development Bank’s Agribusiness Infrastructure Development Investment Programme: Baseline Survey & Post Survey Activities
This project is aimed to develop backward links to the production areas through contract farming and producer companies, capacity building to strengthen technical and managerial skills along the value chain.
· Co-ordination with all Project Management Unit, Government of Bihar
· Questionnaire designing, Pilot Survey, Data Analysis and Report writing
3. World Bank’s Project for ‘Bihar Food Processing Policies and its implications’ (May 2015-June 2015)
The Project aimed to do sectorial analysis of food processing sector in Bihar benchmarking it with the other developed states Review of the regulatory and policy environment to identify policy/regulatory barriers/incentives to investments in food processing and thereof suggesting policy reforms for food processing policy in Bihar.
· Data research, selection of the stakeholders to be interviewed, questionnaire, conducting field survey, meeting with stakeholders, analysis of data & report writing, etc.
4. Marketing Strategies for Organic Produce in Sikkim, Government of Sikkim (December 2015 onwards)
The project aims to develop strategies and action plan for selected commodities in Sikkim for Government of Sikkim.
· Studied the market of national, international for these commodities
· Analyzing and planning the strategies for developing market strategies
5. Managed regional office operations at Patna, Bihar
6. Prepared technical proposals for applying in various Private, Government and International Development Agency projects.
Global AgriSystem Pvt Ltd
Email: jyotijha.niam11@gmail.com Ph. No. +91-7042837028
 
Jyoti Jha Domicile State: Jharkhand, India
Female,  Languages Known: English, Hindi, Bengali and Maithili
Assistant Manager May 2014-June 2015 (13 months)

 • Data Analysis • Feasibility and Market Studies
Key Expertise • Stakeholder Mapping & Interaction • Project Report preparation
 • Project management  
7. Preparation of Commodity Profiles and Conducting Value Chain Studies for Selected Commodities (Maharashtra Agricultural Competitiveness Project) (May 2014-June 2015)
This project was to prepare commodity profile for identified 25 commodities and doing value chain analysis thereof of the 10 priority crop in Maharashtra.
• Prepared and designed the commodity profiles’ methodology and survey tools for identified stakeholders.
• Analyzed survey result & findings and prepared commodities profiles for 25 crops.
8. Impact Evaluation of Sugar Development Fund, Department of Food and Public Distribution, Ministry of Consumer affairs, Food & Public Distribution (Jam 2015-March 2015) It was an impact evaluation study to study the progress and impact of SDF loan on sugar mills across all sugar growing states of India
• Data analysis and report preparation
9. Impact Evaluation and Socio economic study of Bt Cotton in India, Department of Agriculture & Co-operation, Ministry of Agriculture (Dec 2015-June 2015)
This was study conducted in 9 cotton growing states to evaluate the impact of Bt cotton on farmer’s socio economic condition, environment, agriculture economy of India.
• Stakeholders consultation, questionnaire drafting, pilot survey, data analysis and preparation of report
10. Impact Evaluation of Agricultural Marketing Infrastructure Grading and Standardization, Directorate of Marketing and Inspection (April 2014-September 2014)
The project aimed to undertake comprehensive study of various types of projects sanctioned under
the Scheme in selected States. The study was focused on impact of scheme on reduction in Post-Harvest Losses, Value Addition, Increase in Farmers Income
• Information analysis, design survey and research methodology design, questionnaires for different stakeholders, Questionnaires pilot testing, field survey monitoring and coordination, primary data analysis with excel and SPSS, report writing.
11. Prepared technical proposals for applying in various Government and International Development Agency projects.
Monsanto India Ltd.
Territory Sales Manager at Bhavnagar April 2013-April 3014 (12 months)

Key Expertise • Stakeholder Mapping & Interaction • Sales & Marketing • Demand Estimation Survey & Analysis
  
INTERNSHIP
BASF April 2012-June 2012 (3 months)
 
Jyoti Jha Domicile State: Jharkhand, India
Female,                                                                                                                 Languages Known: English, Hindi, Bengali and Maithili

 
Project undertaken To study farmers acceptance level for the BASF package in Cucurbits (West Bengal)
Key learning:
• Understood the agro chemical market by analyzing the competitors in the same segment. • Understood the buying behavior of farmers for agrochemical products.
• Analyzed the importance of channel partner in making marketing strategies in agrochemical industry.
• Prepared strategies to increase sales
• Prepared new product launch strategy for a new agrochemical.
 

Live Project
Philips India June 2012-August 2012 (3 months)

 Project undertaken Customer Delight for low and high involvement products
• Understood the customer preferences and behavior for electronic goods both pre and post-sale of goods.
   
EDUCATION  

Degree Institute/University Year Percentage
PGDABM NIAM, Jaipur (Rajasthan) 2013 73.30%
B.Sc.(Agriculture) Bidhan Chandra Krishi Vishwavidayalaya, Nadia (West Bengal) 2011 82.40%
Sr. Secondary Motilal Nehru Public School, Jamshedpur (Jharkhand) 2007 67.20%
Secondary Motilal Nehru Public School, Jamshedpur (Jharkhand) 2005 74.4%

POSITION OF RESPONSIBILITY
NIAM, Jaipur Core member of Placement Committee and Library committee (2013)
 

EXTRACURRICULAR ACTIVITIES
Social Events and
Academic Competitions • Participated in Global Agri connect held at IARI, Delhi, where I interacted with the eminent
personalities in Agri-business sector (2012)
• Participated in debate at Jaipur National University, Jaipur (2012)
• Participated in the Workshop on Export Import in Jaipuria University, Jaipur (2012)
• Participated in changing tomorrow an International Youth Cultural event based on “Thought
Leadership” held at Jaipur (2011)
• Worked as a volunteer under NSS at Nadia, West Bengal (2011)
Hobbies • Reading novels both fiction and non-fiction '''

print(clf.predict(count_vect.transform([job_profile])))


In [None]:
job_profile = '''﻿
www downloadmela com satheesh mobile email seeking middle
level assignment in human resource management hrm with a
growth oriented organization professional synopsis a dynamic professional 
with  years of experience in hrm broadly in recruitment performance
appraisal compensation training and development policy procedures 
employee engagement activates general administration and personnel
management iso auditing and employee relation currently associated
with thakkar group of companies as assistant manager hr borivli
west proficient in planning strategizing and implementing abilities
with demonstrated success in handling hr related issues as well as
administrative issues expertise in handling hr functions entailing 
recruitment training grievance issues and managing vendors adept
at handling day to day administrative activities in coordination with 
internal external departments for smoothen business operations an 
effective communicator with excellent relationship building 
interpersonal skills strong analytical problem solving and 
organizational ability posses flexible and details orientated
attitude organisational experience tenure company name designation september 
till date thakkar group of companies assistant manager hr april  december  wire and wireless india ltd an essel group 
company zee network sr executive hr january  april  raffles solutions pvt ltd thakral 
group of companies executive hr january  december  perfect careers consultancy coordinator
hr core competencies hr functions recruitment compensation coordinating manpower planning 
recruitment induction exit interviews ensuring culture fit developing introducing new 
recruitment policy as per staffing projections handling salary negotiation payroll
system and compensation administration handling joining formalities inductions employee records and 
file generation assigning employee no new system and policies www downloadmela com defining plans
polices and procedures for gather operational efficiency and employee development developing updating and implementing compensation plans rewards recognition scheme hr policies and communicating them across the organization at all level maintaining an employee master data along with their leave travel administration counseling grievance handling of the employee to maintain healthy work environment and facilitating employee satisfaction hr survey and community development activities performance appraisal handling performance management system and identify scope for enhancing them initiated reward recognition program incentive system and training and development based on the pms identify the talent enhance them for the benefit of an employee as well as organization training and development conducted managerial technical training to enhance to enhance the managerial and soft skills of the employee employee engagement activities introduced knowledge sharing forum k share to help employees in increasing their technical knowledge these interactive forums enable employees to share their technical knowledge with the rest of the teams and it s also a good time out from their regular work schedule it also acts as a platform to discuss new technologies thus benefiting both employees and company in many ways organize employee events birthday parties sports and games and family get together etc introduced r xc xaflexion a platform where the new joiners can interact with the existing employees and get to know each other while having fun general administration coordinating with chartered accountant for tax computation coordinating with employees to ease the tax filing process coordinating with consultant for all the shop and establishment related work client audit management iso auditing iso contribution following the iso procedures for the quality maintenance filing the hr dept activities as per the iso documentation to ensure quality work employee relations this role includes maintaining a good relation with all the employees in case employees are facing some problem helping them out and counseling them if employees are finding it difficult to work with their team members finding out what are the reasons and solving them if it becomes difficult to handle at first level then escalating the matter to next level notable avhievements received appreciation letter from management clients as well as employees for day to day interaction and coordination successfully completed shop and establishment for raffles solutions pvt ltd at mumbai www downloadmela com first rank holder in final year in college qualifications persuing masters of business administration hr from icfai university trichur  diploma in business administration from icfai university trichur  masters of commerce from mumbai university  bachelors of management studies from mumbai university personal details residential address date of birth th june  nationality indian marital status married english hindi gujarati marathi malayalam languages known www downloadmela com
.'''
print(clf.predict(count_vect.transform([job_profile])))

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
models = [
    RandomForestClassifier(n_estimators=200, max_depth=3, random_state=0),
    LinearSVC(),
    MultinomialNB(),
    LogisticRegression(random_state=0),
]
CV = 5
cv_df = pd.DataFrame(index=range(CV * len(models)))
entries = []
for model in models:
  model_name = model.__class__.__name__
  accuracies = cross_val_score(model, features, labels, scoring='accuracy', cv=CV)
  for fold_idx, accuracy in enumerate(accuracies):
    entries.append((model_name, fold_idx, accuracy))
cv_df = pd.DataFrame(entries, columns=['model_name', 'fold_idx', 'accuracy'])
import seaborn as sns
sns.boxplot(x='model_name', y='accuracy', data=cv_df)
sns.stripplot(x='model_name', y='accuracy', data=cv_df, 
              size=8, jitter=True, edgecolor="gray", linewidth=2)
plt.show()

In [None]:
cv_df.groupby('model_name').accuracy.mean()

In [None]:
model = LinearSVC()
X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(features, labels, df.index, test_size=0.33, random_state=0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
from sklearn.metrics import confusion_matrix
conf_mat = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(10,10))
sns.heatmap(conf_mat, annot=True, fmt='d',
            xticklabels=category_id_df.Category.values, yticklabels=category_id_df.Category.values)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

In [None]:
from sklearn import metrics
print(metrics.classification_report(y_test, y_pred, target_names=df['Category'].unique()))