# Prediction Model for Candidate Join-NoJoin

In [3]:
import pandas as pd 
import spacy
import en_core_web_sm
import re

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)


# We can load the English NLP pipeline in the following way:
nlp = spacy.load("en_core_web_sm")

# Define a custom method for Data Pre-Processing 
def pre_process(reviewTexts):
    cleanReviewList = []
    # Lowercasing
    text = reviewTexts.lower()
    text = text.replace("'", '')
    # Replacing punctuation in the tokens
    text = re.sub(r"[!\"#$%&'()*+,/:;<=>?[\]^_`{|}~]+", " ", text)
    x = " ".join(token.text for token in nlp(text) if token.text.isalpha())
    return x

In [4]:
import docx
import fitz  # PyMuPDF Module for PDF Parsing
import os

# Definining a custom method for reading PDF Resume using PyMuPDF Library
def getPdfResumeText(file) :
    pdfFileObj = fitz.open(file)
    page_count = pdfFileObj.pageCount

    #print("pdfReader.numPages" ,page_count )
    #print(pdfFileObj.metadata)

    # Extract text from every page on the file
    text = [pdfFileObj.loadPage(i).getText("text") for i in range(page_count)]
    return str(text).replace("\\n", "")

# Defining a custom method for reading docs resume
def getDocResumeText(filename):
    return '\n'.join([para.text for para in docx.Document(filename).paragraphs if para.text.strip()])

# Initialise the path where all the resumes are stored
mypath='E:/AMPBA/Term2/FP/FP-Project/Resumes/' #enter your path here where you saved the resumes
resumeFiles = [os.path.join(mypath, f) for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath, f))]
print(resumeFiles)

#Read content of Resumes into a Datarframe 
resumeText = []
for resume in resumeFiles:
    f,ext = os.path.splitext(resume)
    if (ext == ".pdf"):
        resumeText.append(getPdfResumeText(resume))
    elif (ext == ".docx"):
        resumeText.append(getDocResumeText(resume))
    else:
        continue
        
resumes_df = pd.DataFrame(resumeText, columns = ['Resume'])
resumes_df['Id'] = resumes_df.index + 1
resumes_df

['E:/AMPBA/Term2/FP/FP-Project/Resumes/Aayushi Resume.docx', 'E:/AMPBA/Term2/FP/FP-Project/Resumes/Bhaskar_Verma.pdf', 'E:/AMPBA/Term2/FP/FP-Project/Resumes/Manager- Sumit Chadha.doc', 'E:/AMPBA/Term2/FP/FP-Project/Resumes/Manager- Sumit Chadha.pdf', 'E:/AMPBA/Term2/FP/FP-Project/Resumes/Manish_Manohar_Resume.pdf', 'E:/AMPBA/Term2/FP/FP-Project/Resumes/SiddharthResume.pdf', 'E:/AMPBA/Term2/FP/FP-Project/Resumes/VarnikaGupta_finalresume.pdf']


Unnamed: 0,Resume,Id
0,"RESUME\nAAYUSHI KUMARI\t\t\t\t \nContact No: 9835791952\nEmail id: aayushirajpoot1997@gmail.com\t\t\t \nAmbition :\n- Desirable for a Job where I can use my skills and which will help to me enhance my knowledge and skills. That will also helpful in growth of company.\nEducation :\n2012\t\t10thClass\t\t\tR.K.M.S High college 71%\n2014\t\t12thClass\t\t\tJ.L.N College\t\t 58.3%\n2015-2018\tB.tech\t\t\t\tK.I.T.M College\t\t 75%\nTraining:-\nProfessional Training\ncomplete 6 month Professional training on Core JAVA, Manual Testing from QSpider Noida \nTechnical Ability:\nManual Testing \nBasic Computer Graphics\nProgramming:\nBasic C language\nCore JAVA\nHTML\nTools:\nNet Beans\nEclips\nJDK\nTechnical Activity:-\nProject:\nObject:- Locate me.\nDescription:- The objective of this project is to find location of anything like if we search atm then it display atm near a selected radius.\nTool:- Android.\nPersonal Details:-\nFather’s Name : Krishn Kumar Singh\nOccupation : Farmer\nAddress: Vill- Mirzapur Post- Mirzapur Bihar.\nDeclaration:\nHere I Declare that I am responsible of my all skills and knowledge which are mention in this resume.\nDate: \nAayushi Kumari\n\n\n",1
1,"[' Bhaskar’s Resume This document is the exclusive property of Xavient Digital - Powered by TELUS International. Copying, distribution, electronic mail is strictly prohibited unless written permission is obtained from the authorized personnel. Bhaskar Verma Summary: \uf0b7 Currently I am working as Technical Lead with HCL Technologies and have 7.2 years experience. \uf0b7 Technical proficiency in a high paced production environment. \uf0b7 Experience in Business application software development, production support, team management, designing, coding and testing of variety of software applications with excellent analytical, logical and programming skills. \uf0b7 An effective team player with exceptional planning and execution skills coupled with systematic approach and quick adaptability. \uf0b7 Rich domain knowledge & comprehensive understanding of various Software & designing tools. \uf0b7 Worked on E-Commerce/Financial/Publishing/Hospital-Workflow/Inventory-Billing and Textile domains. \uf0b7 Proficient in working with Java/J2EE, Spring, Spring Boot, Struts, JDBC, Hibernate, JPA and OJB. \uf0b7 Extensive experience and hands on in writing Java applications with design patterns, Oops concepts, optimized codding and algorithms. \uf0b7 Generating the weekly monthly statistics reports related to the support/enhancements and providing them to the senior management. \uf0b7 One point of contact for java applications related issues/escalation and maintain effective communication between the customer and offshore team. \uf0b7 Also has expertise in client side technologies like JavaScript, JQuery, CSS. Technical Skills: Languages/Technology: JAVA, J2EE, Object Oriented Programming, Design Pattern, EJB, Multithreading, Spring, Spring boot, JAXB, Hibernate, JPA, Rest API, Microservices, ELK, Kafka, JUNIT. Operating Systems: Windows XP, Windows 7, Windows 8 and Windows 10 Web Programming Lang: Servlet, JSP, Freemarker Template(FTL), VM, HTML ,CSS3, SASS Database: MySQL Server2005, Mysql Server 5.7.11, Solr Web/App servers: Apache Tomcat, JBOSS Web/App servers: Apache Tomcat, JBOSS Tools: Mysql Query Analyser, Mysql Workbench , Fortify, SonarJava Editor: Eclipse Oxygen, Juno, Helios & Indigo, IntelJIdea 15.1 Project Tools: Maven Education: Degree/Class University/Board Place Year BCA Agra, U.P. 2006-2009 MCA U.P.T.U Greater Noida, U.P. 2009-2012 Professional Experience: Technical Lead, HCL Technologies Oct 2018–Present Project Summary ', ' Bhaskar’s Resume This document is the exclusive property of Xavient Digital - Powered by TELUS International. Copying, distribution, electronic mail is strictly prohibited unless written permission is obtained from the authorized personnel. Client: Delta Dental(US, California) Oct 2018-Present CX-CoreTech(A Ecommerce Domain Based Project. ) Team Size: 15+ project resources Role: Technical Lead. Technology: Windows platform, Oracle, Solr, Java/J2EE, Tomcat, Spring Boot, Microservices Architecture, Rest, SOAP, KAFKA, Spring Data JPA Details: CX-Coretech ecommerce domain based project. It is implementing for provide the reliable platform for sale the products of client on virtual environment. It started very recently from scratch. In this project we are following the microservices architecture with Spring Boot and Spring Data JPA. My roles and responsibility in this project as Technical Lead. I am serving for this project by researches on the technologies and provide the solution of the coming challenges. Sr. Software Engineer, L&T Infotech Dec 2017–Oct 2018 Project Summary Client: ABSA (Bank of South Africa) Feb 2018–Oct 2018 ROA Teller(A Banking Domain Project.) Role: Sr. Software Engineer. Team Size: 20+ project resources Technology: Windows platform, MySQL Server 5.7, Java/J2EE, JBoss 7.1.0, Spring MVC Spring boot 1.5.10, Microservices, Spring JDBC Template, Angular-4. Details: ROA Teller was a banking domain based project. It was just newly started project from scratch, based on transactions of banking with B2B and B2C way. We were implementing the rest api under the microservice architecture using Spring Boot and consuming api using Angular-4 in this project. My role as a Sr. Java Developer and responsibilities are for implementing the business logic and implementation of rest api. Sr. Java Developer, Attune Infocom PVT. LTD. Jan 2017-Dec 2017 Project Summary Role: Sr. Java Developer Client: Pixalere Organization(U.S) Jan 2017-Dec 2017 Pixalere(Patient Management and Treatment Flow Management System ) Team Size: 20+ project resources Technology: Windows platform, MySQL Server 5.7, Java/J2EE, Tomcat-7, Spring MVC, Hibernate, Spring boot, JPA, VM, CSS3, jQuery, Angular 2. Details: Pixalere was patient management system. This project was based on hospital domain. It provides the functionality for set and manage the treatment flow, patient’s status, required treatment, point-out diseases, required tools, doctor’s requirements etc.. Client: Alight Solutions Sep 2017- Dec 2017 Log Aggregation(An IBM Project) Role: Sr. Java developer Team Size: 10+ Technology: Windows platform, Java/J2EE, Spring Boot, Microservices Elastic Search, Logstash, Kibana, Filebeat Details: Log Aggregation was a just start-up application by scratch. The purpose of this application is that provide the rich way to tracking and maintaining the generated logs in bulk of another huge applications. Java Developer, Logixinfo Solutions PVT. LTD. Feb 2014-Jan 2017 Project Summary Client: Orderica Organization(US, California). Feb 2014-Jan 2017 Orderica(Website) ', ' Bhaskar’s Resume This document is the exclusive property of Xavient Digital - Powered by TELUS International. Copying, distribution, electronic mail is strictly prohibited unless written permission is obtained from the authorized personnel. Team Size: 15+ project resources Technology: Windows platform, MySQL Server 5.6, Java/J2EE, Tomcat-7, Spring, Hibernate, JPA, Webservices, FTL, JSP, CSS3, SASS, Compass, Dust.js, jQuery. Details: Orderica was a Multi-Tenancy based E-Commerce domain and Content Management System website, specially based on jewellery business. It was also providing facility for create own website and templates. It has category of world’s best E- Commerce website like Shopify.com. Role: Java Developer + Strong Frontend Handler and Developer. Java Developer, TechSync Consulting Pvt.Ltd. Jun 2012–Oct 2013 Project Summary Inventory & Billing System Client: DELTA. Team Size: 4+ project resources. Technology: Windows platform, MySql, Java/J2EE, Tomcat, Struts, Hibernate. Details: The project was to enhance & maintain Inventory/Billing application. This project was based on Finance domain. Role: Java developer Client: Harsh Policlinic Sep 2012 – Feb 2013 Team Size: 6+ project resources. Technology: Windows platform, MySql, Java/J2EE, Tomcat 7.0, Jsp, Spring, Hibernate. Details: This was a web based hospital management system which has modules like IPD (In Patient details) registration, and OPD (Out Patients details) registration, Stores, Doctor Diagnosis and Labs. Role: Java developer (Bhaskar Verma) ']",2
2,"['Sumit Chadha Contact: +91-7093883741 (India), Email: sumit.chaddha@gmail.com SUMMARY: \uf0b7 16+ years of total experience in DevOps, Release Management and QA. \uf0b7 Proficient in Project Management and Release Management activities. \uf0b7 Rich experience in handling Customer Requirements, Team Management, Interfacing with various stakeholders, testing and Defect Management. \uf0b7 Proficient in communication. Leveraging technical and business knowledge to communicate effectively with various stakeholders. \uf0b7 Strong ability to estimate, implement and manage project, resources, track issues, report status and mitigate risks. \uf0b7 Managing relationships and coordinating work between different teams at different locations. \uf0b7 Expertise in various domains like Retail, Security, HR, e-Commerce, Medical. Onsite client handling experience in USA and Australia. \uf0b7 Worked on tools/language like – Jenkins, Jira, Shell scripting, T-Plan, elementool.com, Silk Performer, VSS. \uf0b7 3 months of previous experience in HSBC, Delhi. ACHIEVEMENTS & AWARDS: \uf0b7 Insta award in Engineering Unit in 2020, 2019, 2018 \uf0b7 250 ESOPs by Infosys for excellent performance in 2017. \uf0b7 Insta award in Engineering Unit in 2015 \uf0b7 Certificate of appreciation for best transition from outgoing vendor in year 2014. \uf0b7 Award for Excellence in Infosys for year 2010 for Internationalization of CA Spectrum product. \uf0b7 PRIMA award in Infosys for year 2010 for Internationalization of CA Spectrum product. \uf0b7 Received Spot Award for best performer in team in year 2007. \uf0b7 Received Spot Award for best performer in team in year 2005. Technical/Process Proficiencies: \uf0b7 Agile tool – Jira \uf0b7 Agile Methodologies - Scrum \uf0b7 Project Management - Microsoft Project, Infosys specific allocation, confirmation tools \uf0b7 Quality - Knowledge of CMMI principals, Experience in Quality audits \uf0b7 Dev ops – Jenkins \uf0b7 Scripting - Shell scripting \uf0b7 Cloud – Basics of AWS \uf0b7 Testing – Functional (Manual) \uf0b7 IAM tool - Siteminder ', ' EDUCATIONAL QUALIFICATIONS: Bachelor of Information Technology – 2003 Shyam Lal College, Delhi University with 65% CBSE Delhi – XII – 1999 with 81% CBSE Delhi – X – 1997 with 82% PROFESSIONAL EXPERIENCE: Organization Role Duration Infosys Limited Senior Project Manager Jul 2019 till date Infosys Limited Project Manager Oct 2015 to Jun 2019 Infosys Limited Technology Lead Oct 2009 to Sep 2015 Infosys Limited Programmer Analyst Feb 2007 to Sep 2009 Infosys Limited Software Engineer May 2004 to Jan 2007 HSBC Temporary Assistant Oct 2003 to Dec 2003 Infosys Limited, Chandigarh, India May 2004 - Present Senior Project Manager Key Responsibilities: \uf0b7 Leading multiple accounts across various geographical locations. \uf0b7 End-To-End project responsibility. \uf0b7 Client handling (Meetings and Status Reporting) \uf0b7 Onsite Coordination \uf0b7 Preparing projects for CMMI and getting the internal assessments done. \uf0b7 Infosys internal management activities like allocations, budget submissions, performance appraisal of team members. Projects Worked Upon (Starting from recent): Project-1 Program Manager for AML DevOps team Customer Leading technology company in USA Period Jan 2020 – Till Date Role Senior Project Manager Key Activities \uf0b7 End to end project management activities \uf0b7 Creation of dashboard for weekly status to client \uf0b7 Onsite Coordination \uf0b7 Handling team issues \uf0b7 Customer handling Project-2 Program Manager for Central DevOps team Customer Leading Bank in Australia Period Jun 2017 – Dec 2019 Role Project Manager ', 'Key Activities \uf0b7 Creation of dashboard for weekly status to client \uf0b7 End to end project management activities \uf0b7 Onsite Coordination \uf0b7 Played role of Scum Master \uf0b7 Handling team issues. \uf0b7 Ideas for productivity improvement Project-3 Release Management for Yard Management System Customer Leading retailer in USA Period Apr 2014 – May2017 Role Project Manager Key Activities \uf0b7 Working as Automation Consultant \uf0b7 Automating complex manual scenarios using shell scripting \uf0b7 Managing release schedules \uf0b7 Creation of dashboard for weekly status to client \uf0b7 Onsite Coordination \uf0b7 Knowledge Transfer from outgoing vendor at the start of the project \uf0b7 Handling team issues. Project-4 CA, Premier Care, Automation Framework development Customer Leading Computer Software provider in US and other countries Period Sep 2013 - Mar 2014 Role Project Manager Key activities \uf0b7 Requirement Gathering \uf0b7 Creation of POC for a test case using QTP \uf0b7 Weekly client meetings and daily status reporting \uf0b7 Handling team issues and getting them resolved \uf0b7 Infosys internal management activities like allocation, billing, and budget. \uf0b7 Handling team issues. Project-5 Malibu, QA of Medical Software Customer Leading Medical device organization in US Period Dec 2012 - Aug 2013 Role QA lead Key Activities \uf0b7 Understanding client product, requirements and processes \uf0b7 Knowledge transfer to other team members. \uf0b7 Onsite Coordination \uf0b7 Testing scenarios which were complex and needed hardware available at onsite only. \uf0b7 Reviewing Test plans and getting them approved in formal client meetings(requirement in medical domain) \uf0b7 Client meetings and Status reporting \uf0b7 Handling team issues. Project-6 CA Siteminder implementation Customer Leading Automobile company in USA/Japan Period Aug 2011 to Nov 2012 Role Siteminder Consultant Key Activities \uf0b7 Understanding requirements of various Intranet sites for the client \uf0b7 Creation of requirements and detailed implementation document and handing over to offshore team ', '\uf0b7 Onsite coordination \uf0b7 Representing production changes in client Change Request meetings. \uf0b7 Client meetings and Status reporting \uf0b7 Handling team issues. Project-7 Siteminder Certification Customer Leading Computer Software provider in US and other countries Period Aug 2010 to Jul 2011 Role QA Lead Key Activities \uf0b7 Understanding CA’s clients specific requirements \uf0b7 To do POC on third party new software versions or new third party tools. \uf0b7 To do changes to make them work with Siteminder \uf0b7 Getting the code changes reviewed with CA \uf0b7 Regression testing CA siteminder with the new software versions/tools. \uf0b7 Onsite coordination. \uf0b7 Client meetings and Status reporting \uf0b7 Handling team issues. Project-8 Spectrum Internationalization(I18N) QA Customer Leading Computer Software provider in US and other countries Period Feb 2009 to July 2011 Role QA Lead Key Activities \uf0b7 Reviewing I18N test plans \uf0b7 Managing the machine setup for OS with various languages \uf0b7 Installing necessary packages on Solaris and Linux boxes for Japanese language \uf0b7 Review of defects opened \uf0b7 Representing QA team in Triage meetings with development teams \uf0b7 Ensuring defects closure with status to all stakeholders \uf0b7 Creating of status reports for I18N testing releases \uf0b7 Handling team issues Project-9 Time Clock Enhancement Customer Leading retailer in USA Period Sep 2008 to Jan 2009 Role QA onsite coordinator Key Activities \uf0b7 Understanding client requirements \uf0b7 Creation of test plans, test cases and getting them reviewed with client \uf0b7 Testing scenarios related to hardware present at onsite \uf0b7 Review of defects opened by offshore team \uf0b7 Representing QA team in Triage Meetings held at onsite \uf0b7 Ensuring defects closure with status to all stakeholders Project-10 CoPilot Health Management System Localization(L10N) Customer Leading health care in USA Period July 2007 to Aug 2008 Role QA Module Lead Key Activities \uf0b7 Reviewing test cases written by other team members. \uf0b7 Working with language experts to test the language specific OS \uf0b7 Reviewing hand written test plans to be sent to FDA (A requirement for medical domain) ', '\uf0b7 Logging defects for team working on 8 different European languages \uf0b7 Writing test cases Project-11 Tactical Merchandizer Customer Leading ecommerce in Australia Period Aug 2006 to June 2007 Role QA Engineer Key Activities \uf0b7 Writing test cases \uf0b7 Manual testing for e-commerce site. \uf0b7 Defect logging and status update as per Triage meetings Project-12 Policy Server QA Customer Leading Computer Software provider in US and other countries Period July 2004 to July 2006 Role QA Engineer Key Activities \uf0b7 Writing test cases \uf0b7 Manual testing \uf0b7 Automation of test cases using Perl on client provided framework \uf0b7 Performance testing using SilkPerformer with client provided performance scripts \uf0b7 Defect logging and status update as per Triage meetings PERSONAL DETAILS: Date of Birth 14th Sep, 1981 Nationality Indian Marital Status Married (having two kids) Language Known English & Hindi ']",3
3,"['Manish Manohar Mobile: +91-9743075566 Email id: manish201manohar@gmail.com To be a successful software professional making significant contribution and providing my best to the company that offers professional growth and personal satisfaction. • Working as Associate at Goldman Sachs (On ITC Infotech payroll) in Software Design & Development with skill set in Java, SpringBoot, Dropwizard and microservices, Reac-Redux-Mobx since March 2019 till present. • Working as Software Engineer at Huawei technology in Software Development with skill set in Object Oriented Analysis/Design and Java Development since feb 2017 till feb 2019. • Worked as Application Developer in IBM India Private Ltd, Bangalore from July 2014-feb 2017. • 6 years of experience in Software Development with skill set in Object Oriented Analysis/Design and Java Developments. • Ability to work with the team ,can takes initiative and works independently as well. • Highly motivated and energetic self-starter with good analytical, organisational, creative and communication skills. • Proven ability to effectively plan, coordinate and meet the deadlines of a project with quality standards. Ensured robust code development and identifies areas of improvement in code development. • Proficient in Sun Microsystems Technologies like JSE(Collection, Threading and Database Connectivity). • Design and Development of Enterprise Applications (J2EE) in Banking domain. • Comprehensive problem solving ability, excellent verbal and written communication skill. CAREER OBJECTIVEPROFESSIONAL EXPERIENCEPROFESSIONAL SUMMARY', '• Working end to end in project delivery including phases like requirement analysis, design, development, status update to client, testing, implementation and deployment into web server. • Experience working in Agile projects and deliver with quality. • Extensively involved in Problem Analysis, Debugging and Tuning in Performance Test environment Project #1 : Trade Settlement Automation Environment : JDK1.8, Spring Boot, DB2, ReactJs,Redux Role : Developer Duration : April 2019 to till Date Server : Apache Tomcat DataBase : Oracle 11g. Description: Working for one of the financial and investment bank dealing in bank loans and mortgages where we have developed a gateway for the automatic processing of any trade by consuming the messages sent by ClearPar(platform for trade settlement) and allocating the same in our db. Responsibilities: • Gather requirements for a business problem and design optimum solution for the same. • Collaborate with various teams in the process to achieve the delivery of solution on time. • Design and Develop high quality code. • Test and Deploy the deliverable. • Support for the product delivered. Project #2 : SQM Environment : JDK1.7, BME Framework,Spring Framework, JUnit, Eclipse Role : Developer Duration : Aril 2016 to till Feb 2018 Server : Apache Tomcat DataBase : Oracle 11g. Description: SQM is aimed at reflecting customer’s perceived quality by accurately visualizing quality issues in multiple views.The measurement data(raw data which is the underlying data to be analyzed) is transmitted periodically across the network(reporting), and quality indicators will be then calculated and prepared for modeling and visualization after the data aggregation SQM employs quantitative measurement data to afford qualitative analysis and visualization features to the service manager. SQM dashboard provides a graphical view of key aspects for each of the service perspective.One of the main sub system in SQM which is mainly dealing with internal data is ISEE which at the core of service management and provides access to devices in the field through TIA servers.TIA servers collect and aggregate raw data reported in each reporting cycle.The aggregated data will made available for MQMC which is also part of ISEE for fulfilling the presentation layer. PROJECT PROFILE', ""Responsibilities: • Involved in the development and enhancement as per the requirements. • Involved in identify the root cause of site issues and fixing. • Involved in development of Service and DAO classes to interact with business layer. • Responsible for providing the spring bean classes to deal as model component. • Fixing find bugs issue. • Writing Junit test cases with Powerckito. • Involved in self code review. • Involved in Automation Framework Development. • Implemented Stub. Project #3 : eBPP (Telecom Domain) Client : Telefonica (UK Telecom Company) Environment : Core Java, J2EE, Spring Framework, JavaScript,DB2,Websphere7.0, AIX Duration : 11 months Summary: Worked in 3 application projects, more than 10 change requests Description: With 9 months of design & development effort, eDocs eaSuite is replaced with custom-built and packaged solution developed by our team which saved build & migration cost, and potential license saving to Telefonica and IBM. The solution is highly scalable and has higher flexibility and lead to significant improvement in the following areas: Higher performance and reliability, Simplified system integration, Call center deflection, Reduced print and mail costs. Project #4 : RBM,Treatment Handler& eBPP Notification Jobs ,EAI Adapter Client : Telefonica (UK Telecom Company) Environment : AIX ,Sql Core Java, J2EE Duration : 15 months Description: A real-time, transaction-based rating and billing application capable of billing for any service or combination of services, and offering the performance required by major carriers handling large volumes of data. It’s Convergent's and Netcracker's billing system which delivers active revenue management. It is delivered to work with a range of goods and services, typically those delivered online. Comptel event link Mediation is using an Application developed by Comptel to carry on tasks. Application is being installed on a AIX Unix Server and there is a GUI to manage these activities. Electronic Bill Presentation and Payment, allows a variety of O2 customers to manage their bills online. Provides other services: Making payments and viewing payment history and Viewing unbilled usage etc. Roles: •Was working as a developer and was involved in Information gathering and design phases. •Development and Unit testing of front-end framework and back-end services. •Presented the application to the client during Client visit. "", 'Responsibilities: •Analyze the incidents and identify the root cause for the issue. And then fix the code to avoid the other customer’s are getting impacted with same issue. •Analyze and modify the code to use optimized queries and proper indexes to get the best performance in production. •Help the Production support team to run various Geneva VPAs and Billing Interfaces and identify the root cause when it fails or reports any error. •Handling releases, migrations and other up-gradation activities. • Languages : Java, J2EE,SQL • Frameworks : Springs,BME,Struts 2.0 ,Hibernate, ReactJs • Operating Systems : Linux, Windows, AIX 6.1 • Scripting Language : JavaScript • Unix scripting : Shell scripting • Databases : Oracle, MySQL • App/Web Servers : Websphere 7.0, Apache Tomcat 6.0 • Tools : RAD 7.5,Eclipse 4.2,Rational Clearcase, AMDOCS • Completed Bachelor of Technology (B.TECH) in 2013 from Hindustan Institute of Technology and Science, Chennai, with 7.2 CGPA with specialization in Information Technology. • Completed 12th in 2008 from NIOS with 65% aggregate with specialization in PCM. • Completed 10th in 2005 from BSEB board with 66.4% aggregate. Gone through 6 months exclusive training on JAVA/J2EE,SQL and Frameworks at Jspiders (Bangalore) in Dec 2013. Date of Birth : 20th April, 1990 Languages Known : English, Hindi Nationality : Indian SOFTWARE SKILLS:EDUCATIONAL PROFILE:TRAINING & CERTIFICATIONPERSONAL DETAILS', 'Current address : 4th Cross, Nalluruhalli Colony,Whitefield, Bangalore - 560066 Marital status : Single Passport number : M7974022']",4
4,"[""Siddharth GargEEE Engineering StudentMy career objective is to work with result oriented team and explore every mile stone to perceive great career with excellence andknowledge.169siddharth@gmail.com09821180551Delhi-110092, NEW DELHI,IndiaTECHNICALSKILLSC++CMatlabMS OfficeArduino IDELANGUAGESEnglishFull Professional ProficiencyHindiFull Professional ProficiencyINTERESTSTravellingReadingMusic and MoviesGymmingEDUCATIONB.Tech EEEAmity University - Noida07/2017 - Present, 6.25 CGPAXII(PCM)Welham Boys' School03/2016, 65%XWelham Boys' School03/2014, 8.4 GPORGANIZATIONSAmity School of engineering and technologyI have participated voluntarily in various events, conferences and college fests as a member ofmanagement/organizing team.PERSONAL PROJECTSMotion Sensing Light - Inhouse practical trainingA project based on electrical automation. It works on PIR sensor which turns on the light on any movementnear it.Application of electric motors in EVs: Challenges in design and operation - SummerInternshipA research was done on electric motors used in EVs and challenges faced in their design and operation. Afinal report was drafted on the topic as part of summer internship since due to covid-19 no practicalinternships were offered.Design of firefighting robotic vehicle - Minor projectA design was made and a working model was developed for the same and further work is being done on itin major project.Sonic fire fighting - Research PaperThe principal motive behind above mentioned exploration work is to supersede the present use ofchemical and water based extinguishing methods by sound waves, thus leading to optimize usage ofprecious assets along with the protection of health of individuals and apparatus damage resulted bychemicals.Design of firefighting robotic vehicle - Major ProjectWork is being done on the firefighting robotic vehicle which includes both sonic technology and theconventional technologies for better results.STRENGTHSTime ManagementTeam WorkProblem Solving SkillsAdaptabilityCommunication Skills""]",5
5,"['Varnika GuptaNoida, India | +91-8006660021varnikagupta.97@gmail.comEDUCATIONGautam Buddha University, Greater NoidaAug’15-June’20Integrated B.Tech (Computer Science Engineering) +79.5 %M.Tech (Software Engineering)RELEVANT WORK EXPERIENCECetpa Infotech Pvt. Ltd.TraineeJune’19-July’19●Was introduced to the Python programming language.Implemented the same atvarious instances.●Machine Learning was later introduced to our curriculum. Worked on and executed afew of its algorithms using Python.●Developed multiple projects to get a better understanding of the concepts of MachineLearning.IndiaMART InterMESH LimitedInternJune’18-July’18●Worked for the m-site team which was responsible for the maintenance of IndiaMART’smobile website.●The m-site functioned using the Yii framework and the MVC model. Was introduced tothese.●Developed a demo application using CSS, HTML and Bootstrap.●Resolved a bug on the OTP screen in the Opera Mini Browser.TECHNICAL SKILLS●Operating Systems: Windows, Linux, MacOS●Programming Languages: C, C++, Javascript, Core Java, Advanced Java, Python, PHP,MySQL, HTML, CSS.●Introduction to the Amazon Web Services.●Machine LearningPROJECTSSegmentation and Classification of a Bank Customer’s Profile through Random Forests and aWide and Deep Neural Network. (Dissertation Project)June’20', '●In this project,the profiles of a large database of customers are divided andrepresented in order to group them with each group availing some common set ofbenefits and services.●Data-preprocessing is performed on the dataset sourced from the UCI MachineLearning Repository.●Classification is done firstly via the Wide and Deep Neural Network. It is also done viaRandom Forests.●It is implemented using the NumPy, Matplotlib, Sci-kit, Tensorflow libraries, Keras APIand Python Programming Language.Natural Language Processing through Sentiment Analysis. (Major Project)May’19●This project aims at classifying tweets from Twitter into “positive” or “negative”sentiment by building a model based on probabilities.●The analysis is performed through classification which is a Machine Learningtechnique. It crawls over Twitter collecting all tweets related to the user’s searchkeyword using the Twitter API.●This is followed by data pre-processing where the stop words or any unrelatedinformation is filtered. The formatted tweets are stored and are classified using theNaive Bayes’ Classifier.●Implementation was done by using the NLTK, NumPy and Matplotlib libraries and thePython Programming Language.Text Detection and Translation Application. (Minor Project)Dec’18●This android application focuses on retrieving the text from images captured by realtime camera and then converting it into text format and further translating it into thedesired language.●It is based on Optical Character Recognition and is developed using Google’s MobileVision framework and the Translation API.●The task is performed via photo scanning of the text character-by-character, analysisof the scanned-in image, and then translation of the character image into charactercodes.HONORS AND RESPONSIBILITIES●Member of the University Placement Cell, responsibilities included creating placementbrochures, profiles and contacting companies.●Organized an Event ‘Artificial Intelligence to Reality’ in TECHNOPHILIA Organized Under●NANOfIM 2017 (IEEE INTERNATIONAL CONFERENCE).●Compered in various informal events of the college.●Member of the PR team for university’s cultural fest.●Active participation in literary events.●Secured first position in a Creative Writing Competition.']",6


In [7]:
# Create features list using the input JD 

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import docx

# define method to read the jd document
def getText(filename):
    return '\n'.join([para.text for para in docx.Document(filename).paragraphs if para.text.strip()])

stop_words = stopwords.words("english")

# Reading the jd from the given folder path
jd_text = getText('E:/AMPBA/Term2/FP/FP-Project/JD/jd_java_developer.docx')
jd_temp = set(word_tokenize(pre_process(jd_text)))

# Removing the stop-words from the feature vocabulory
jd_vocab = [token for token in jd_temp if not token in stop_words]
jd_vocab

['noida',
 'location',
 'spring',
 'skill',
 'rest',
 'core',
 'years',
 'exp',
 'webservices',
 'hibernate',
 'java',
 'developers']

In [8]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline

# Pre-Processing the Resumes
resumes_df['Resume'] = resumes_df['Resume'].apply(pre_process)  

# Taking Resume content from the resume df for training the model
train_data = resumes_df.Resume 
stop_words = stopwords.words("english")

# Initialize the TfidfVectorizer with maximum features or vocabulories to 50k 
tfidfVectorizer = TfidfVectorizer(stop_words=stop_words, max_features = 1500, vocabulary = jd_vocab)

# Learn and transform train documents by obtaining TFIDF features
vectorised_train_documents = tfidfVectorizer.fit_transform(train_data)
print(vectorised_train_documents.shape)
tfidf_tokens = tfidfVectorizer.get_feature_names()

(6, 12)


In [9]:
df_tfidfvect = pd.DataFrame(data = vectorised_train_documents.toarray(),index = resumes_df.Id, columns = tfidf_tokens)
df_tfidfvect

Unnamed: 0_level_0,noida,location,spring,skill,rest,core,years,exp,webservices,hibernate,java,developers
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,0.366816,0.447328,0.0,0.0,0.0,0.619382,0.0,0.0,0.0,0.0,0.530763,0.0
2,0.042206,0.0,0.71751,0.0,0.205882,0.0,0.035634,0.0,0.05147,0.253239,0.610706,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.466348,0.568707,0.0,0.295292,0.098431,0.0,0.0,0.116587,0.590434,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.503967,0.0,0.0,0.0,0.0,0.863723,0.0


In [10]:
# Convert the input JD also into a vector based on the TFIDFVectorizer Model
vectorised_jd = tfidfVectorizer.transform([jd_text])
print('JD: \n', vectorised_jd.todense())
vectorised_jd.shape

JD: 
 [[0.23867714 0.29106461 0.23867714 0.29106461 0.29106461 0.20150764
  0.20150764 0.38062157 0.29106461 0.23867714 0.34535348 0.38062157]]


(1, 12)

In [12]:
# Calculating the Cosine score for each Resume against the jd to find the similarity

from sklearn.metrics.pairwise import cosine_similarity

sim = cosine_similarity(vectorised_jd, vectorised_train_documents)
final_df = pd.DataFrame(sim.T, index = resumes_df.Id, columns = ['JDCosineScore'])
final_df.reset_index(inplace=True)
final_df
final_df.sort_values(by=["JDCosineScore"],ascending=False)

Unnamed: 0,Id,JDCosineScore
3,4,0.587911
1,2,0.534765
0,1,0.525863
5,6,0.399843
2,3,0.201508
4,5,0.0
