In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [7]:
apps = pd.read_csv('filtered_apps.csv')
jobs = pd.read_csv('filtered_jobs.csv')
users = pd.read_csv('filtered_users.csv')

In [8]:
users.head()

Unnamed: 0,UserID,WindowID,Split,City,State,Country,ZipCode,DegreeType,Major,GraduationDate,WorkHistoryCount,TotalYearsExperience,CurrentlyEmployed,ManagedOthers,ManagedHowMany
0,554,1,Train,Altamonte Springs,FL,US,32701,Bachelor's,Legal Studies,2006-12-01 00:00:00,2,4.0,No,No,0
1,769,1,Test,Roselle,IL,US,60172,Bachelor's,Radio-Television,2011-05-01 00:00:00,5,5.0,Yes,No,0
2,1697,1,Train,Justice,IL,US,60458,High School,Not Applicable,2007-01-01 00:00:00,4,6.0,,No,0
3,2652,1,Test,Glendale,CA,US,91204,High School,Not Applicable,1987-01-01 00:00:00,3,21.0,No,Yes,3
4,4359,1,Train,Valrico,FL,US,33594,High School,Not Applicable,2010-01-01 00:00:00,3,2.0,Yes,No,0


In [9]:
users['DegreeType'] = users['DegreeType'].fillna('')
users['Major'] = users['Major'].fillna('')
users['TotalYearsExperience'] = users['TotalYearsExperience'].fillna('').astype(str)

users['DegreeType'] = users['DegreeType'] + ' ' + users['Major'] + ' ' + users['TotalYearsExperience']


In [12]:
users.head()

Unnamed: 0,UserID,WindowID,Split,City,State,Country,ZipCode,DegreeType,Major,GraduationDate,WorkHistoryCount,TotalYearsExperience,CurrentlyEmployed,ManagedOthers,ManagedHowMany
0,554,1,Train,Altamonte Springs,FL,US,32701,Bachelor's Legal Studies 4.0,Legal Studies,2006-12-01 00:00:00,2,4.0,No,No,0
1,769,1,Test,Roselle,IL,US,60172,Bachelor's Radio-Television 5.0,Radio-Television,2011-05-01 00:00:00,5,5.0,Yes,No,0
2,1697,1,Train,Justice,IL,US,60458,High School Not Applicable 6.0,Not Applicable,2007-01-01 00:00:00,4,6.0,,No,0
3,2652,1,Test,Glendale,CA,US,91204,High School Not Applicable 21.0,Not Applicable,1987-01-01 00:00:00,3,21.0,No,Yes,3
4,4359,1,Train,Valrico,FL,US,33594,High School Not Applicable 2.0,Not Applicable,2010-01-01 00:00:00,3,2.0,Yes,No,0


In [14]:
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0.0, stop_words='english')
tfidf_matrix = tf.fit_transform(users['DegreeType'])

In [15]:
tfidf_matrix.shape

(6223, 4049)

In [16]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [17]:
cosine_sim.shape

(6223, 6223)

In [18]:
user_based_approach = users.reset_index()
userid = user_based_approach['UserID']
indices = pd.Series(user_based_approach.index, index=user_based_approach['UserID'])

In [19]:
def get_recommendations_userwise(userid):
    idx = indices[userid]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    user_indices = [i[0] for i in sim_scores]
    user_indices_top = user_indices[1:12]
    similar_users = list(user_based_approach.loc[indices.iloc[user_indices_top]]['UserID'].values)
    return similar_users

In [22]:
print ("-----Top 10 Similar users with userId: 554------")
get_recommendations_userwise(554)

-----Top 10 Similar users with userId: 554------


[744467,
 681099,
 1337193,
 214540,
 734150,
 1243561,
 158773,
 282295,
 1080768,
 382785,
 1087895]

In [23]:
apps['UserID'].unique()

array([    554,     769,    1697, ..., 1467940, 1468595, 1470416])

In [24]:
def get_job_id(usrid_list):
    jobs_userwise = apps['UserID'].isin(usrid_list) #
    df1 = pd.DataFrame(data = apps[jobs_userwise], columns=['JobID'])
    joblist = df1['JobID'].tolist()
    Job_list = jobs['JobID'].isin(joblist) #[1083186, 516837, 507614, 754917, 686406, 1058896, 335132])
    df_temp = pd.DataFrame(data = jobs[Job_list], columns=['JobID','Title','Description','City','State'])
    return df_temp

In [26]:
get_job_id(get_recommendations_userwise(554))

Unnamed: 0,JobID,Title,Description,City,State
30,13926,Customer Service Representatives,<p>The adidas Group is one of the world&rsquo;...,Indianapolis,IN
37,16581,Administrative Assistant,"<P><SPAN style=""TEXT-DECORATION: underline""><S...",Carmel,IN
62,29711,DENTAL FRONT OFFICE,Dental Front Office Position Available We a...,Avon,IN
81,34975,Data Entry Clerk/Warehouse Associate,<p>Seeking <b>Data Entry Clerk/Warehouse Assoc...,Marietta,GA
87,39361,Customer Service Representative,"<br>\r<p style=""text-align: center"" align=""cen...",Charlotte,NC
...,...,...,...,...,...
6876,979213,Marketing Assistant,"A Washington, DC law firm is currently seeking...",Washington,DC
6897,1029851,Executive Assistant / Board Liaison,<p><b><span><br>\rExecutive Assistant / Board ...,Alexandria,VA
6941,1080871,Resident Service Associate,"<p><span>Richman Property Services, Inc. a rap...",Bladensburg,MD
6959,1101067,"Admissions Representative - Silver Spring, MD",<p><span>Medtech College is a private institut...,Silver Spring,MD
