In [1]:
import random

import pandas as pd
import numpy as np
import nltk
from sqlalchemy import create_engine
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
from sklearn.externals import joblib

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/adrianlievano/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/adrianlievano/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/adrianlievano/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


  from numpy.core.umath_tests import inner1d


## Explore the input data

In [2]:
opportunity = {
    'industry': 'Internet',
    'min_years_experience': 3,
    'city': 'San Francisco',
    'highestLevelOfEducation': 'bachelor',
    'blurb': 'I need a software engineer that has experience in pytorch, python, aws, spark, and airflow. They should want to work for a startup that has less than 500 employees. We do not want new graduates.',
    'title': 'Seeking Software Engineer (Full-Time)'
}

In [3]:
industrys = ['Internet', 'Technology', 'Enterprise Software']
min_years_experiences = [1, 2, 3, 4, 5, 6, 7]
citys = ['San Francisco', 'New York City', 'Los Angeles']
highestLevelOfEducations = ['high_school', 'associate', 'bachelor', 'master', 'phd']
blurbs = ['I need a software engineer that has experience in pytorch, python, aws, spark, and airflow. They should want to work for a startup that has less than 500 employees. We do not want new graduates.',
          'We need a full stack machine learning engineer that knows REST APIs, flask, and tensorflow.',
          'Find me data scientist that knows SQL, tableu, excel, machine learning.', 
          'We just need a developer that has six or 7 years of experience']
titles = ['Software Engineer', 'Data Scientist', 'Machine Learning Engineer', 'Software Engineer (Machine Learning)']

df_test = pd.DataFrame()

num_opportunities = 5
rows = []

for i in range(num_opportunities):
    row_ele = {
    'industry': random.choice(industrys),
    'min_years_experience': random.choice(min_years_experiences),
    'city': random.choice(citys),
    'highestLevelOfEducation': random.choice(highestLevelOfEducations),
    'blurb': random.choice(blurbs),
    'title': random.choice(titles)
    }
    rows.append(row_ele)


## Construct Dummy Table - Opportunity Submission Table

#### This is a table with queries that a poster might use to find a candidate in their network

In [4]:
opportunities_df = pd.DataFrame(rows)
opportunities_df.head(10)

Unnamed: 0,blurb,city,highestLevelOfEducation,industry,min_years_experience,title
0,We just need a developer that has six or 7 yea...,Los Angeles,associate,Technology,7,Software Engineer (Machine Learning)
1,I need a software engineer that has experience...,New York City,associate,Enterprise Software,7,Data Scientist
2,We just need a developer that has six or 7 yea...,New York City,high_school,Enterprise Software,7,Data Scientist
3,I need a software engineer that has experience...,New York City,associate,Technology,1,Software Engineer
4,"Find me data scientist that knows SQL, tableu,...",New York City,master,Enterprise Software,4,Machine Learning Engineer


## Construct Dummy Table - Candidate Table

Candidate dictionary
{
    id: int
    email: 'str'
    name: 'str'
    location: list['city', 'state']
    current_position: 'str',
    years_of_experience: int
    industry: 'str'
    degree_connection: str
    degree = str
}

In [5]:
candidates_total = 10000
emails = []
names = []

for i in range(candidates_total):
    email = ('{}'+'xxxxxxx@gmail.com').format(i)
    emails.append(email)
    
for i in range(candidates_total):
    name = ('name + {}'.format(i))
    names.append(name)

ids = [i for i in range(candidates_total)]
emails = emails
names = names
citys = ['San Francisco', 'New York City', 'Los Angeles']
#state = ['California', 'New York', 'Florida']
current_positions = ['Software Engineer', 'Data Scientist', 'Machine Learning Engineer', 'Bioengineer']
industrys = ['Internet', 'Technology', 'Enterprise Software', 'Biotech']
years_of_experiences = [1, 2, 3, 4, 5, 6, 7]
linkedin_urls = ['https://www.linkedin.com/adrianlievano']
image_urls = ['https://www.linkedin.com/adrianlievano/profile_pic.png']
degrees = ['1st', '2nd']
highestLevelOfEducations = ['high_school', 'associate', 'bachelor', 'master', 'phd']

candidate_rows = []

for i in range(candidates_total):
    row_ele = {
    'id': random.choice(ids),
    'email': random.choice(emails),
    'name': random.choice(names),
    'phone_number': '786-515-4282',
    'city': random.choice(citys),
    #'location': random.choice(locations),
    'current_position': random.choice(current_positions),
    'industry': random.choice(industrys),
    'years_of_experience': random.choice(years_of_experiences),
    'degree': random.choice(degrees),
    'linkedin_url': random.choice(linkedin_urls),
    'image_url': random.choice(image_urls),
    'highestLevelOfEducation': random.choice(highestLevelOfEducations)
    }
    candidate_rows.append(row_ele)

In [7]:
len(candidate_rows)

10000

In [9]:
df_candidates = pd.DataFrame(candidate_rows)
df_candidates.head(5)

Unnamed: 0,city,current_position,degree,email,highestLevelOfEducation,id,image_url,industry,linkedin_url,name,phone_number,years_of_experience
0,Los Angeles,Machine Learning Engineer,2nd,6457xxxxxxx@gmail.com,phd,7468,https://www.linkedin.com/adrianlievano/profile...,Biotech,https://www.linkedin.com/adrianlievano,name + 5921,786-515-4282,3
1,San Francisco,Software Engineer,1st,3099xxxxxxx@gmail.com,associate,4872,https://www.linkedin.com/adrianlievano/profile...,Technology,https://www.linkedin.com/adrianlievano,name + 3433,786-515-4282,5
2,San Francisco,Software Engineer,2nd,7068xxxxxxx@gmail.com,associate,9040,https://www.linkedin.com/adrianlievano/profile...,Internet,https://www.linkedin.com/adrianlievano,name + 9952,786-515-4282,4
3,Los Angeles,Machine Learning Engineer,2nd,4307xxxxxxx@gmail.com,phd,8477,https://www.linkedin.com/adrianlievano/profile...,Internet,https://www.linkedin.com/adrianlievano,name + 4899,786-515-4282,5
4,Los Angeles,Bioengineer,1st,4571xxxxxxx@gmail.com,associate,3319,https://www.linkedin.com/adrianlievano/profile...,Biotech,https://www.linkedin.com/adrianlievano,name + 4352,786-515-4282,4


### Phase 1: Knowledge Based Heuristic ###

1. Filter 1: Filter by location
2. Filter 2: Filter by >= years of experience
3. Filter 3: Filter subset by title embeddings
4. Filter 4: Filter by degrees
5. Filter 5: Filter by industry
6. Filter 6: Filter by blurb

Create score column in candidate table that assigns reward for each applicable filter

In [13]:
opportunity_sample_request = dict(opportunities_df.iloc[0])
opportunity_sample_request['industry']

'Technology'

In [14]:
opportunity_sample_request['city']

'Los Angeles'

In [15]:
opportunity_sample_request['min_years_experience']

7

# Heuristic 1: Knowledge Based Recommendation System


In [16]:
def heuristic_one(opportunity_request, df_candidates):
    '''Arguments:
            opportunity: a dictionary from a poster seeking candidates
            df_candidates: total pool of 1st and 2nd degree connections from the opportunity poster
            
        OUTPUTS:
        ranked dataframe based on simple heuristic of filters
    '''
    blurb = opportunity_request['blurb']
    highestLevelOfEducation = opportunity_request['highestLevelOfEducation']
    industry = opportunity_request['industry']
    years_exp = opportunity_sample_request['min_years_experience']
    df_candidates['city'] = df_candidates['city']
        
    #filter based on years experience 
    ranked_df = df_candidates[df_candidates['years_of_experience'] >= int(opportunity_sample_request['min_years_experience'])].head()

    #filter based on location
    ranked_df = ranked_df[(ranked_df['city'] == opportunity_sample_request['city'])]
    
    #filter based on match to degree
    #ranked_df = ranked_df[ranked_df['highestLevelOfEducation'] == str(opportunity_sample_request['highestLevelOfEducation'])].head()
    
    ranked_df = ranked_df.sort_values(by = 'degree', ascending = True).reset_index().drop(['index'], axis = True)
    return ranked_df


In [17]:
opportunity_sample_request = opportunities_df.iloc[1]

heuristic_one(opportunity_request=opportunity_sample_request, df_candidates=df_candidates)

Unnamed: 0,city,current_position,degree,email,highestLevelOfEducation,id,image_url,industry,linkedin_url,name,phone_number,years_of_experience
0,New York City,Data Scientist,1st,5085xxxxxxx@gmail.com,bachelor,6409,https://www.linkedin.com/adrianlievano/profile...,Biotech,https://www.linkedin.com/adrianlievano,name + 6941,786-515-4282,7
1,New York City,Machine Learning Engineer,1st,6266xxxxxxx@gmail.com,associate,5487,https://www.linkedin.com/adrianlievano/profile...,Technology,https://www.linkedin.com/adrianlievano,name + 946,786-515-4282,7
2,New York City,Machine Learning Engineer,2nd,8075xxxxxxx@gmail.com,high_school,9559,https://www.linkedin.com/adrianlievano/profile...,Internet,https://www.linkedin.com/adrianlievano,name + 2816,786-515-4282,7


# Heuristic Two: NLP Driven Score Metric for Relevance + No Filters (All Score Based) - Neighborhood Based Collaborative Filtering


In [18]:
opportunity_sample_request

blurb                      I need a software engineer that has experience...
city                                                           New York City
highestLevelOfEducation                                            associate
industry                                                 Enterprise Software
min_years_experience                                                       7
title                                                         Data Scientist
Name: 1, dtype: object

### Opportunity title to current position - similarity score (TBD)


import gensim 
from gensim.models import Word2Vec 

##helper function to tokenize words
def tokenize(tokens):
    tokens = word_tokenize(opportunity_sample_request['title'])
    stop_words = stopwords.words('english')
    tokens = [word for word in tokens if word not in stop_words]
    cleaned_tokens = []
    lemmatizer = WordNetLemmatizer()

    for tok in tokens:
        cleaned_tokens.append(lemmatizer.lemmatize(tok).lower().strip())

    return cleaned_tokens

tokens = word_tokenize(opportunity_sample_request['title'])
stop_words = stopwords.words('english')
tokens = [word for word in tokens if word not in stop_words]
cleaned_tokens = []
lemmatizer = WordNetLemmatizer()

for tok in tokens:
    cleaned_tokens.append(lemmatizer.lemmatize(tok).lower().strip())


pipeline = Pipeline([
    ('vect', CountVectorizer(tokenizer=tokenize)),
    ('tfidf', TfidfTransformer()),
    ('clf', RandomForestClassifier())
])


model_job = gensim.models.Word2Vec(cleaned_tokens, min_count = 1,  
                              size = 100, window = 5)



model_job.similarity('software', 'engineer')

### Blurb embeddeding score to text info

### Location score function - similarity score

Distance based similarily score 

In [28]:
def city_point_converter(city):
    if city == opportunity_sample_request['city']:
        return -1
    else:
        return 0
    

df_candidates['city_eucld_score'] = df_candidates['city'].apply(city_point_converter)

### Years of Experience Score - similarity score 

In [29]:
df_candidates['years_of_experience_eucld_dist'] = df_candidates['years_of_experience'].apply(lambda x: np.linalg.norm(x-opportunity_sample_request['min_years_experience']))


### Euclidean Distance for Education Level

In [30]:
def educonverter(education_level):
    if education_level == 'high_school':
        return 1
    elif education_level == 'associate':
        return 2
    elif education_level == 'bachelor':
        return 3
    elif education_level == 'master':
        return 4
    elif education_level == 'phd':
        return 5

In [31]:
df_candidates['edunum'] = df_candidates['highestLevelOfEducation'].apply(educonverter)

In [32]:
mag_edu = educonverter(opportunity_sample_request['highestLevelOfEducation'])

In [33]:
df_candidates['highestLevelOfEducation_eucld_dist'] = df_candidates['edunum'].apply(lambda x: np.linalg.norm((x-mag_edu)/mag_edu))                                                                             
                                                                                                                      

### Calculate Total Similarity Score

In [35]:
df_candidates['eucld_dist'] = df_candidates['highestLevelOfEducation_eucld_dist'] + df_candidates['years_of_experience_eucld_dist'] + df_candidates['city_eucld_score']

#### Sort by Euclidean Distance - Less means they are closer in similarity

In [37]:
n_top = 5000
ranked_df = df_candidates.sort_values(by = 'eucld_dist', ascending = True).reset_index().drop(['index', 'edunum'], axis = 1)
ranked_df.iloc[0:n_top]

Unnamed: 0,city,current_position,degree,email,highestLevelOfEducation,id,image_url,industry,linkedin_url,name,phone_number,years_of_experience,city_eucld_score,highestLevelOfEducation_eucld_dist,years_of_experience_eucld_dist,eucld_dist
0,New York City,Software Engineer,1st,4640xxxxxxx@gmail.com,associate,7520,https://www.linkedin.com/adrianlievano/profile...,Internet,https://www.linkedin.com/adrianlievano,name + 1506,786-515-4282,7,-1,0.0,0.0,-1.0
1,New York City,Machine Learning Engineer,1st,9158xxxxxxx@gmail.com,associate,2866,https://www.linkedin.com/adrianlievano/profile...,Biotech,https://www.linkedin.com/adrianlievano,name + 5897,786-515-4282,7,-1,0.0,0.0,-1.0
2,New York City,Machine Learning Engineer,2nd,4225xxxxxxx@gmail.com,associate,6511,https://www.linkedin.com/adrianlievano/profile...,Enterprise Software,https://www.linkedin.com/adrianlievano,name + 3120,786-515-4282,7,-1,0.0,0.0,-1.0
3,New York City,Software Engineer,1st,3346xxxxxxx@gmail.com,associate,3318,https://www.linkedin.com/adrianlievano/profile...,Enterprise Software,https://www.linkedin.com/adrianlievano,name + 1997,786-515-4282,7,-1,0.0,0.0,-1.0
4,New York City,Software Engineer,2nd,1542xxxxxxx@gmail.com,associate,6175,https://www.linkedin.com/adrianlievano/profile...,Internet,https://www.linkedin.com/adrianlievano,name + 1388,786-515-4282,7,-1,0.0,0.0,-1.0
5,New York City,Bioengineer,2nd,3780xxxxxxx@gmail.com,associate,8608,https://www.linkedin.com/adrianlievano/profile...,Enterprise Software,https://www.linkedin.com/adrianlievano,name + 7744,786-515-4282,7,-1,0.0,0.0,-1.0
6,New York City,Bioengineer,2nd,4913xxxxxxx@gmail.com,associate,3226,https://www.linkedin.com/adrianlievano/profile...,Internet,https://www.linkedin.com/adrianlievano,name + 4086,786-515-4282,7,-1,0.0,0.0,-1.0
7,New York City,Software Engineer,1st,1839xxxxxxx@gmail.com,associate,7851,https://www.linkedin.com/adrianlievano/profile...,Enterprise Software,https://www.linkedin.com/adrianlievano,name + 4747,786-515-4282,7,-1,0.0,0.0,-1.0
8,New York City,Software Engineer,2nd,2444xxxxxxx@gmail.com,associate,6353,https://www.linkedin.com/adrianlievano/profile...,Biotech,https://www.linkedin.com/adrianlievano,name + 8079,786-515-4282,7,-1,0.0,0.0,-1.0
9,New York City,Software Engineer,2nd,1617xxxxxxx@gmail.com,associate,5436,https://www.linkedin.com/adrianlievano/profile...,Enterprise Software,https://www.linkedin.com/adrianlievano,name + 4714,786-515-4282,7,-1,0.0,0.0,-1.0


### Generate list of candidates

In [38]:
candidates_total = 10000
emails = []
names = []

for i in range(candidates_total):
    email = ('{}'+'xxxxxxx@gmail.com').format(i)
    emails.append(email)
    
for i in range(candidates_total):
    name = ('name + {}'.format(i))
    names.append(name)

ids = [i for i in range(candidates_total)]
emails = emails
names = names
citys = ['San Francisco', 'New York City', 'Los Angeles']
#state = ['California', 'New York', 'Florida']
current_positions = ['Software Engineer', 'Data Scientist', 'Machine Learning Engineer', 'Bioengineer']
industrys = ['Internet', 'Technology', 'Enterprise Software', 'Biotech']
years_of_experiences = [1, 2, 3, 4, 5, 6, 7]
linkedin_urls = ['https://www.linkedin.com/adrianlievano']
image_urls = ['https://www.linkedin.com/adrianlievano/profile_pic.png']
degrees = ['1st', '2nd']
highestLevelOfEducations = ['high_school', 'associate', 'bachelor', 'master', 'phd']

candidate_rows = []

for i in range(candidates_total):
    row_ele = {
    'id': random.choice(ids),
    'email': random.choice(emails),
    'name': random.choice(names),
    'phone_number': '786-515-4282',
    'city': random.choice(citys),
    #'location': random.choice(locations),
    'current_position': random.choice(current_positions),
    'industry': random.choice(industrys),
    'years_of_experience': random.choice(years_of_experiences),
    'degree': random.choice(degrees),
    'linkedin_url': random.choice(linkedin_urls),
    'image_url': random.choice(image_urls),
    'highestLevelOfEducation': random.choice(highestLevelOfEducations)
    }
    candidate_rows.append(row_ele)

In [39]:
candidate_rows

[{'city': 'San Francisco',
  'current_position': 'Software Engineer',
  'degree': '2nd',
  'email': '8894xxxxxxx@gmail.com',
  'highestLevelOfEducation': 'bachelor',
  'id': 6471,
  'image_url': 'https://www.linkedin.com/adrianlievano/profile_pic.png',
  'industry': 'Internet',
  'linkedin_url': 'https://www.linkedin.com/adrianlievano',
  'name': 'name + 7072',
  'phone_number': '786-515-4282',
  'years_of_experience': 1},
 {'city': 'New York City',
  'current_position': 'Data Scientist',
  'degree': '2nd',
  'email': '7382xxxxxxx@gmail.com',
  'highestLevelOfEducation': 'high_school',
  'id': 1225,
  'image_url': 'https://www.linkedin.com/adrianlievano/profile_pic.png',
  'industry': 'Internet',
  'linkedin_url': 'https://www.linkedin.com/adrianlievano',
  'name': 'name + 2300',
  'phone_number': '786-515-4282',
  'years_of_experience': 6},
 {'city': 'New York City',
  'current_position': 'Machine Learning Engineer',
  'degree': '1st',
  'email': '5430xxxxxxx@gmail.com',
  'highestLev

In [78]:
sample_candidate = candidate_rows[0:500]
sample_candidate

[{'city': 'San Francisco',
  'current_position': 'Software Engineer',
  'degree': '2nd',
  'email': '8894xxxxxxx@gmail.com',
  'highestLevelOfEducation': 'bachelor',
  'id': 6471,
  'image_url': 'https://www.linkedin.com/adrianlievano/profile_pic.png',
  'industry': 'Internet',
  'linkedin_url': 'https://www.linkedin.com/adrianlievano',
  'name': 'name + 7072',
  'phone_number': '786-515-4282',
  'years_of_experience': 1},
 {'city': 'New York City',
  'current_position': 'Data Scientist',
  'degree': '2nd',
  'email': '7382xxxxxxx@gmail.com',
  'highestLevelOfEducation': 'high_school',
  'id': 1225,
  'image_url': 'https://www.linkedin.com/adrianlievano/profile_pic.png',
  'industry': 'Internet',
  'linkedin_url': 'https://www.linkedin.com/adrianlievano',
  'name': 'name + 2300',
  'phone_number': '786-515-4282',
  'years_of_experience': 6},
 {'city': 'New York City',
  'current_position': 'Machine Learning Engineer',
  'degree': '1st',
  'email': '5430xxxxxxx@gmail.com',
  'highestLev

In [130]:
opportunity_sample_request

blurb                      I need a software engineer that has experience...
city                                                           New York City
highestLevelOfEducation                                            associate
industry                                                 Enterprise Software
min_years_experience                                                       7
title                                                         Data Scientist
Name: 1, dtype: object

In [47]:
def city_point_converter(city):
    if city == opportunity_sample_request['city']:
        return -1
    else:
        return 0

def educonverter(education_level):
    if education_level == 'high_school':
        return 1
    elif education_level == 'associate':
        return 2
    elif education_level == 'bachelor':
        return 3
    elif education_level == 'master':
        return 4
    elif education_level == 'phd':
        return 5

pd.DataFrame.from_dict(sample_candidate)
pd.DataFrame(list(sample_candidate.items()), columns=['id', 'email', 'name', 'phone_number', 'city', 
                                                     'current_position', 'industry', 'years_of_experience',
                                                     'degree', 'linkedin_url', 'image_url', 'highestLevelOfEducation'], typ='series')

In [153]:
def heuristic_two(candidates, opp_request, n_top = 50): 
    '''Arguments:
            candidate: list of User info
            opp_request: a dictionary from a poster seeking candidates

       Returns:
        ranked dataframe of users and similarity score sorted by degree connection
    '''
    
    df_candidates = pd.DataFrame(candidates)
    #print(df_candidates)
    def city_point_converter(city):
        if city == opp_request['city']:
            return -1
        else:
            return 0
    
    #Calculate Location Similarity Score 
   
    df_candidates['city_eucld_score'] = df_candidates['city'].apply(city_point_converter)
    
    #Calculate Years of Experience Similarity Score
    df_candidates['years_of_experience_eucld_dist'] = df_candidates['years_of_experience'].apply(lambda x: np.linalg.norm(x-opp_request['min_years_experience']))

    #Calculate Level of Education Similarity Score
    df_candidates['edunum'] = df_candidates['highestLevelOfEducation'].apply(educonverter)
    df_candidates['highestLevelOfEducation_eucld_dist'] = df_candidates['edunum'].apply(lambda x: np.linalg.norm(x-educonverter(opp_request['highestLevelOfEducation'])))
    
    #Calculate Total Similarity Score
    df_candidates['eucld_dist'] = df_candidates['highestLevelOfEducation_eucld_dist'] + df_candidates['years_of_experience_eucld_dist'] + df_candidates['city_eucld_score']
    
    df_candidates = df_candidates.sort_values(by = ['eucld_dist'], ascending = True).reset_index().drop(['index', 'edunum'], axis = 1)
    ranked_df = df_candidates.iloc[0:n_top]
    #tupp = (candidate, df_candidates['eucld_dist'][0])
    for index, row in ranked_df.iterrows():
        print(row['id'], row['eucld_dist'], [])
        
    #print((row['name'], row['eucld_dist'], []) for row in ranked_df.iterrows())
    #return((row['name'], row['eucld_dist'], []) for row in ranked_df.iterrows())

In [154]:
opportunity_sample_request

blurb                      I need a software engineer that has experience...
city                                                           New York City
highestLevelOfEducation                                            associate
industry                                                 Enterprise Software
min_years_experience                                                       7
title                                                         Data Scientist
Name: 1, dtype: object

In [155]:
heuristic_two(sample_candidate, opportunity_sample_request)

3480 -1.0 []
1296 -1.0 []
7323 0.0 []
4042 0.0 []
678 0.0 []
2197 0.0 []
6770 0.0 []
2001 0.0 []
3355 0.0 []
9541 0.0 []
2716 0.0 []
1694 0.0 []
1085 0.0 []
4553 0.0 []
5007 0.0 []
805 0.0 []
4252 0.0 []
1654 0.0 []
1294 0.0 []
4850 0.0 []
3899 0.0 []
1202 0.0 []
8929 0.0 []
4146 0.0 []
4349 0.0 []
6040 0.0 []
7722 0.0 []
4023 0.0 []
3579 0.0 []
4580 0.0 []
5529 0.0 []
3895 0.0 []
3976 1.0 []
9500 1.0 []
2038 1.0 []
4927 1.0 []
4460 1.0 []
5179 1.0 []
6120 1.0 []
1213 1.0 []
776 1.0 []
9365 1.0 []
4075 1.0 []
7519 1.0 []
9186 1.0 []
6451 1.0 []
5424 1.0 []
6228 1.0 []
8523 1.0 []
368 1.0 []


In [156]:
sample_candidate[0]

{'city': 'San Francisco',
 'current_position': 'Software Engineer',
 'degree': '2nd',
 'email': '8894xxxxxxx@gmail.com',
 'highestLevelOfEducation': 'bachelor',
 'id': 6471,
 'image_url': 'https://www.linkedin.com/adrianlievano/profile_pic.png',
 'industry': 'Internet',
 'linkedin_url': 'https://www.linkedin.com/adrianlievano',
 'name': 'name + 7072',
 'phone_number': '786-515-4282',
 'years_of_experience': 1}

In [157]:
class User:
    def __init__(self, city = None, curret):
        self.city = 'San Francisco',
        self.current_postion = 'Software Engineer',
        self.highestLevelofEducation = 'bachelor'
        

SyntaxError: non-default argument follows default argument (<ipython-input-157-446cf7ef30dc>, line 2)

In [144]:
ranked_df 

<generator object heuristic_two.<locals>.<genexpr> at 0x1230adbf8>

In [145]:
for index, row in ranked_df.iterrows():
    print(row['user'], row['eucld_dist'], [])

AttributeError: 'generator' object has no attribute 'iterrows'