# Model 2 with sklearn-recommender PyPi package

In [39]:
#Uncomment the cell and run this once before running the notebook
#pip install sklearn-recommender

In [40]:
import sklearn_recommender as skr

In [41]:
import pandas as pd
#url = "C:\\Users\\shrey\\Desktop\\my_sample_data.csv"
url = "my_sample_data.csv"
df = pd.read_csv(url)

#Categorical variables as available on our settings page
df = df.loc[:, ['user', 'skill', 'gender', 'organization',
                                  'jobtitle', 'industry']]
df

Unnamed: 0,user,skill,gender,organization,jobtitle,industry
0,Brandon,marketing,male,UHG,COO,Healthcare
1,Betty,python,female,Optum,Developer,Technology
2,Brennan,marketing,male,Target,CEO,Retail
3,Julie,java,female,Target,Senior Developer,Retail
4,Jennifer,java,female,UHG,Developer,Healthcare
5,John,scala,male,Target,Developer,Retail
6,Adam,java,male,QuickenLoans,CTO,Technology
7,Shreyas,python,male,Optum,Team Lead,Technology


# Method 1 for one hot encoding, use pandas get dummies

In [42]:
#cat_columns = ["skill", "gender", "organization", "jobtitle", "industry"]
#df_processed = pd.get_dummies(df, prefix_sep="__",
                             # columns=cat_columns)
#df_processed.head()

# Method 2 for one hot encoding, use scikit learn with flexibility of column transformer

In [43]:
#Preprocessing step
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
column_trans = make_column_transformer(
               (OneHotEncoder(), ['skill', 'gender', 'organization',
                                 'jobtitle', 'industry']),
                remainder='passthrough')
X = df.drop('user', axis='columns')
column_trans.fit_transform(X)


<8x19 sparse matrix of type '<class 'numpy.float64'>'
	with 40 stored elements in Compressed Sparse Row format>

In [44]:
#Build similarity matrix
import scipy.sparse
tf = skr.transformer.SimilarityTransformer(cols=(1, -1), normalize=True)
sim_mat = tf.transform(pd.DataFrame.sparse.from_spmatrix(column_trans.fit_transform(X), index=df.user))
sim_mat

Unnamed: 0,Brandon,Betty,Brennan,Julie,Jennifer,John,Adam,Shreyas
Brandon,1.0,0.0,0.25,0.0,0.285714,0.111111,0.142857,0.125
Betty,0.0,1.0,0.0,0.142857,0.333333,0.125,0.0,0.333333
Brennan,0.25,0.0,1.0,0.285714,0.0,0.428571,0.142857,0.125
Julie,0.0,0.142857,0.285714,1.0,0.142857,0.285714,0.0,0.0
Jennifer,0.285714,0.333333,0.0,0.142857,1.0,0.125,0.0,0.0
John,0.111111,0.125,0.428571,0.285714,0.125,1.0,0.142857,0.125
Adam,0.142857,0.0,0.142857,0.0,0.0,0.142857,1.0,0.166667
Shreyas,0.125,0.333333,0.125,0.0,0.0,0.125,0.166667,1.0


In [45]:
#Recommendations are made based on the similarity of item. 
#That requires the id of an item to be given and returns the n most similar candidates.
n = 5
rec = skr.recommender.SimilarityRecommender(n)
rec.fit(sim_mat)

SimilarityRecommender(ascending=False, num_items=5, sort=None)

In [46]:
rec.predict(['Brennan'])

array([['John', 'Julie', 'Brandon', 'Adam', 'Shreyas']], dtype=object)