# Ranking and Recommendation Model
Advanced Filtering System

Developed by: Kariyawasam K.G.S.S.K

# Required Libraries

In [None]:
import pandas as pd
import numpy as np
import os
import pickle
import warnings
import math 

from matplotlib import pyplot as plt
from pandas import DataFrame, Series
from math import *
from scipy.spatial import distance
from decimal import Decimal

# Silence every warning for the rest of the session (this hides pandas' warnings too).
warnings.filterwarnings(action='ignore')

# Read the comma-separated dataset and echo it as the cell output.
status_data = pd.read_csv("RRMDataSet.csv", sep=",")
status_data

# Preprocessor

In [None]:
# Keep only the rows whose cOPN flag equals 1.  The explicit copy makes `df`
# an independent frame, so the column assignments performed on it in the
# following cells never target a selection of `status_data`.
df = status_data.loc[status_data['cOPN'] == 1].copy()
df.head(30)

In [None]:
def multiply_s1(x):
    """Return ``x`` scaled by 0.7 (the weight applied to ``cSkillScore``)."""
    skill_weight = 0.7
    return x * skill_weight

# Replace the skill score column with its weighted value, then preview the result.
df = df.assign(cSkillScore=df['cSkillScore'].apply(multiply_s1))
df.head(n=5)


In [None]:
def multiply_s2(x):
    """Return ``x`` scaled by 0.3 (the weight applied to ``cLanUsageScore``)."""
    usage_weight = 0.3
    return x * usage_weight

# Replace the language usage score column with its weighted value, then preview the result.
df = df.assign(cLanUsageScore=df['cLanUsageScore'].apply(multiply_s2))
df.head(n=5)

In [None]:
# Columns that make up the skill compound score.
S_cols = ['cSkillScore', 'cLanUsageScore']

# Row-wise total of the two (already weighted) score columns.
df = df.assign(S_Compound=df[S_cols].sum(axis='columns'))
df.head(n=5)

In [None]:
def multply(x):
    """Return ``x`` multiplied by -1 (used to flip the sign of ``cNEU``)."""
    sign_flip = -1
    return x * sign_flip

# Flip the sign of the cNEU column so it subtracts from the row-wise trait sum computed later.
df = df.assign(cNEU=df['cNEU'].apply(multply))
df.head(n=5)

In [None]:
# Trait columns that make up the personality compound score.
P_cols = ['cOPN', 'cCON', 'cEXT', 'cAGR', 'cNEU']

# Row-wise total of the five trait columns.
df = df.assign(P_Compound=df[P_cols].sum(axis='columns'))
df.head(n=5)

In [None]:
def P_multiply(x):
    """Return ``x`` multiplied by 20 (the scale applied to ``P_Compound``)."""
    scale = 20
    return x * scale

# Scale the personality compound score by 20, then preview the result.
df = df.assign(P_Compound=df['P_Compound'].apply(P_multiply))
df.head(n=5)

In [None]:
# Remove every row of the raw dataset that contains a missing value.
# NOTE(review): only `status_data` is cleaned here; `df` was derived from it
# in an earlier cell and is not re-filtered.
status_data = status_data.dropna(axis='index')

# The two weighted score columns are dropped from `df`; their row-wise sum
# is already stored in S_Compound.
df = df.drop(columns=['cSkillScore', 'cLanUsageScore'])

In [None]:
# Preview the first ten rows after the score columns were removed.
df.head(n=10)

In [None]:
def C1_multiply(x):
    """Return ``x`` scaled by 0.25 (the weight applied to ``P_Compound``)."""
    personality_weight = 0.25
    return x * personality_weight

# Apply the 0.25 weight to the personality compound score, then preview the result.
df = df.assign(P_Compound=df['P_Compound'].apply(C1_multiply))
df.head(n=5)

In [None]:
def C2_multiply(x):
    """Return ``x`` scaled by 0.75 (the weight applied to ``S_Compound``)."""
    skill_weight = 0.75
    return x * skill_weight

# Apply the 0.75 weight to the skill compound score, then show up to 100 rows.
df = df.assign(S_Compound=df['S_Compound'].apply(C2_multiply))
df.head(n=100)

# Data Visualization

In [None]:
# Number of rows recorded for each distinct Language_type value.
L_Count = status_data.loc[:, 'Language_type'].value_counts()
print(L_Count)

In [None]:
# Display names for the bars.
# NOTE(review): the names are paired with L_Count purely by position, so they
# presumably follow the order value_counts() produced -- confirm against the
# printed counts before trusting the labels.
Languages = ['Java', 'Python', 'React']
L_df = pd.DataFrame({'L_Count': L_Count,
                     'Languages': Languages})

ind = np.arange(len(L_df))
width = 0.4

fig, ax = plt.subplots(figsize=(8, 6), dpi=60)
ax.barh(ind, L_df.L_Count, width, label='Language Count')
# barh centres each bar on its y position, so with a single series the tick
# labels belong on `ind` itself; offsetting them by `width` would place every
# label above its bar.
ax.set(yticks=ind, yticklabels=L_df.Languages, ylim=[width - 1, len(L_df)])
ax.legend(bbox_to_anchor=(1.35, 0.9), loc='upper right', borderaxespad=0)
# Print the count at the end of each bar.
ax.bar_label(ax.containers[0], label_type='edge')
plt.title('Distribution of Languages')
plt.show()

In [None]:
# Value frequencies for each of the five trait columns, in the order
# cOPN, cCON, cEXT, cAGR, cNEU.
(cOPN_counts, cCON_counts, cEXT_counts, cAGR_counts, cNEU_counts) = (
    status_data[trait].value_counts()
    for trait in ('cOPN', 'cCON', 'cEXT', 'cAGR', 'cNEU')
)

# One frequency table per line block, same order as above.
print(cOPN_counts, cCON_counts, cEXT_counts, cAGR_counts, cNEU_counts, sep='\n')

In [None]:
# Display names, listed in the same order as the count lists below.
personalities = ['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Neuroticism']
# Rows whose trait value is 1 ("with") and 0 ("without"), per trait.
with_RelatedPersonality_counts = [cOPN_counts[1], cCON_counts[1], cEXT_counts[1], cAGR_counts[1], cNEU_counts[1]]
without_RelatedPersonality_counts = [cOPN_counts[0], cCON_counts[0], cEXT_counts[0], cAGR_counts[0], cNEU_counts[0]]

traits_df = pd.DataFrame({'with_RelatedPersonality_counts': with_RelatedPersonality_counts,
                          'without_RelatedPersonality_counts': without_RelatedPersonality_counts,
                          'personalities': personalities})
ind = np.arange(len(traits_df))
width = 0.4

fig, ax = plt.subplots(figsize=(10, 7), dpi=60)
# Two bars per trait: "without" centred on ind, "with" centred on ind + width.
ax.barh(ind, traits_df.without_RelatedPersonality_counts, width, label='Without Related Personality')
ax.barh(ind + width, traits_df.with_RelatedPersonality_counts, width, label='With Related Personality')
# The pair of bars spans ind - width/2 .. ind + 3*width/2, so its midpoint is
# ind + width/2; putting the tick there centres each label on its group
# instead of on the upper bar only.
ax.set(yticks=ind + width / 2, yticklabels=traits_df.personalities, ylim=[2*width - 1, len(traits_df)])
ax.legend(bbox_to_anchor=(1.35, 0.9), loc='upper right', borderaxespad=0)
# Print the count at the end of every bar in both series.
ax.bar_label(ax.containers[0], label_type='edge')
ax.bar_label(ax.containers[1], label_type='edge')
plt.title('Distribution of Big-Five Personalities')
plt.show()

In [None]:
# Scatter plot of the two compound scores: skill on the x axis, personality on the y axis.
df.plot(kind='scatter', x='S_Compound', y='P_Compound')
plt.show()

# Euclidean distance 

In [None]:
# Reference point (S_Compound, P_Compound) that each row is compared with.
x1, y1 = 75, 25

# Row-wise coordinates as NumPy arrays.
x2 = df['S_Compound'].values
y2 = df['P_Compound'].values

# Sum of the squared offsets along both axes; its square root is one
# Euclidean distance per row.
# NOTE: this rebinds the name `distance`, which the import section also
# binds to scipy.spatial.distance.
d = np.square(x1 - x2) + np.square(y1 - y2)
distance = np.sqrt(d)

# print Euclidean distance
print(distance)

# Store the distances next to the scores and preview the first rows.
df['E_distance'] = distance

df.head(10)

In [None]:
# Show the rows ordered by ascending Euclidean distance (the sorted frame is displayed, not stored).
df.sort_values(by='E_distance')

# Manhattan Distance 

In [None]:
# Helper used below to measure each row against the reference point.

def manhattan(a, b):
    """Return the sum of absolute differences between paired items of ``a`` and ``b``.

    Items are paired with ``zip``, so any surplus items of the longer
    argument are ignored.  An empty pairing yields ``0``.
    """
    total = 0
    for left, right in zip(a, b):
        total = total + abs(left - right)
    return total

# Reference point (S_Compound, P_Compound) that each row is compared with.
x1, y1 = 75, 25

# Row-wise coordinates as NumPy arrays.
x2 = df['S_Compound'].values
y2 = df['P_Compound'].values

A = [x1, y1]
B = [x2, y2]

# manhattan() pairs x1 with the x2 array and y1 with the y2 array, so the
# result holds one distance per row.
distance = manhattan(A, B)

# print Manhattan distance
print(distance)

# Store the distances next to the scores and preview the first rows.
df['M_distance'] = distance
df.head(10)

In [None]:
# Show the rows ordered by ascending Manhattan distance (the sorted frame is displayed, not stored).
df.sort_values(by='M_distance')

# Hamming Distance

In [None]:
def hamming_distance(a, b):
    """Return the summed absolute differences of paired items, divided by ``len(a)``.

    Items are paired with ``zip``; the divisor is always the length of ``a``,
    so an empty ``a`` raises ``ZeroDivisionError``.

    NOTE(review): this is an averaged absolute difference rather than a count
    of mismatching positions; the two agree only when every item is 0 or 1.
    """
    total = 0
    for e1, e2 in zip(a, b):
        total = total + abs(e1 - e2)
    return total / len(a)

# Reference point (S_Compound, P_Compound) that each row is compared with.
x1, y1 = 75, 25

# Row-wise coordinates as NumPy arrays.
x2 = df['S_Compound'].values
y2 = df['P_Compound'].values

A = [x1, y1]
B = [x2, y2]

# hamming_distance() pairs x1 with the x2 array and y1 with the y2 array and
# divides by len(A), so the result holds one value per row.
distance = hamming_distance(A, B)

# print the value returned by hamming_distance
print(distance)

# Store the values next to the scores and preview the first rows.
df['H_distance'] = distance
df.head(10)

In [None]:
# Show the rows ordered by ascending H_distance (the sorted frame is displayed, not stored).
df.sort_values(by='H_distance')