### Import Libraries & Dataset

In [1]:
# Installing libraries
%pip install -q ipywidgets

In [2]:
## Import libraries
import pandas as pd
from ast import literal_eval
import ipywidgets as widgets
from IPython.display import clear_output

In [3]:
# Alternative remote source (inactive; would additionally need `requests` and `io`):
# path='https://raw.githubusercontent.com/sakshamsneh/sem4Proj/main/vectornew.csv'
# download = requests.get(path).content
# vectordf=pd.read_csv(io.StringIO(download.decode('utf-8')), index_col=0)


def _parse_literal(cell):
    """Return the Python literal encoded in *cell*; non-string cells pass through unchanged."""
    return literal_eval(cell) if isinstance(cell, str) else cell


def _min_max_scale(column):
    """Scale *column* linearly onto [0, 1]; a constant column maps to all zeros.

    The zero-span guard avoids the 0/0 division that would otherwise fill the
    column with NaN and propagate into every derived weight.
    """
    low = column.min()
    span = column.max() - low
    if span == 0:
        return column * 0.0
    return (column - low) / span


# Raw feature table, indexed by the first CSV column.
vectordf = pd.read_csv('data/vectornew.csv', index_col=0)

# `repo`, `lang` and `langdict` are stored as Python-literal strings; missing
# cells become an empty list / dict. literal_eval accepts either quote style,
# so no quote rewriting is needed before parsing.
vectordf['repo'] = vectordf['repo'].fillna('[]').apply(_parse_literal)
vectordf['lang'] = vectordf['lang'].fillna('[]').apply(_parse_literal)
vectordf['langdict'] = vectordf['langdict'].fillna('{}').apply(_parse_literal)

# Independent copy whose count columns are normalised below; it is taken
# before `exp` and `rank` are added, so it does not carry those columns.
nvectordf = vectordf.copy(deep=True)
vectordf['exp'] = 0.0
vectordf['rank'] = 0.0

# Count features that are min-max normalised (missing counts are treated as 0).
cols = ['repocount', 'gistcount', 'contributioncount', 'followerscount', 'followingcount', 'langcount']
nvectordf[cols] = nvectordf[cols].fillna(0).apply(_min_max_scale)

### Define Functions & Variables

In [4]:
def getnodeweight(r, feature_weights):
    """Return the weighted mean feature score of one row.

    Args:
        r: Row-like object (for example a pandas Series or a dict) that can
            be indexed by every key of ``feature_weights``.
        feature_weights: Dict mapping a feature name to its weight.

    Returns:
        The sum of ``r[name] * weight`` over all features divided by the
        number of features, or ``0.0`` when ``feature_weights`` is empty.
    """
    if not feature_weights:
        # Nothing to average; avoid dividing by zero.
        return 0.0
    total = sum((r[name] * factor for name, factor in feature_weights.items()), 0.0)
    return total / len(feature_weights)

In [5]:
def getexp(r):
    """Compute the experience score of one developer row.

    The languages currently selected in the module-level ``langdata`` widget
    determine which share of the row's ``langdict`` totals counts.

    Args:
        r: Row exposing ``langcount``, ``langdict``, ``totaldays``, ``pr`` and
            ``weight`` as attributes.

    Returns:
        ``0.0`` when ``langcount`` is 0; otherwise
        ``share * totaldays * pr * 0.85 + weight``, where ``share`` is 1 when
        no language is selected, else the selected languages' fraction of the
        sum of all ``langdict`` values (0.0 if that sum is zero).
    """
    if r.langcount == 0:
        return 0.0
    selected = list(langdata.value)
    share = 1
    if selected:
        usage = r.langdict
        overall = sum(usage.values())
        matched = sum(usage.get(name, 0) for name in selected)
        # An empty or all-zero langdict would otherwise divide by zero.
        share = matched / overall if overall else 0.0
    return share * r.totaldays * r.pr * 0.85 + r.weight

In [6]:
# Sorted list of every distinct language appearing in any row's `lang` list;
# used as the option list of the language selector.
langlist = sorted({name for langs in vectordf.lang for name in langs})

In [7]:
def _weight_slider(description, tooltip):
    """Build a vertical 0..1 slider (step 0.01, initial value 1.0) for one feature weight."""
    return widgets.FloatSlider(min=0, max=1, step=0.01, value=1.0, description=description,
                               orientation='vertical', readout=True, tooltip=tooltip)


# Title shown at the top of the filter panel.
heading=widgets.HTML(
    value="<H1>Data Filter</H1>",
)
# Number of rows to display. The initial value equals the minimum, matching
# the value cleardata() resets this slider to.
limit=widgets.IntSlider(
    value=1,
    min=1,
    max=20,
    step=1,
    description='Rows Count',
    tooltip='Rows Count',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)
# One weight per normalised count feature, read by filterdata().
repoweight=_weight_slider('Repo', 'Repo Weight')
gistweight=_weight_slider('Gist', 'Gist Weight')
contributionweight=_weight_slider('Contribution', 'Contribution Weight')
langweight=_weight_slider('Lang', 'Language Weight')
followerweight=_weight_slider('Follower', 'Follower Weight')
followingweight=_weight_slider('Following', 'Following Weight')
# NOTE(review): the tooltip says years, but filterdata() compares this value
# directly against the `totaldays` column - confirm the units agree.
minexp=widgets.FloatSlider(min=0.5, max=11, step=0.01, value=0.5, description='Minimum Experience', orientation='horizontal', readout=True, tooltip='Minimum Experience in years')
# Multi-select of languages; an empty selection means "any language".
langdata=widgets.SelectMultiple(
    options=langlist,
    value=[],
    rows=5,
    description='Languages',
    tooltip='Languages',
    disabled=False
)
# When ticked, the displayed rows are re-sorted by `created_at`.
sortallow=widgets.Checkbox(
    value=False,
    description='Desc Date Sort',
    disabled=False,
    indent=False
)
# Runs the filter.
button=widgets.Button(
    description='Get Data',
    disabled=False,
    button_style='primary',
    tooltip='Click here to get Filtered Data',
    icon='table',
)
# Resets every control.
clearbutton=widgets.Button(
    description='Clear All',
    disabled=False,
    button_style='danger',
    tooltip='Clear All',
    icon='trash'
)
# Output area that receives the printed summary and the result table.
out = widgets.Output()

In [8]:
def filterdata(_):
    """Filter, score, rank and display developers from the current widget settings.

    Steps:
      1. Keep rows whose `lang` list contains every selected language and
         whose `totaldays` exceeds the minimum-experience slider value.
      2. Compute each remaining row's weight from the normalised features,
         derive `exp` via getexp() and rank rows by descending `exp`.
      3. Render a summary and the top `limit` rows into the `out` widget.

    Args:
        _: The clicked button (unused); present because this is an
            ``on_click`` callback.
    """
    feature_weights = {
        'repocount': repoweight.value,
        'gistcount': gistweight.value,
        'contributioncount': contributionweight.value,
        'langcount': langweight.value,
        'followerscount': followerweight.value,
        'followingcount': followingweight.value,
    }
    # Build the selection set once instead of once per row.
    wanted = set(langdata.value)

    def has_wanted(langs):
        return wanted.issubset(set(langs))

    filtereddf = vectordf[vectordf.lang.apply(has_wanted)].copy()
    filtereddf = filtereddf[filtereddf.totaldays > minexp.value].copy()
    total = filtereddf.shape[0]
    if total:
        nfiltereddf = nvectordf[nvectordf.lang.apply(has_wanted)].copy()
        nfiltereddf['weight'] = nfiltereddf.apply(lambda row: getnodeweight(row, feature_weights), axis=1)
        # Index alignment copies only the weights of rows that survived the
        # minimum-experience filter.
        filtereddf['weight'] = nfiltereddf['weight']
        filtereddf['exp'] = filtereddf.apply(getexp, axis=1)
        filtereddf['rank'] = filtereddf['exp'].rank(method='average', ascending=False)
        filtereddf = filtereddf.sort_values(by=['rank'], ascending=[True])
    with out:
        clear_output()
        if wanted:
            print('Selected Languages:', list(langdata.value))
        else:
            print('Selected Languages: Any')
        if not total:
            # Leave the row-count slider untouched: 0 is below its minimum.
            print('No rows left.')
            return
        limit.max = total
        if limit.value > total:
            limit.value = total
        print('Displaying {} rows out of {} rows.'.format(limit.value, total))
        shown = filtereddf.head(limit.value)
        if sortallow.value:
            # NOTE(review): the checkbox is labelled 'Desc Date Sort' but the
            # sort is ascending - confirm which one is intended.
            shown = shown.sort_values(['created_at'], ascending=[True])
        # Show every row without permanently changing the global pandas option.
        with pd.option_context('display.max_rows', None):
            display(shown[['id','dev','created_at','repocount','gistcount','contributioncount','followerscount','followingcount','langcount','rank']])

In [9]:
def cleardata(_):
    """Restore every filter control to its default and empty the output area.

    Args:
        _: The clicked button (unused); present because this is an
            ``on_click`` callback.
    """
    weight_sliders = (
        repoweight,
        gistweight,
        contributionweight,
        langweight,
        followerweight,
        followingweight,
    )
    langdata.value = []
    # Restore the slider's range before resetting its value.
    limit.max = 20
    limit.value = 1
    minexp.value = 0.5
    for slider in weight_sliders:
        slider.value = 1.0
    sortallow.value = False
    out.clear_output()

In [10]:
# Wire the buttons to their callbacks: 'Get Data' runs the filter and
# 'Clear All' resets the controls and the output area.
button.on_click(filterdata)
clearbutton.on_click(cleardata)

In [11]:
# CSS for the two custom classes attached to widgets in this notebook:
# 'boxdesign' (bordered container) and 'checkboxdesign' (checkbox padding).
style="""
    <style>
        .boxdesign {margin: 5px; border:1px solid black; padding-left: 15px}
        .checkboxdesign {padding-left: 12px}
    </style>
    """
# Rendering the stylesheet inside an HTML widget injects it into the page.
display(widgets.HTML(value=style))

HTML(value='\n    <style>\n        .boxdesign {margin: 5px; border:1px solid black; padding-left: 15px}\n     …

In [12]:
# Tag the checkbox with its CSS class before it is placed in the layout.
sortallow.add_class('checkboxdesign')

# Row holding the six weight sliders side by side.
vbox = widgets.HBox(
    children=[repoweight, gistweight, contributionweight, langweight, followerweight, followingweight]
)
# Row holding the two action buttons.
hbox = widgets.HBox(children=[button, clearbutton])
# Whole panel, stacked top to bottom; the output area comes last.
box = widgets.VBox(
    children=[heading, langdata, vbox, limit, minexp, sortallow, hbox, out]
)

### WebApp

In [13]:
# Attach the bordered 'boxdesign' CSS class to the panel; as the cell's last
# expression, the returned widget is what the notebook renders.
box.add_class('boxdesign')

VBox(children=(HTML(value='<H1>Data Filter</H1>'), SelectMultiple(description='Languages', options=('1c enterp…

In [14]:
# import time
# time.sleep(1800)