In [None]:
from sklearn import preprocessing
import numpy as np
import pandas as pd
from collections import defaultdict

In [None]:
# Load the input table, keeping only the toolkit name and the six metric
# columns that the ranking is built from.
fields = [
    'toolkit',
    'forks',
    'stars',
    'so_tag_counts',
    'so_question_count',
    'search_results',
    'growth_rate',
]
DF = pd.read_csv('../output/DB_data.csv', usecols=fields)

In [None]:
# Missing values (NaN) in any loaded column are replaced with 0 so the
# arithmetic in the following cells never sees NaN input.
DF = DF.fillna(value=0)

In [None]:
# Standardise every metric to a z-score: (value - column mean) / column std,
# so that each metric has mean = 0 and std = 1 and the metrics become comparable.
metrics = ['forks','stars','so_tag_counts','so_question_count','search_results', 'growth_rate']

metric_means = DF[metrics].mean()
metric_stds = DF[metrics].std()

# A constant column has std == 0 and a single-row table has std == NaN; dividing
# by either would produce NaN/inf scores that break the ranking further down.
# Use a divisor of 1 in those cases: the numerator is already 0, so the metric
# simply scales to 0 (i.e. it does not influence the ranking).
safe_stds = metric_stds.where(metric_stds > 0, 1.0)

# Vectorised over all metric columns at once (aligned on column name).
scaled_DF = (DF[metrics] - metric_means) / safe_stds

# Carry the toolkit name along so later cells can label the rows.
scaled_DF['toolkit'] = DF['toolkit']

In [None]:
# Collapse the six scaled metrics into three category scores, each the row-wise
# mean of its two metrics:
#   Github         <- forks, stars
#   Stack Overflow <- so_tag_counts, so_question_count
#   Google Results <- search_results, growth_rate
# The toolkit name is stored under 'Library' (the column name used by the rest
# of the notebook); no separate, empty 'Toolkit' column is created.
final_DF = pd.DataFrame({'Library': scaled_DF['toolkit']})
final_DF['Github'] = scaled_DF[['forks', 'stars']].mean(axis=1)
final_DF['Stack Overflow'] = scaled_DF[['so_tag_counts', 'so_question_count']].mean(axis=1)
final_DF['Google Results'] = scaled_DF[['search_results', 'growth_rate']].mean(axis=1)

# Overall score is the sum of the three category scores.
final_DF['Overall'] = final_DF[['Github','Stack Overflow','Google Results']].sum(axis=1)

# Highest overall score gets rank 1. method='min' gives tied toolkits the best
# rank of their group as a whole number, instead of an averaged fractional rank
# that would be truncated by the integer cast.
final_DF['Rank'] = final_DF['Overall'].rank(method='min', ascending=False).astype(int)

# Present the columns in the order used for the published table.
final_DF = final_DF[['Library', 'Rank', 'Overall', 'Github', 'Stack Overflow', 'Google Results']]

In [None]:
# Order the table from best to worst overall score and fix the column order.
# reindex(columns=...) replaces DataFrame.reindex_axis, which no longer exists
# in current pandas releases.
final_DF = (
    final_DF
    .sort_values('Overall', ascending=False)
    .reindex(columns=['Library','Rank','Overall','Github','Stack Overflow','Google Results'])
)

# Build a styled view that hides the index column when rendered as HTML.
# The Styler needs the optional jinja2 package; if it is not installed, fall
# back to the plain DataFrame instead of silently swallowing every error.
try:
    styled_rankings = final_DF.style.set_table_styles([
        {'selector': '.row_heading, .blank', 'props': [('display', 'none;')]}
    ])
except ImportError:
    styled_rankings = final_DF

# Last expression of the cell, so the notebook actually renders the table.
styled_rankings

In [None]:
# Write the rankings table to disk as CSV; the row index is not written.
rankings_csv_path = '../output/DB_final_Rankings.csv'
final_DF.to_csv(rankings_csv_path, index=False)