In [1]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from collections import Counter
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt
import matplotlib.transforms
import matplotlib as mpl
%matplotlib inline

# Shared font settings object with the smallest named matplotlib size.
# NOTE(review): no use of fontP is visible in this section — presumably it is
# passed to legends/labels in later plotting cells; confirm before removing.
from matplotlib.font_manager import FontProperties
fontP = FontProperties()
fontP.set_size('xx-small')
# Default resolution for every figure created after this point.
mpl.rcParams['figure.dpi'] = 300

# NOTE(review): installs a blanket "ignore" filter, so warnings raised by the
# cells below (including numeric ones such as divide-by-zero) are not shown.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the two feature tables (presumably positive "_p" and negative "_n"
# sentiment — confirm against the data source) and discard the
# 'Unnamed: 0' column that each CSV carries.
df_p = pd.read_csv('top_fss_p.csv').drop(columns='Unnamed: 0')
df_n = pd.read_csv('top_fss_n.csv').drop(columns='Unnamed: 0')

In [3]:
def scoring(table, col, cuts=(0.2, 0.4, 0.6, 0.8)):
    """Bin a numeric column into integer scores by its min-max position.

    Every value is rescaled to ``(value - min) / (max - min)`` and scored as
    1 plus the number of cut points it strictly exceeds. With the default
    cuts this yields 1 for a ratio <= 0.2 up to 5 for a ratio > 0.8.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame that holds the column to score.
    col : str
        Name of a numeric column in ``table``.
    cuts : sequence of float, optional
        Ratio thresholds; a value earns one extra point for each threshold
        it is strictly greater than. Defaults to (0.2, 0.4, 0.6, 0.8).

    Returns
    -------
    list of int
        One score per row of ``table``, in row (positional) order, so the
        list can be assigned straight back as a new column. Rows whose
        ratio is NaN — a missing value, or every row when the column is
        constant and the range is zero — receive the lowest score, 1.
        An empty table gives an empty list.
    """
    values = table[col]
    lowest = values.min()
    spread = values.max() - lowest

    # Rescale the whole column in one step. This covers every row regardless
    # of how many there are or how the index is labelled, and a zero spread
    # produces NaN (0/0) instead of raising.
    ratios = ((values - lowest) / spread).tolist()

    # Any comparison against NaN is False, so NaN ratios fall through to 1.
    return [1 + sum(ratio > cut for cut in cuts) for ratio in ratios]

In [4]:
# Score the frequency and strength columns of each of the five column
# groups with scoring(), then multiply the two scores into one combined
# score per group. Suffix '' pairs with c1, '.1' with c2, ... '.4' with c5.
cluster_suffixes = ['', '.1', '.2', '.3', '.4']

for cluster_no, suffix in enumerate(cluster_suffixes, start=1):
    df_p['c{}_fr'.format(cluster_no)] = scoring(df_p, 'frequency' + suffix)

for cluster_no, suffix in enumerate(cluster_suffixes, start=1):
    df_p['c{}_st'.format(cluster_no)] = scoring(df_p, 'strength' + suffix)

# Combined score = frequency score * strength score.
for cluster_no, suffix in enumerate(cluster_suffixes, start=1):
    df_p['score' + suffix] = (df_p['c{}_fr'.format(cluster_no)]
                              * df_p['c{}_st'.format(cluster_no)])

In [5]:
cluster_suffixes = ['', '.1', '.2', '.3', '.4']

# Per group: feature, frequency, strength and combined score, in that order.
df_p2 = df_p[[field + suffix
              for suffix in cluster_suffixes
              for field in ('feature', 'frequency', 'strength', 'score')]]

# Keep only the feature name and its combined score for each group.
df_p3 = df_p2[[field + suffix
               for suffix in cluster_suffixes
               for field in ('feature', 'score')]]

# Each (feature, score) pair is ranked on its own and re-indexed from 0, so
# row k of df_p4 holds the k-th highest-scoring feature of every group side
# by side rather than one original row.
ranked_pairs = [
    df_p3[['feature' + suffix, 'score' + suffix]]
    .sort_values(by='score' + suffix, ascending=False)
    .reset_index(drop=True)
    for suffix in cluster_suffixes
]
df_p4 = pd.concat(ranked_pairs, axis=1)

df_p4.to_excel('feature_score_p.xlsx')
df_p4.head()

Unnamed: 0,feature,score,feature.1,score.1,feature.2,score.2,feature.3,score.3,feature.4,score.4
0,great/sound,20,great/sound,20,great/sound,20,great/sound,25,great/sound,20
1,good/sound,15,great/quality,10,great/quality,10,great/quality,10,good/sound,12
2,good/quality,12,good/sound,8,good/sound,8,good/quality,8,good/quality,9
3,great/quality,10,good/quality,6,good/quality,6,good/sound,6,excellent/sound,5
4,great/price,10,great/life,5,great/battery,5,great/battery,5,awesome/sound,5


In [6]:
# Score the frequency and strength columns of each of the five column
# groups with scoring(), then multiply the two scores into one combined
# score per group. Suffix '' pairs with c1, '.1' with c2, ... '.4' with c5.
cluster_suffixes = ['', '.1', '.2', '.3', '.4']

for cluster_no, suffix in enumerate(cluster_suffixes, start=1):
    df_n['c{}_fr'.format(cluster_no)] = scoring(df_n, 'frequency' + suffix)

for cluster_no, suffix in enumerate(cluster_suffixes, start=1):
    df_n['c{}_st'.format(cluster_no)] = scoring(df_n, 'strength' + suffix)

# Combined score = frequency score * strength score.
for cluster_no, suffix in enumerate(cluster_suffixes, start=1):
    df_n['score' + suffix] = (df_n['c{}_fr'.format(cluster_no)]
                              * df_n['c{}_st'.format(cluster_no)])

In [7]:
cluster_suffixes = ['', '.1', '.2', '.3', '.4']

# Per group: feature, frequency, strength and combined score, in that order.
df_n2 = df_n[[field + suffix
              for suffix in cluster_suffixes
              for field in ('feature', 'frequency', 'strength', 'score')]]

# Keep only the feature name and its combined score for each group.
df_n3 = df_n2[[field + suffix
               for suffix in cluster_suffixes
               for field in ('feature', 'score')]]

# Each (feature, score) pair is ranked on its own and re-indexed from 0, so
# row k of df_n4 holds the k-th highest-scoring feature of every group side
# by side rather than one original row.
ranked_pairs = [
    df_n3[['feature' + suffix, 'score' + suffix]]
    .sort_values(by='score' + suffix, ascending=False)
    .reset_index(drop=True)
    for suffix in cluster_suffixes
]
df_n4 = pd.concat(ranked_pairs, axis=1)

df_n4.to_excel('feature_score_n.xlsx')
df_n4.head()

Unnamed: 0,feature,score,feature.1,score.1,feature.2,score.2,feature.3,score.3,feature.4,score.4
0,poor/quality,20,poor/quality,20,poor/quality,16,poor/quality,20,poor/quality,20
1,poor/sound,12,less/year,10,bad/battery,10,poor/sound,16,low/volume,10
2,terrible/sound,10,low/battery,9,bad/sound,9,less/month,15,useless/phone,5
3,bad/sound,9,bad/sound,8,terrible/sound,8,bad/quality,12,back/song,5
4,bad/quality,8,dead/battery,8,poor/sound,8,low/volume,10,standard/song,5
