# 05_Visualization

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Define File Paths

In [None]:
# Relative path of the pickled result table that is loaded with pd.read_pickle below.
fname_result = 'data/result_both.pkl'

### Read the Result

In [None]:
# Load the pickled table from fname_result.
# NOTE(review): unpickling can execute arbitrary code, so only load files from a
# trusted source.
df_result = pd.read_pickle(fname_result)

In [None]:
# Put the five descriptive columns first, followed by the 17 goal columns
# (goal01 ... goal17).
meta_cols = ['doc_id', 'fname', 'sentence', 'label', 'score']
goal_cols = [f'goal{number:02d}' for number in range(1, 18)]
df_result = df_result.reindex(columns=meta_cols + goal_cols)

In [None]:
# Show the column labels after the reordering above.
df_result.columns

## 1) Sentence Similarity

### Convert the Score Range

In [None]:
# Split the column labels: the first five are the descriptive columns, the
# remainder are the goal columns that get rescaled below.
all_cols = df_result.columns.tolist()
col_list1, col_list2 = all_cols[:5], all_cols[5:]

In [None]:
# Largest value over all goal columns: per-column maxima first, then the
# maximum of those.
col_maxima = df_result[col_list2].max()
MAX = col_maxima.max()
MAX

In [None]:
# Smallest value over all goal columns: per-column minima first, then the
# minimum of those.
col_minima = df_result[col_list2].min()
MIN = col_minima.min()
MIN

In [None]:
def func(x):
    """Rescale ``x`` linearly so that ``MIN`` maps to 0 and ``MAX`` maps to 100.

    Reads the module-level ``MIN`` and ``MAX`` values computed from the goal
    columns.
    """
    span = MAX - MIN
    return (x - MIN) * 100 / span

In [None]:
# Rescale every goal column to the 0-100 range and put the result next to the
# untouched descriptive columns.
# func is plain arithmetic, so it is applied to the whole sub-frame in one
# vectorized step instead of element by element through DataFrame.applymap,
# which is deprecated since pandas 2.1.
df_result_s = pd.concat([df_result[col_list1], func(df_result[col_list2])], axis=1)

### Create New Columns: Company & Year

In [None]:
# Company: the part of the file name before the first underscore.
fname_parts = df_result_s['fname'].str.split('_')
df_result_s['company'] = fname_parts.map(lambda parts: parts[0])

In [None]:
# Year: the second token once the file name is split on '_', '(', '-' or '.'.
# NOTE(review): assumes file names look like <company>_<year>... - confirm.
fname_tokens = df_result_s['fname'].str.split(r'_|\(|\-|\.')
df_result_s['year'] = fname_tokens.map(lambda tokens: tokens[1])

In [None]:
# Preview the table with numeric values rounded to two decimals (display only;
# df_result_s itself is not modified).
df_result_s.round(2)

### Create List: Company & Year

In [None]:
# Distinct companies and years, each in order of first appearance in the table.
comp_list = df_result_s['company'].drop_duplicates().tolist()
year_list = df_result_s['year'].drop_duplicates().tolist()

In [None]:
# Print the company list, then the year list, one per line.
for values in (comp_list, year_list):
    print(values)

### Average Score: by Company for 17 SDGs

In [None]:
# Mean of every numeric column per (company, year) pair.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on non-numeric columns (e.g. fname, label) instead of silently dropping them.
df_comp = df_result_s.groupby(['company', 'year']).mean(numeric_only=True)
# Drop the averaged doc_id column; only score and the goal columns are used below.
df_comp = df_comp.drop(columns='doc_id')

In [None]:
# Preview the per-(company, year) averages rounded to two decimals (display only).
df_comp.round(2)

### Average Score: by Company for 6 Categories

In [None]:
# Goal columns grouped by the category they are averaged into.
goals_by_category = {
    'Life': ['goal01', 'goal02', 'goal03'],
    'Equity': ['goal04', 'goal05', 'goal10'],
    'Resources': ['goal06', 'goal07', 'goal12', 'goal14'],
    'Economic': ['goal08', 'goal09'],
    'Social': ['goal11', 'goal16', 'goal17'],
    'Environments': ['goal13', 'goal15'],
}
# Inverted view used for lookups: goal column name -> category name.
category_dic = {
    goal: category
    for category, goals in goals_by_category.items()
    for goal in goals
}

In [None]:
# Category order used when the per-category averages are re-indexed below.
cat_list = ['Life', 'Economic', 'Equity', 'Social', 'Resources', 'Environments']

In [None]:
# Transpose the goal averages so that each goal becomes a row, then tag every
# goal with its category.
df_comp_T = df_comp.drop(columns='score').T
df_comp_T = df_comp_T.reset_index()
df_comp_T['category'] = df_comp_T['index'].map(category_dic)
# Remove the goal-name column now that the category is attached. The public
# DataFrame.drop replaces the private DataFrame._drop_axis, which is not part
# of the pandas API and may change between releases.
df_comp_T = df_comp_T.drop(columns='index', level=0)

In [None]:
# Average the goals inside each category, order the categories as in cat_list,
# and transpose back so that the categories become the columns.
category_means = df_comp_T.groupby('category').mean()
df_comp_c = category_means.reindex(cat_list).T
df_comp_c

In [None]:
from matplotlib import rcParams
# Global plot defaults: font size 80 on a 100 x 50 inch figure.
rcParams.update({'font.size': 80, 'figure.figsize': (100, 50)})

In [None]:
company = 'BASF'

# Category averages of the selected company: one row per year, one line per category.
df_company = df_comp_c.loc[company]
df_company.plot(marker='o', markersize=60, linewidth=12, legend=None)
plt.title(f'{company}', fontsize=150)
plt.xlabel('')
# Take the tick labels from the plotted index rather than a hard-coded count of
# 10 and the global year_list: the number and order of years can differ per
# company, which would otherwise raise or mislabel the axis.
# NOTE(review): relies on 'year' being non-numeric so that points sit at 0..n-1.
plt.xticks(np.arange(len(df_company.index)), df_company.index)
plt.grid(True)
plt.show()

## 2) Sentiment Analysis

In [None]:
# Independent copy holding only the columns used by the sentiment analysis.
sentiment_cols = ['company', 'year', 'label', 'score']
df_sentiment = df_result_s.loc[:, sentiment_cols].copy()
df_sentiment

### Positive & Negative Ratio

In [None]:
# Count each label value per (company, year) and spread the label values into columns.
labels_by_group = df_sentiment.groupby(['company', 'year'])['label']
label_counts = labels_by_group.apply(pd.Series.value_counts)
df_comp_cnt = label_counts.unstack()
# Positive-to-negative count ratio, rounded to two decimals.
df_comp_cnt['Ratio'] = df_comp_cnt['Positive'].div(df_comp_cnt['Negative']).round(2)
df_comp_cnt

In [None]:
# Pivot the ratio so that each company (the first index level) becomes a column
# and the rows are the years.
ratio_by_group = df_comp_cnt.loc[:, 'Ratio']
df_comp_ratio = ratio_by_group.unstack(level='company')
df_comp_ratio

In [None]:
# Reorder the columns (in order of increasing ratio).
company_order = ['BASF', 'MS', 'IKEA', 'Walmart', 'Nestle', 'Toyota']
df_comp_ratio = df_comp_ratio.loc[:, company_order]
df_comp_ratio

In [None]:
from matplotlib import rcParams
# Global plot defaults: font size 80, 100 x 50 inch figure, and ASCII hyphen
# instead of the Unicode minus sign on the axes.
rcParams.update({
    'font.size': 80,
    'figure.figsize': (100, 50),
    'axes.unicode_minus': False,
})
# Apply the style sheet last, after the defaults above have been set.
plt.style.use('tableau-colorblind10')

In [None]:
# One line per company showing the positive/negative ratio over the years.
df_comp_ratio.plot(marker='o', markersize=60, linewidth=12, legend=None)
plt.grid(True)
plt.ylim([0.5, 7])
plt.xlabel('')
# Take the tick labels from the plotted index rather than a hard-coded count of
# 10 and the global year_list, so ticks always match the rows that were drawn.
# NOTE(review): relies on 'year' being non-numeric so that points sit at 0..n-1.
plt.xticks(np.arange(len(df_comp_ratio.index)), df_comp_ratio.index)
plt.show()

---

In [None]:
# End of file