In [1]:
import pandas as pd
import numpy as np
import os
from scipy.cluster import hierarchy as hc
import matplotlib
import matplotlib.pyplot as plt
import warnings
import seaborn as sns
from scipy import stats
import math

%matplotlib inline
warnings.filterwarnings(action='ignore')

matplotlib.rcParams['font.family'] ='Malgun Gothic'
matplotlib.rcParams['axes.unicode_minus'] =False

# Accenture

## Load data

In [2]:
# Root folder holding the analysis inputs and results on the local machine.
# NOTE(review): absolute, user-specific path — must be edited to run elsewhere.
dir_root = 'C:/Users/10188/Desktop/기업가치/상관분석/기업가치분석_data_result'

# Data sub-folders sit under '01. data'; results sit under '03. result'.
dir_master, dir_processed = (
    os.path.join(dir_root, '01. data', leaf) for leaf in ('master', 'processed')
)
dir_result = os.path.join(dir_root, '03. result')

In [3]:
# Company under analysis and the target metric; both are used below to build
# the result paths.
company, y = 'Accenture', '매출액증가율(YoY)(연도)'

In [4]:
# Results are laid out as <dir_result>/<company>/<y>.
dir_company = os.path.join(dir_result, company)
dir_company_y = os.path.join(dir_result, company, y)

In [5]:
# Load the two result tables stored in this company's target folder; each is
# expected to carry a 'corr' column that the comparison cells filter on.
result, result_sub = (
    pd.read_csv(os.path.join(dir_company_y, csv_name))
    for csv_name in ('result.csv', 'result_sub.csv')
)

## Compare : total ~ sub

### over 0.7

In [6]:
# Outer-join the rows with |corr| > 0.7 from `result` and `result_sub`, so a
# variable that passes the threshold in only one table is still listed (the
# other side's correlation is NaN). Rows are ordered by `corr_total`, highest
# first.
result_total_7 = (
    result.loc[result['corr'].abs() > 0.7]
    .merge(
        result_sub.loc[result_sub['corr'].abs() > 0.7],
        how='outer',
        on=['company', 'variable', 'group', 'importance'],
        suffixes=('_total', '_sub'),
    )
    .sort_values(by='corr_total', ascending=False)
)

In [7]:
# Narrow the comparison to the join keys plus the two correlation columns.
result_total_7 = result_total_7.loc[
    :, ['company', 'variable', 'group', 'importance', 'corr_total', 'corr_sub']
]

In [8]:
# Persist the 0.7 comparison next to the input tables, without the row index.
# NOTE(review): 'utf-8-sig' prepends a BOM — presumably so Excel detects UTF-8
# for the Korean text; confirm.
result_total_7.to_csv(
    os.path.join(dir_company_y, 'compare_period_corr_07.csv'),
    encoding='utf-8-sig',
    index=False,
)

### over 0.5

In [9]:
# Same comparison as the 0.7 cell but with the looser |corr| > 0.5 threshold:
# outer-join the qualifying rows of `result` and `result_sub`, then order by
# `corr_total`, highest first.
result_total_5 = (
    result.loc[result['corr'].abs() > 0.5]
    .merge(
        result_sub.loc[result_sub['corr'].abs() > 0.5],
        how='outer',
        on=['company', 'variable', 'group', 'importance'],
        suffixes=('_total', '_sub'),
    )
    .sort_values(by='corr_total', ascending=False)
)

In [10]:
# Narrow the comparison to the join keys plus the two correlation columns.
result_total_5 = result_total_5.loc[
    :, ['company', 'variable', 'group', 'importance', 'corr_total', 'corr_sub']
]

In [11]:
# Persist the 0.5 comparison next to the input tables, without the row index.
# NOTE(review): 'utf-8-sig' prepends a BOM — presumably so Excel detects UTF-8
# for the Korean text; confirm.
result_total_5.to_csv(
    os.path.join(dir_company_y, 'compare_period_corr_05.csv'),
    encoding='utf-8-sig',
    index=False,
)

### Save

In [12]:
# Bundle the loaded tables and both threshold comparisons into one workbook
# named after the target metric `y`.
# The context manager saves and closes the workbook on exit: the former
# `writer.save()` call no longer exists in pandas >= 2.0, and the `with` block
# also releases the file handle if one of the sheet writes raises.
with pd.ExcelWriter(os.path.join(dir_company_y, y + '.xlsx'), engine='xlsxwriter') as writer:
    # `result` and `result_sub` are placed side by side, aligned on the row index.
    pd.concat([result, result_sub], axis=1).to_excel(writer, sheet_name='Correlation', index=False)
    result_total_7.to_excel(writer, sheet_name='Correlation > 0.7', index=False)
    result_total_5.to_excel(writer, sheet_name='Correlation > 0.5', index=False)

# Infosys

## Load data

In [13]:
# Root folder holding the analysis inputs and results on the local machine.
# NOTE(review): absolute, user-specific path — must be edited to run elsewhere.
dir_root = 'C:/Users/10188/Desktop/기업가치/상관분석/기업가치분석_data_result'

# Data sub-folders sit under '01. data'; results sit under '03. result'.
dir_master, dir_processed = (
    os.path.join(dir_root, '01. data', leaf) for leaf in ('master', 'processed')
)
dir_result = os.path.join(dir_root, '03. result')

In [14]:
# Company under analysis and the target metric; both are used below to build
# the result paths.
company, y = 'Infosys', '매출액증가율(YoY)(연도)'

In [15]:
# Results are laid out as <dir_result>/<company>/<y>.
dir_company = os.path.join(dir_result, company)
dir_company_y = os.path.join(dir_result, company, y)

In [16]:
# Load the two result tables stored in this company's target folder; each is
# expected to carry a 'corr' column that the comparison cells filter on.
result, result_sub = (
    pd.read_csv(os.path.join(dir_company_y, csv_name))
    for csv_name in ('result.csv', 'result_sub.csv')
)

## Compare : total ~ sub

### over 0.7

In [17]:
# Outer-join the rows with |corr| > 0.7 from `result` and `result_sub`, so a
# variable that passes the threshold in only one table is still listed (the
# other side's correlation is NaN). Rows are ordered by `corr_total`, highest
# first.
result_total_7 = (
    result.loc[result['corr'].abs() > 0.7]
    .merge(
        result_sub.loc[result_sub['corr'].abs() > 0.7],
        how='outer',
        on=['company', 'variable', 'group', 'importance'],
        suffixes=('_total', '_sub'),
    )
    .sort_values(by='corr_total', ascending=False)
)

In [18]:
# Narrow the comparison to the join keys plus the two correlation columns.
result_total_7 = result_total_7.loc[
    :, ['company', 'variable', 'group', 'importance', 'corr_total', 'corr_sub']
]

In [19]:
# Persist the 0.7 comparison next to the input tables, without the row index.
# NOTE(review): 'utf-8-sig' prepends a BOM — presumably so Excel detects UTF-8
# for the Korean text; confirm.
result_total_7.to_csv(
    os.path.join(dir_company_y, 'compare_period_corr_07.csv'),
    encoding='utf-8-sig',
    index=False,
)

### over 0.5

In [20]:
# Same comparison as the 0.7 cell but with the looser |corr| > 0.5 threshold:
# outer-join the qualifying rows of `result` and `result_sub`, then order by
# `corr_total`, highest first.
result_total_5 = (
    result.loc[result['corr'].abs() > 0.5]
    .merge(
        result_sub.loc[result_sub['corr'].abs() > 0.5],
        how='outer',
        on=['company', 'variable', 'group', 'importance'],
        suffixes=('_total', '_sub'),
    )
    .sort_values(by='corr_total', ascending=False)
)

In [21]:
# Narrow the comparison to the join keys plus the two correlation columns.
result_total_5 = result_total_5.loc[
    :, ['company', 'variable', 'group', 'importance', 'corr_total', 'corr_sub']
]

In [22]:
# Persist the 0.5 comparison next to the input tables, without the row index.
# NOTE(review): 'utf-8-sig' prepends a BOM — presumably so Excel detects UTF-8
# for the Korean text; confirm.
result_total_5.to_csv(
    os.path.join(dir_company_y, 'compare_period_corr_05.csv'),
    encoding='utf-8-sig',
    index=False,
)

### Save

In [23]:
# Bundle the loaded tables and both threshold comparisons into one workbook
# named after the target metric `y`.
# The context manager saves and closes the workbook on exit: the former
# `writer.save()` call no longer exists in pandas >= 2.0, and the `with` block
# also releases the file handle if one of the sheet writes raises.
with pd.ExcelWriter(os.path.join(dir_company_y, y + '.xlsx'), engine='xlsxwriter') as writer:
    # `result` and `result_sub` are placed side by side, aligned on the row index.
    pd.concat([result, result_sub], axis=1).to_excel(writer, sheet_name='Correlation', index=False)
    result_total_7.to_excel(writer, sheet_name='Correlation > 0.7', index=False)
    result_total_5.to_excel(writer, sheet_name='Correlation > 0.5', index=False)

# TCS

## Load data

In [24]:
# Root folder holding the analysis inputs and results on the local machine.
# NOTE(review): absolute, user-specific path — must be edited to run elsewhere.
dir_root = 'C:/Users/10188/Desktop/기업가치/상관분석/기업가치분석_data_result'

# Data sub-folders sit under '01. data'; results sit under '03. result'.
dir_master, dir_processed = (
    os.path.join(dir_root, '01. data', leaf) for leaf in ('master', 'processed')
)
dir_result = os.path.join(dir_root, '03. result')

In [25]:
# Company under analysis and the target metric; both are used below to build
# the result paths.
company, y = 'TCS', '매출액증가율(YoY)(연도)'

In [26]:
# Results are laid out as <dir_result>/<company>/<y>.
dir_company = os.path.join(dir_result, company)
dir_company_y = os.path.join(dir_result, company, y)

In [27]:
# Load the two result tables stored in this company's target folder; each is
# expected to carry a 'corr' column that the comparison cells filter on.
result, result_sub = (
    pd.read_csv(os.path.join(dir_company_y, csv_name))
    for csv_name in ('result.csv', 'result_sub.csv')
)

## Compare : total ~ sub

### over 0.7

In [28]:
# Outer-join the rows with |corr| > 0.7 from `result` and `result_sub`, so a
# variable that passes the threshold in only one table is still listed (the
# other side's correlation is NaN). Rows are ordered by `corr_total`, highest
# first.
result_total_7 = (
    result.loc[result['corr'].abs() > 0.7]
    .merge(
        result_sub.loc[result_sub['corr'].abs() > 0.7],
        how='outer',
        on=['company', 'variable', 'group', 'importance'],
        suffixes=('_total', '_sub'),
    )
    .sort_values(by='corr_total', ascending=False)
)

In [29]:
# Narrow the comparison to the join keys plus the two correlation columns.
result_total_7 = result_total_7.loc[
    :, ['company', 'variable', 'group', 'importance', 'corr_total', 'corr_sub']
]

In [30]:
# Persist the 0.7 comparison next to the input tables, without the row index.
# NOTE(review): 'utf-8-sig' prepends a BOM — presumably so Excel detects UTF-8
# for the Korean text; confirm.
result_total_7.to_csv(
    os.path.join(dir_company_y, 'compare_period_corr_07.csv'),
    encoding='utf-8-sig',
    index=False,
)

### over 0.5

In [31]:
# Same comparison as the 0.7 cell but with the looser |corr| > 0.5 threshold:
# outer-join the qualifying rows of `result` and `result_sub`, then order by
# `corr_total`, highest first.
result_total_5 = (
    result.loc[result['corr'].abs() > 0.5]
    .merge(
        result_sub.loc[result_sub['corr'].abs() > 0.5],
        how='outer',
        on=['company', 'variable', 'group', 'importance'],
        suffixes=('_total', '_sub'),
    )
    .sort_values(by='corr_total', ascending=False)
)

In [32]:
# Narrow the comparison to the join keys plus the two correlation columns.
result_total_5 = result_total_5.loc[
    :, ['company', 'variable', 'group', 'importance', 'corr_total', 'corr_sub']
]

In [33]:
# Persist the 0.5 comparison next to the input tables, without the row index.
# NOTE(review): 'utf-8-sig' prepends a BOM — presumably so Excel detects UTF-8
# for the Korean text; confirm.
result_total_5.to_csv(
    os.path.join(dir_company_y, 'compare_period_corr_05.csv'),
    encoding='utf-8-sig',
    index=False,
)

### Save

In [34]:
# Bundle the loaded tables and both threshold comparisons into one workbook
# named after the target metric `y`.
# The context manager saves and closes the workbook on exit: the former
# `writer.save()` call no longer exists in pandas >= 2.0, and the `with` block
# also releases the file handle if one of the sheet writes raises.
with pd.ExcelWriter(os.path.join(dir_company_y, y + '.xlsx'), engine='xlsxwriter') as writer:
    # `result` and `result_sub` are placed side by side, aligned on the row index.
    pd.concat([result, result_sub], axis=1).to_excel(writer, sheet_name='Correlation', index=False)
    result_total_7.to_excel(writer, sheet_name='Correlation > 0.7', index=False)
    result_total_5.to_excel(writer, sheet_name='Correlation > 0.5', index=False)

# Cognizant

## Load data

In [35]:
# Root folder holding the analysis inputs and results on the local machine.
# NOTE(review): absolute, user-specific path — must be edited to run elsewhere.
dir_root = 'C:/Users/10188/Desktop/기업가치/상관분석/기업가치분석_data_result'

# Data sub-folders sit under '01. data'; results sit under '03. result'.
dir_master, dir_processed = (
    os.path.join(dir_root, '01. data', leaf) for leaf in ('master', 'processed')
)
dir_result = os.path.join(dir_root, '03. result')

In [36]:
# Company under analysis and the target metric; both are used below to build
# the result paths.
company, y = 'Cognizant', '매출액증가율(YoY)(연도)'

In [37]:
# Results are laid out as <dir_result>/<company>/<y>.
dir_company = os.path.join(dir_result, company)
dir_company_y = os.path.join(dir_result, company, y)

In [38]:
# Load the two result tables stored in this company's target folder; each is
# expected to carry a 'corr' column that the comparison cells filter on.
result, result_sub = (
    pd.read_csv(os.path.join(dir_company_y, csv_name))
    for csv_name in ('result.csv', 'result_sub.csv')
)

## Compare : total ~ sub

### over 0.7

In [39]:
# Outer-join the rows with |corr| > 0.7 from `result` and `result_sub`, so a
# variable that passes the threshold in only one table is still listed (the
# other side's correlation is NaN). Rows are ordered by `corr_total`, highest
# first.
result_total_7 = (
    result.loc[result['corr'].abs() > 0.7]
    .merge(
        result_sub.loc[result_sub['corr'].abs() > 0.7],
        how='outer',
        on=['company', 'variable', 'group', 'importance'],
        suffixes=('_total', '_sub'),
    )
    .sort_values(by='corr_total', ascending=False)
)

In [40]:
# Narrow the comparison to the join keys plus the two correlation columns.
result_total_7 = result_total_7.loc[
    :, ['company', 'variable', 'group', 'importance', 'corr_total', 'corr_sub']
]

In [41]:
# Persist the 0.7 comparison next to the input tables, without the row index.
# NOTE(review): 'utf-8-sig' prepends a BOM — presumably so Excel detects UTF-8
# for the Korean text; confirm.
result_total_7.to_csv(
    os.path.join(dir_company_y, 'compare_period_corr_07.csv'),
    encoding='utf-8-sig',
    index=False,
)

### over 0.5

In [42]:
# Same comparison as the 0.7 cell but with the looser |corr| > 0.5 threshold:
# outer-join the qualifying rows of `result` and `result_sub`, then order by
# `corr_total`, highest first.
result_total_5 = (
    result.loc[result['corr'].abs() > 0.5]
    .merge(
        result_sub.loc[result_sub['corr'].abs() > 0.5],
        how='outer',
        on=['company', 'variable', 'group', 'importance'],
        suffixes=('_total', '_sub'),
    )
    .sort_values(by='corr_total', ascending=False)
)

In [43]:
# Narrow the comparison to the join keys plus the two correlation columns.
result_total_5 = result_total_5.loc[
    :, ['company', 'variable', 'group', 'importance', 'corr_total', 'corr_sub']
]

In [44]:
# Persist the 0.5 comparison next to the input tables, without the row index.
# NOTE(review): 'utf-8-sig' prepends a BOM — presumably so Excel detects UTF-8
# for the Korean text; confirm.
result_total_5.to_csv(
    os.path.join(dir_company_y, 'compare_period_corr_05.csv'),
    encoding='utf-8-sig',
    index=False,
)

### Save

In [45]:
# Bundle the loaded tables and both threshold comparisons into one workbook
# named after the target metric `y`.
# The context manager saves and closes the workbook on exit: the former
# `writer.save()` call no longer exists in pandas >= 2.0, and the `with` block
# also releases the file handle if one of the sheet writes raises.
with pd.ExcelWriter(os.path.join(dir_company_y, y + '.xlsx'), engine='xlsxwriter') as writer:
    # `result` and `result_sub` are placed side by side, aligned on the row index.
    pd.concat([result, result_sub], axis=1).to_excel(writer, sheet_name='Correlation', index=False)
    result_total_7.to_excel(writer, sheet_name='Correlation > 0.7', index=False)
    result_total_5.to_excel(writer, sheet_name='Correlation > 0.5', index=False)