# <center>Gathering Supply Chain Data<center>

- Date: March 2019
- Author: Minsu Yeom, CFA, FRM

In [1]:
import os
import numpy as np
import pandas as pd
import bamboolib as bam
import xlwings as xw
from datetime import date
from mlfinance.utils.misc import get_letter, get_last_bday

In [2]:
# Reload all modules (except those excluded by %aimport) every time before executing the Python code typed.
%load_ext autoreload
%autoreload 2

# 1. Loading Equity Universe

In [3]:
universe = pd.read_table('./mlfinance/dataset/eqy_universe_gt50mm.txt',
                         header=1,
                         names=[
                             'Identifier', 'Name', 'Revenue', 'Company Type',
                             'Business Description', 'FactSet Industry',
                             'Crunchbase Category(BETA)',
                             'Crunchbase Rank(BETA)', 'Ultimate Parent Name',
                             'Fiscal Year End', 'Country', 'Website'
                         ], 
                         parse_dates = ['Fiscal Year End']
                        )

#### Change dtypes properly

In [4]:
universe.loc[:, 'Revenue'] = pd.to_numeric(universe.loc[:,'Revenue'], errors='coerce')
universe.loc[:, 'Fiscal Year End'] = pd.to_datetime(universe.loc[:, 'Fiscal Year End'], errors='coerce')

In [5]:
universe.dtypes

Identifier                           object
Name                                 object
Revenue                             float64
Company Type                         object
Business Description                 object
FactSet Industry                     object
Crunchbase Category(BETA)            object
Crunchbase Rank(BETA)                object
Ultimate Parent Name                 object
Fiscal Year End              datetime64[ns]
Country                              object
Website                              object
dtype: object

#### Setting the minimum market cap criteria in USD mm
- Reset the index.

In [6]:
min_market_cap = 200
universe = universe.loc[universe.loc[:, 'Revenue'] >= min_market_cap, :]
universe = universe.reset_index(drop = True)

### Missing value analysis

- Looks all good.

In [7]:
universe.isna().sum()

Identifier                     0
Name                           0
Revenue                        0
Company Type                   0
Business Description           0
FactSet Industry               0
Crunchbase Category(BETA)      0
Crunchbase Rank(BETA)          0
Ultimate Parent Name           0
Fiscal Year End                1
Country                        0
Website                      127
dtype: int64

#### Launch Excel with making it sure FastSet add-ins enabled
- `fdswFixExcel.exe` enables the add-ins.
- `retcode`: 0 if runs fine. 1 if any error.

In [8]:
import subprocess
retcode = subprocess.run(['C:/Program Files (x86)/FactSet/fdswFixExcel.exe'])

In [9]:
# subprocess.CompletedProcess.check_returncode

#### Load company tickers

In [9]:
# # tickers = ['AAPL-US', 'GOOGL-US', 'AMZN-US']
# tickers = universe.loc[0:999, 'Identifier'].to_list()

In [10]:
# len(tickers)

#### Set an Excel header

In [11]:
header_meta = ['date', 'ticker',  'relationship']
header_fcst = ['rel_comp_tic', 'rel_comp_nm', 'no_overlap', 'pct_overlap', 'revenue_dependence', 'source', 'prtr_rel']
header = header_meta + header_fcst

#### Set Excel column names and Excel ranges for a relationship data set

Column names

In [12]:
# Excel column names for meta data
meta_col = {}
for i, col_nm in enumerate(header_meta):
    meta_col[col_nm] = get_letter(i)

# Excel column names for FactSet data to be downloaded
fcst_start_col = get_letter(len(header_meta))
fcst_end_col = get_letter(len(header_fcst)-1, letter = fcst_start_col)

The whole data range

In [13]:
per_company_rows = 500
batch_sz = 10
data_height = batch_sz * per_company_rows
data_range = 'A1:' + get_letter(len(header)-1) + str(data_height)

#### FactSet settings

In [14]:
#wb=xw.books

# SUPL: Suppliers. 
# CUST: Customers
# PRTR: Partners
# COMP: Competitors
# It's FactSet's codes.
relationships = ['SUPL', 'CUST', 'PRTR', 'COMP']

# =FDS("GOOGL-US", "FF_COMPANY_RELATIONSHIP(SUPL,PUB,Ticker,ALL,ALL)")
fds_fn = '", "TRANSPOSE(FF_COMPANY_RELATIONSHIP('
fds_param = ',PUB,Ticker,ALL,ALL))")'

#### Load company relationship data from FactSet and write it in an Excel workbook by
- Writing `=FDS()` directly an Excel sheet named as in `relationships`
- Allocating `rows` rows for each company

In [15]:
def get_company_relationships(tickers):
    ws = {}
    fcst_ranges = {}

    # Iterate over ['SUPL', 'CUST', 'PRTR', 'COMP']
    for i, rel in enumerate(relationships):
        ws[rel] = xw.sheets.add(rel)    # Create a sheet per relationship

        # Load company relationship data and write it in the workbook.
        for j, ticker in enumerate(tickers):
            fcst_ranges[ticker] = '{:s}{:d}:{:s}{:d}'.format(fcst_start_col, j*per_company_rows+1, fcst_end_col, (j+1)*per_company_rows)
            ws[rel].range(fcst_ranges[ticker]).formula_array = '=FDSR("' + ticker + fds_fn + rel + fds_param
            ticker_col = get_letter(header_meta.index('ticker'))
            ticker_range = '{:s}{:d}:{:s}{:d}'.format(ticker_col, j*per_company_rows+1, ticker_col, (j+1)*per_company_rows)
            ws[rel].range(ticker_range).value = ticker


        for meta_type, col_nm in meta_col.items():
            meta_range = '{:s}{:d}:{:s}{:d}'.format(meta_col[meta_type], 1, meta_col[meta_type], data_height)
            meta_value = get_last_bday() if meta_type == 'date' else rel if meta_type == 'relationship' else '-'

            # We don't write `ticker` here because it's already filled.
            if meta_type != 'ticker':
                ws[rel].range(meta_range).value = meta_value
    
    return ws

#### Extract data from each sheet in `ws` and merge it into `df`
- We also do: resetting the index and setting the column names.

In [16]:
universe

Unnamed: 0,Identifier,Name,Revenue,Company Type,Business Description,FactSet Industry,Crunchbase Category(BETA),Crunchbase Rank(BETA),Ultimate Parent Name,Fiscal Year End,Country,Website
0,DRI-DE,1&1 Drillisch AG,4320.78600,Public Company,1&1 Drillisch AG provides telecommunication se...,Specialty Telecommunications,-,-,United Internet AG,2018-12-31,Germany,http://www.1und1-drillisch.de
1,MOBIL-NG,11 Plc,455.41678,Public Company,11 Plc operates as an oil and gas distribution...,Oil Refining/Marketing,-,-,NIPCO Plc,2018-12-31,Nigeria,http://www.11plc.com/
2,2GB-DE,2G Energy AG,247.49089,Public Company,"2G Energy AG operates as a holding company, wh...",Electric Utilities,-,-,2G Energy AG,2018-12-31,Germany,http://2-g.de
3,III-GB,3i Group plc,506.57350,Public Company,3i Group Plc operates as a private equity and ...,Investment Managers,-,-,3i Group plc,2019-03-31,United Kingdom,http://3igroup.com
4,FOUR-GB,4imprint Group plc,860.84400,Public Company,4imprint Group Plc engages in the provision of...,Advertising/Marketing Services,-,-,4imprint Group plc,2019-12-28,United Kingdom,http://4imprint.com
...,...,...,...,...,...,...,...,...,...,...,...,...
14230,ZM-US,"Zoom Video Communications, Inc.",622.65800,Public Company,"Zoom Video Communications, Inc. engages in the...",Packaged Software,-,-,"Zoom Video Communications, Inc.",2020-01-31,United States,http://www.zoom.us
14231,ZS-US,"Zscaler, Inc.",302.83600,Public Company,"Zscaler, Inc. engages in the provision of clou...",Packaged Software,-,-,"Zscaler, Inc.",2019-07-31,United States,http://www.zscaler.com
14232,ZUMZ-US,Zumiez Inc.,1034.12900,Public Company,"Zumiez, Inc. engages in retailing apparel, foo...",Apparel/Footwear Retail,-,-,Zumiez Inc.,2020-02-01,United States,http://www.zumiez.com
14233,ZUO-US,"Zuora, Inc.",276.05700,Public Company,"Zuora, Inc. operates as an online subscription...",Packaged Software,-,-,"Zuora, Inc.",2020-01-31,United States,http://www.zuora.com


In [17]:
ws_list = []
for idx in range(0, 30, batch_sz):
    print(idx, end = ' ')
    tickers = universe.loc[idx:idx+batch_sz-1, 'Identifier'].to_list()
#     tickers = df2.loc[idx:idx+batch_sz-1, 'ticker'].to_list()
    ws = get_company_relationships(tickers)
    
    for sheet in ws.keys():
        ws_list.append(pd.DataFrame(ws[sheet].range(data_range).value))
#         ws[sheet].delete()

df = pd.concat(ws_list)
df = df.reset_index(drop = True)
df.columns = header

0 10 

ValueError: Sheet named 'SUPL' already present in workbook

In [19]:
df

Unnamed: 0,date,ticker,relationship,rel_comp_tic,rel_comp_nm,no_overlap,pct_overlap,revenue_dependence,source,prtr_rel
0,2020-03-20,DRI-DE,SUPL,TEF-ES,Telefonica SA,5 of 27,19,,Direct,
1,2020-03-20,DRI-DE,SUPL,VOD-GB,Vodafone Group Plc,4 of 31,13,,Direct,
2,2020-03-20,DRI-DE,SUPL,SAP-DE,SAP SE,0 of 93,0,,Direct,
3,2020-03-20,DRI-DE,SUPL,SGE-GB,Sage Group plc,0 of 22,0,,Direct,
4,2020-03-20,DRI-DE,SUPL,O2D-DE,Telefonica Deutschland Holding AG,4 of 6,67,,Reverse,
...,...,...,...,...,...,...,...,...,...,...
179995,2020-03-20,ADVI-FR,COMP,,,,,,,
179996,2020-03-20,ADVI-FR,COMP,,,,,,,
179997,2020-03-20,ADVI-FR,COMP,,,,,,,
179998,2020-03-20,ADVI-FR,COMP,,,,,,,


In [41]:
# df.to_pickle('./mlfinance/dataset/company_relationships_1-1000.pkl')

In [11]:
df2 = pd.read_pickle('./mlfinance/dataset/company_relationships_1-1000.pkl')

In [22]:
df.loc[3500:3510,:]

Unnamed: 0,date,ticker,relationship,rel_comp_tic,rel_comp_nm,no_overlap,pct_overlap,revenue_dependence,source,prtr_rel
3500,2020-03-20,AFE-ZA,SUPL,ACT-ZA,,,,,,
3501,2020-03-20,AFE-ZA,SUPL,AfroCentric Investment Corporation Limited,,,,,,
3502,2020-03-20,AFE-ZA,SUPL,0 of 21,,,,,,
3503,2020-03-20,AFE-ZA,SUPL,0,,,,,,
3504,2020-03-20,AFE-ZA,SUPL,,,,,,,
3505,2020-03-20,AFE-ZA,SUPL,Reverse,,,,,,
3506,2020-03-20,AFE-ZA,SUPL,,,,,,,
3507,2020-03-20,AFE-ZA,SUPL,,,,,,,
3508,2020-03-20,AFE-ZA,SUPL,,,,,,,
3509,2020-03-20,AFE-ZA,SUPL,,,,,,,


In [20]:
df.loc[df.rel_comp_tic=='Reverse', :]

Unnamed: 0,date,ticker,relationship,rel_comp_tic,rel_comp_nm,no_overlap,pct_overlap,revenue_dependence,source,prtr_rel
3505,2020-03-20,AFE-ZA,SUPL,Reverse,,,,,,
4505,2020-03-20,7745-JP,SUPL,Reverse,,,,,,
6505,2020-03-20,III-GB,CUST,Reverse,,,,,,
9005,2020-03-20,5391-JP,CUST,Reverse,,,,,,
15505,2020-03-20,MOBIL-NG,COMP,Reverse,,,,,,
17005,2020-03-20,FOUR-GB,COMP,Reverse,,,,,,
19505,2020-03-20,7745-JP,COMP,Reverse,,,,,,
23505,2020-03-20,AAK-SE,SUPL,Reverse,,,,,,
29505,2020-03-20,AHCS-QA,CUST,Reverse,,,,,,
36005,2020-03-20,APE-AU,COMP,Reverse,,,,,,


In [21]:
df3 = df2.loc[:, ['ticker', 'relationship', 'rel_comp_tic']]

NameError: name 'df2' is not defined

In [8]:
df3
# bamboolib live code export
df3 = df3.dropna(subset=['rel_comp_tic'])
df3 = df3.groupby(['ticker', 'relationship']).agg(rel_comp_tic_nunique=('rel_comp_tic', 'nunique')).reset_index()
df3

Unnamed: 0,ticker,relationship,rel_comp_tic
0,DRI-DE,SUPL,TEF-ES
1,DRI-DE,SUPL,VOD-GB
2,DRI-DE,SUPL,SAP-DE
3,DRI-DE,SUPL,SGE-GB
4,DRI-DE,SUPL,O2D-DE
...,...,...,...
3999995,8530-JP,COMP,
3999996,8530-JP,COMP,
3999997,8530-JP,COMP,
3999998,8530-JP,COMP,


TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'

In [42]:
df.to_feather('./mlfinance/dataset/company_relationships_1-1000.feather')

ImportError: Missing optional dependency 'pyarrow'.  Use pip or conda to install pyarrow.

## ----------------- The end of work -------------------

#### Set ranges in each sheet for each company in `tickers`
- e.g. `cell_ranges['AAPL-US'] <- 'D1:J1000'

In [9]:
height = 1000
fcst_data_ranges = {}
for i, name in enumerate(rel_names):
    print(i+1, name)

for j, ticker in enumerate(tickers):
    fcst_data_ranges[ticker] = 'A{:d}:G{:d}'.format(j*height+1, (j+1)*height)
    print(fcst_data_ranges[ticker])
#     print('A{:d}:G{:d}'.format(j*height+1, (j+1)*height))

NameError: name 'rel_names' is not defined

#### `xw.App` launches Excel.
- With `visible` False, it runs in the background

In [None]:
app = xw.App(visible=True)

In [None]:
xw.books

In [None]:
bk

In [None]:
book = app.books.open(os.getcwd() + '/test.xlsx')
sheet = book.sheets['Suppliers']
sheet.range('A1').value = 12

In [None]:
sheets = {}
sheets['customers'] = book.sheets['Customers']
sheets['suppliers'] = book.sheets['Suppliers']
sheets['Partners'] = book.sheets['Partners']

In [None]:
sheets['customers']

In [None]:
import win32com.client as win32
from win32com.client import makepy

In [None]:
path = 'C:/Program Files (x86)/FactSet/fdsxl32_x64.xll'
xl = win32.gencache.EnsureDispatch('Excel.Application')
xl.Visible = True
xl.DisplayAlerts=False
xl.RegisterXLL(path)
# xl.COMAddIns("PowerlinkCOMAddIn.COMAddIn").Connect = True
#xl.COMAddIns('C:/Program Files (x86)/FactSe|t/COMShim_x64.dll').Installed = True
#xl.COMAddIns(4)
for i in range(1, xl.COMAddIns.Count+1):
    print("Index: ", i, xl.COMAddIns(i).Description, xl.COMAddIns(i).Connect)
    if xl.COMAddIns(i).Description == "FactSet Office COM Add-In":
        print("xl.COMAddIns({:d}).Description: {:s}".format(i, xl.COMAddIns(i).Description))

wb = xl.Workbooks.Open(os.getcwd() + '/test.xlsx')
ws = wb.Worksheets('Sheet1')

In [None]:
xl.COMAddIns.Count

In [None]:
import subprocess
import xlwings as xw
retcode = subprocess.call("C:/Program Files (x86)/FactSet/fdswFixExcel.exe")
#print(retcode)
#xl = win32.gencache.EnsureDispatch('Excel.Application')
#xl.Visible = True
#xl.DisplayAlerts = False
print(xw.books.active.name)
#wb = xl.Workbooks.Open(os.getcwd() + '/test.xlsx')
#ws = wb.Worksheets('Sheet1')

In [None]:
import xlwings

In [None]:
xl2 = win32.gencache.EnsureDispatch("Excel.Application")
xl2.Visible = True
wb2 = xl.Workbooks.Add(1)
xl2.AddIns("FactSet Office COM Add-In").Installed = False
xl2.AddIns("FactSet Office COM Add-In").Installed = True

In [None]:
xl.run('FDS', 'a', 'b', 'c', 'd')