#### Description

In [1]:
# Notebook banner: authorship followed by a one-line description, one item per line.
print(
    'Author:  Leo Pauly (cnlp@leeds.ac.uk) & Nick Wilson (n.wilson@lubs.leeds.ac.uk)',
    'Description: Autmatic database update',
    sep='\n',
)

Author:  Leo Pauly (cnlp@leeds.ac.uk) & Nick Wilson (n.wilson@lubs.leeds.ac.uk)
Description: Autmatic database update


#### Usage instructions & Other info

1. Always give absolute file paths
2. New entry directory names are assumed to always start with UKLTD_R_ or UKLTD_W_
3. The dir_list.txt file contains the list of already processed directories
4. Preferably install Python using Anaconda (all-in-one installation): https://www.anaconda.com/products/individual#windows
5. If you get a 'pyreadstat module is missing' error, run:  !pip install pyreadstat
6. In most cases only the base directory needs to be changed
7. Databases are stored in the directory: UKLTD_Database

In [4]:
!pip install pyreadstat


Collecting pyreadstat
  Downloading pyreadstat-1.0.8-cp37-cp37m-macosx_10_9_x86_64.whl (551 kB)
[K     |████████████████████████████████| 551 kB 10.4 MB/s eta 0:00:01
Installing collected packages: pyreadstat
Successfully installed pyreadstat-1.0.8


#### Imports

In [5]:
import os
import sys
print('python version',sys.version)
import pandas as pd
import numpy as np
import pyreadstat
# Column names of the AC01 records. The list is passed as `names=` when the
# pipe-separated entry files are read and is used as the column set of a newly
# created (empty) database.
# 'UPLOAD' is overwritten after loading with the entry file's name (without
# extension), so it records which upload a row came from.
# NOTE(review): 'PL_Account_Weserve', 'Sundry_Weserves' and 'Revaluation_Weserve'
# look like misspellings of "Reserve(s)" - confirm against the data supplier's
# file specification before renaming, because saved databases use these names.
AC01_header=['AC01', 'REGNUM', 'ACCDAT_start', 'ACCDAT', 'number_of_weeks', 'months',
       'currency', 'consolidated', 'acctype', 'Turnover', 'Export',
       'Cost_of_Sales', 'Gross_Profit', 'Wages_Salaries',
       'Directors_Emoluments', 'Operating_Profits', 'Depreciation',
       'Audit_Fees', 'Interest_Payments', 'Pre_Tax_Profit', 'taxation1',
       'Profit_After_Tax', 'Dividends_Payable', 'Retained_Profits',
       'Tangible_Assets', 'Intangible_Assets', 'Total_Fixed_Assets',
       'Total_Current_Assets', 'Trade_Debtors', 'Stock', 'Cash',
       'Other_Current_Assets', 'Increase_In_Cash', 'Mis_Current_Assets',
       'Total_Assets', 'Total_Current_Liabilities', 'Trade_Creditors',
       'Bank_Overdraft', 'Other_Short_Term_Fin', 'Mis_Current_Liabilities',
       'Other_Long_Term_Fin', 'Total_Long_Term_Liabilities',
       'Bank_Overdraft_LTL', 'Total_Liabilities', 'Net_Assets',
       'Working_Capital', 'Paid_up_equity', 'PL_Account_Weserve',
       'Sundry_Weserves', 'Revaluation_Weserve', 'Shareholder_Funds',
       'NetWorth', 'NetCashflowfromOperations', 'NetCashflowbeforeFinancing',
       'NetCashflowfromFinancing', 'Contingent_Liability', 'Capital_Employed',
       'No_Employees', 'status', 'UPLOAD']

python version 3.7.6 (default, Jan  8 2020, 13:42:34) 
[Clang 4.0.1 (tags/RELEASE_401/final)]


#### Checking new entries

In [10]:
## Checking for new download
# Base directory that holds the UKLTD_* entry directories and UKLTD_Scripts.
BASE_DIR = r'/Volumes/Pegasus32 R6/CreditSafe 2019 Zipped'

# 'a+' creates dir_list.txt on the first run instead of failing; seek(0) is
# needed because append mode positions the stream at the end of the file.
with open(os.path.join(BASE_DIR, 'UKLTD_Scripts', 'dir_list.txt'), 'a+') as fd:
    fd.seek(0)
    # Drop blank lines so an empty (or newline-terminated) file yields [] and
    # not [''], which would otherwise count as a processed directory name.
    dir_list_old = [name for name in fd.read().splitlines() if name]

print('List of processed directores:',*dir_list_old,sep='\n')

# Candidates are real directories with the expected prefixes (a stray file such
# as an archive with the same prefix would fail later when its entry file is
# read). os.listdir() returns names in arbitrary order, so sort them to make
# the processing order reproducible.
dir_list_new = sorted(
    dir_name for dir_name in os.listdir(BASE_DIR)
    if dir_name.startswith(("UKLTD_W", "UKLTD_R"))
    and os.path.isdir(os.path.join(BASE_DIR, dir_name))
)

# New entries are the candidates that are not yet in the processed list.
already_processed = set(dir_list_old)
entry_dir_list = [entry_dir for entry_dir in dir_list_new if entry_dir not in already_processed]
print('\nNew entries detected:',*entry_dir_list,sep='\n')

List of processed directores:


New entries detected:
UKLTD_R_20190607
UKLTD_W_20190602
UKLTD_W_20190609
UKLTD_W_20190616


#### Creating/Loading database 

In [12]:
# Location of the AC01 database (SPSS .sav file). Reading it needs pyreadstat;
# if the 'pyreadstat module is missing', run: !pip install pyreadstat
database_file = r'/Volumes/Pegasus32 R6/CreditSafe 2019 Zipped/UKLTD_Database/AC01.sav'

if not os.path.isfile(database_file):
    # No database on disk yet: start from an empty frame carrying only the AC01 columns.
    print('Database not found.','Creating database: \n')
    df_database = pd.DataFrame(columns=AC01_header)
else:
    print('Database found.','Reading database: \n')
    df_database = pd.read_spss(database_file)

df_database

Database not found. Creating database: 



Unnamed: 0,AC01,REGNUM,ACCDAT_start,ACCDAT,number_of_weeks,months,currency,consolidated,acctype,Turnover,...,Shareholder_Funds,NetWorth,NetCashflowfromOperations,NetCashflowbeforeFinancing,NetCashflowfromFinancing,Contingent_Liability,Capital_Employed,No_Employees,status,UPLOAD


In [13]:
# Report how many variables (columns) the database has, followed by their names.
database_columns = df_database.columns
print('Number of variables:', len(database_columns), '\nVariables names:\n', database_columns)

Number of variables: 60 
Variables names:
 Index(['AC01', 'REGNUM', 'ACCDAT_start', 'ACCDAT', 'number_of_weeks', 'months',
       'currency', 'consolidated', 'acctype', 'Turnover', 'Export',
       'Cost_of_Sales', 'Gross_Profit', 'Wages_Salaries',
       'Directors_Emoluments', 'Operating_Profits', 'Depreciation',
       'Audit_Fees', 'Interest_Payments', 'Pre_Tax_Profit', 'taxation1',
       'Profit_After_Tax', 'Dividends_Payable', 'Retained_Profits',
       'Tangible_Assets', 'Intangible_Assets', 'Total_Fixed_Assets',
       'Total_Current_Assets', 'Trade_Debtors', 'Stock', 'Cash',
       'Other_Current_Assets', 'Increase_In_Cash', 'Mis_Current_Assets',
       'Total_Assets', 'Total_Current_Liabilities', 'Trade_Creditors',
       'Bank_Overdraft', 'Other_Short_Term_Fin', 'Mis_Current_Liabilities',
       'Other_Long_Term_Fin', 'Total_Long_Term_Liabilities',
       'Bank_Overdraft_LTL', 'Total_Liabilities', 'Net_Assets',
       'Working_Capital', 'Paid_up_equity', 'PL_Account_Weserve

#### Updating database

Rules:

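1. Check whether the entry's REGNUM field already exists in the database
2. If it exists: update ?? (open question: the update rule is still to be decided)
3. If not: add it as a new row in the database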

In [19]:
def update_database(entry_dir, base_dir=r'/Volumes/Pegasus32 R6/CreditSafe 2019 Zipped'):
    """Append the new records of one entry directory to the global ``df_database``.

    The entry file name is built from the last two underscore-separated parts
    of ``entry_dir`` (``UKLTD_R_20190607`` -> ``AC01_R_20190607.txt``). The
    file is read from ``<base_dir>/<entry_dir>/`` as a pipe-separated file
    without a header row, using ``AC01_header`` as the column names. The
    ``UPLOAD`` column of every loaded row is set to the entry file name
    without its extension.

    Matching is done on ``REGNUM``:

    * a row whose REGNUM is not yet in the database is appended; if the same
      REGNUM occurs several times in the file, only its first row is appended;
    * a row whose REGNUM is already in the database is skipped, so existing
      records are never modified;
    * a row with a missing REGNUM cannot be matched and is always appended.

    Parameters
    ----------
    entry_dir : str
        Name of the entry directory, e.g. ``'UKLTD_R_20190607'``.
    base_dir : str, optional
        Directory that contains the entry directories.

    Returns
    -------
    bool
        Always ``True``; problems while reading the file surface as the
        exceptions raised by ``pandas.read_csv``.
    """
    global df_database

    name_parts = entry_dir.split('_')
    entry_file = "AC01_" + name_parts[-2] + "_" + name_parts[-1] + ".txt"
    print('Entry file:',entry_file)

    df_entry_file = pd.read_csv(
        os.path.join(base_dir, entry_dir, entry_file), sep='|', names=AC01_header
    )
    df_entry_file['UPLOAD'] = entry_file.split('.')[0]

    # Select all rows to add in one vectorised pass, then concatenate once.
    # Appending row by row copies the whole database for every row, and
    # DataFrame.append no longer exists in pandas 2.x.
    regnum = df_entry_file['REGNUM']
    has_no_regnum = regnum.isna()
    not_in_database = ~regnum.isin(df_database['REGNUM'])
    first_in_file = ~regnum.duplicated(keep='first')
    new_rows = df_entry_file[has_no_regnum | (not_in_database & first_in_file)]

    if not new_rows.empty:
        df_database = pd.concat([df_database, new_rows], ignore_index=True)
    return True

In [None]:
## Reading from new directory
# Process every newly detected entry directory in turn and report each outcome.
for entry_dir in entry_dir_list:
    print('\nReading from entry dir:',entry_dir)
    update_ok = update_database(entry_dir)
    print('Update Success:',update_ok)


Reading from entry dir: UKLTD_R_20190607
Entry file: AC01_R_20190607.txt


In [None]:
# Show the database after the update; as the last expression of the cell it is
# rendered as the cell's output.
df_database


In [None]:
## Writing updated database to file
# write_sav() writes the file and returns None, so its result must not be
# assigned back to df_database: that would replace the in-memory database with
# None and break any later use or re-run of this cell.
pyreadstat.write_sav(df_database,database_file)

In [None]:
## Update processed directory list
# dir_list_old can contain '' entries (e.g. from splitting an empty file on
# '\n'); leave them out so the list file does not accumulate blank lines that
# are later read back as processed directory names.
processed_dirs = [name for name in dir_list_old + entry_dir_list if name]
with open(r'/Volumes/Pegasus32 R6/CreditSafe 2019 Zipped/UKLTD_Scripts/dir_list.txt', 'w') as fd:
    fd.write('\n'.join(processed_dirs))