In [1]:
from pathlib import Path
import requests
# Download a single sample document into the working directory.
filename = Path('test.pdf')
url = 'https://app.viac.ch/files/document/21V-L0F-58I/content/21V-L0F-58I.pdf'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of saving an error page under a .pdf name.
# NOTE(review): this URL presumably needs an authenticated session - confirm.
response.raise_for_status()
# Last expression of the cell: the number of bytes written.
filename.write_bytes(response.content)

0

In [2]:
import json
import csv
import os
import re
from datetime import datetime, timedelta
import pymupdf  # PyMuPDF
 


# Define a function to extract shares and exchange rate from a PDF file
def extract_shares_and_exchange_rate(document_number):
    """Extract trade details from the PDF whose file name contains ``document_number``.

    The PDF is looked up in the ``pdfs`` folder relative to the working
    directory. Every page is scanned; a value found on a later page replaces
    one found earlier, and a page without a match leaves earlier values intact.

    Args:
        document_number: Substring identifying the PDF file name.

    Returns:
        A tuple ``(shares, exchange_rate, isin, currency)`` of strings. Each
        element is ``''`` when the corresponding pattern was not found.
        ``exchange_rate`` is the inverse of the rate printed in the document,
        formatted with 8 decimals; ``currency`` is the second currency code of
        the printed ``XXX/YYY`` pair.

    Raises:
        FileNotFoundError: If the ``pdfs`` folder is missing or contains no
            file whose name includes ``document_number``.
    """
    shares = ''
    exchange_rate = ''
    isin = ''
    # Initialised before the loop so it survives pages without an exchange-rate
    # line and is defined even for a document with zero pages.
    currency = ''

    # Find the PDF file that contains the document number as a substring.
    # Sorted so the choice is deterministic when several files match.
    pdf_folder = 'pdfs'
    pdf_files = sorted(f for f in os.listdir(pdf_folder) if document_number in f)

    if not pdf_files:
        raise FileNotFoundError(f"No PDF file found containing document number {document_number}")

    pdf_path = os.path.join(pdf_folder, pdf_files[0])

    pdf_document = pymupdf.open(pdf_path)
    try:
        for page_num in range(len(pdf_document)):
            text = pdf_document.load_page(page_num).get_text()

            # Number of shares: the decimal on the line after the trade keyword
            shares_match = re.search(r'(?:Kauf|Buy|Verkauf|Sell)\n(\d+\.\d+)', text)
            if shares_match:
                shares = shares_match.group(1)

            # Exchange rate line, e.g. "Exchange rate CHF/USD 1.2345"
            exchange_rate_match = re.search(r'(?:Exchange rate|Umrechnungskurs) [A-Z]{3}/[A-Z]{3} (\d+\.\d+)\n', text)
            if exchange_rate_match:
                # Store the inverse of the printed rate
                exchange_rate = "{:3.8f}".format(1 / float(exchange_rate_match.group(1)))

                currency_match = re.search(r'(?:Exchange rate|Umrechnungskurs) [A-Z]{3}/([A-Z]{3})\s*\d+\.\d+', text, re.DOTALL)
                if currency_match:
                    currency = currency_match.group(1)

            # ISIN: 12 alphanumerics within 8 characters after the "ISIN" label
            isin_match = re.search(r'ISIN.{0,8}([A-Z0-9]{12})', text, re.DOTALL)
            if isin_match:
                isin = isin_match.group(1)
    finally:
        # Release the file handle even if extraction fails half-way
        pdf_document.close()

    # Report once per document, after all pages have been scanned
    if len(shares) < 2:
        print(f"Error in transaction {document_number}. File exists but could not find number of shares!")

    return shares, exchange_rate, isin, currency

In [10]:
import csv
from datetime import datetime

# Exchange rate / currency extracted from the most recently processed trade,
# keyed by security name. Filled by process_transactions and shared across calls.
last_ex_rate = {}
last_curr = {}

def process_transactions(account_id, transactions, securities):
    """Write one account's transactions to two CSV files.

    Trades (``TRADE_BUY`` / ``TRADE_SELL``) go to
    ``{account_id}_PortfolioTransaction.csv``; every other recognised type goes
    to ``{account_id}_AccountTransaction.csv``. Both files are created in the
    working directory and overwritten if present.

    Args:
        account_id: Used as the prefix of both CSV file names.
        transactions: List of dicts with at least ``type``, ``valueDate``
            (``YYYY-MM-DD``) and ``amountInChf``; trades also need
            ``documentNumber``. The list is processed in reverse order.
        securities: Dict updated in place, mapping ISIN to
            ``(security name, currency)`` for every trade whose PDF yielded an ISIN.

    Side effects:
        Updates the module-level ``last_ex_rate`` and ``last_curr`` dicts and
        prints warnings for missing PDFs and unknown transaction types.
    """
    # Prepare the CSV file names
    portfolio_csv_file_name = f"{account_id}_PortfolioTransaction.csv"
    account_csv_file_name = f"{account_id}_AccountTransaction.csv"

    # Both files share the same columns
    fieldnames = ['Date', 'Type', 'Value', 'Security Name', 'Transaction Currency', 'Shares', 'Exchange Rate', 'Note']

    # First pass: collect the cancellations. Each one is removed from this list
    # once it has suppressed a dividend, so one cancellation cancels one booking.
    pending_cancellations = [t for t in transactions if t['type'] == 'DIVIDEND_CANCELLATION']

    with open(portfolio_csv_file_name, 'w', newline='') as portfolio_csvfile, open(account_csv_file_name, 'w', newline='') as account_csvfile:
        portfolio_writer = csv.DictWriter(portfolio_csvfile, fieldnames=fieldnames)
        account_writer = csv.DictWriter(account_csvfile, fieldnames=fieldnames)

        portfolio_writer.writeheader()
        account_writer.writeheader()

        # Second pass: process transactions and write to the appropriate CSV
        # NOTE(review): reversed presumably to get oldest-first order - confirm.
        for transaction in transactions[::-1]:
            kind = transaction['type']

            if kind == 'DIVIDEND_CANCELLATION':
                continue

            if kind == 'DIVIDEND' and pending_cancellations:
                # Drop the dividend if a cancellation of the same amount lies within 30 days
                transaction_date = datetime.strptime(transaction['valueDate'], '%Y-%m-%d')
                cancelled = False
                for index, cancel_transaction in enumerate(pending_cancellations):
                    cancel_date = datetime.strptime(cancel_transaction['valueDate'], '%Y-%m-%d')
                    if (cancel_transaction['amountInChf'] == transaction['amountInChf'] and
                            abs((transaction_date - cancel_date).days) <= 30):
                        del pending_cancellations[index]
                        cancelled = True
                        break
                if cancelled:
                    continue

            row = {
                'Date': transaction['valueDate'],
                'Type': '',
                'Value': round(abs(transaction['amountInChf']), 8),
                'Security Name': '',
                'Transaction Currency': 'CHF',
                'Shares': '',
                'Exchange Rate': '',
                'Note': ''
            }
            is_trade = kind in ('TRADE_SELL', 'TRADE_BUY')

            if kind == 'CONTRIBUTION':
                row['Type'] = 'Deposit'
            elif kind == 'DIVIDEND':
                name = transaction.get('description', '')
                row['Type'] = 'Dividend'
                row['Security Name'] = name
                # .get(): the security may have no parsed trade yet (PDF missing,
                # or the dividend precedes every trade in this run).
                rate = last_ex_rate.get(name, '')
                if rate != '':
                    # Foreign-currency dividend: booked as interest with the
                    # details kept in the note instead.
                    row['Type'] = 'Interest'
                    row['Note'] = 'Dividend from "{}" original currency: {}, est. exchange rate: {}, CHF amount {}'.format(
                        name, last_curr.get(name, ''), rate, row['Value'])
                    row['Security Name'] = ''
            elif is_trade:
                name = transaction.get('description', '')
                row['Security Name'] = name
                row['Type'] = 'Sell' if kind == 'TRADE_SELL' else 'Buy'
                try:
                    shares, exchange_rate, isin, currency = extract_shares_and_exchange_rate(transaction['documentNumber'])
                except FileNotFoundError:
                    # Best effort: the trade is still written, without shares / rate
                    print("Warning: PDF not found for transaction:")
                    print(f"{transaction['type'].split('_')[1]} {transaction['valueDate']} {transaction.get('description', '')} of CHF {transaction['amountInChf']}")
                else:
                    # Skip an empty ISIN so no blank key ends up in the securities list
                    if isin and isin not in securities:
                        securities[isin] = (name, currency)
                    row['Shares'] = shares
                    row['Exchange Rate'] = exchange_rate
                    last_ex_rate[name] = exchange_rate
                    last_curr[name] = currency
            elif kind == 'INTEREST':
                row['Type'] = 'Interest'
            elif kind == 'FEE_CHARGE':
                row['Type'] = 'Fees'
            else:
                print("Warning: Unknown Transaction type {}. Transaction ignored".format(transaction['type']))
                # Actually ignore it, as the warning promises
                continue

            # Write to the appropriate CSV file
            writer = portfolio_writer if is_trade else account_writer
            writer.writerow(row)

# Load the JSON data from the file
with open('transactions.json', 'r') as file:
    transactions = json.load(file)

# Process each account in the JSON data. `securities` is filled in place with
# ISIN -> (security name, currency) by process_transactions.
# The loop variable has its own name so `transactions` keeps referring to the
# loaded document and the loop can be re-run without reloading the file.
securities = {}
for account_id, account_transactions in transactions['transactions'].items():
    process_transactions(account_id, account_transactions, securities)

with open("securities.csv", mode='w', newline='') as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(["ISIN", "Security Name", "Currency"])
    # Write one row per security seen in any account
    for key, value in securities.items():
        writer.writerow([key, value[0], value[1]])

print("CSV files have been generated for each account.")

CSV files have been generated for each account.


In [None]:
import os
import re
import pymupdf  # PyMuPDF

def remove_duplicates_and_concatenate_pdfs(folder_path):
    """Merge the PDFs in ``folder_path`` into one file, skipping duplicate downloads.

    Files whose names differ only by a trailing ``(n)`` before ``.pdf`` (for
    example ``X.pdf`` and ``X(2).pdf``) count as the same document; the
    un-suffixed file is preferred when it exists. The result is saved as
    ``concatenated_output.pdf`` inside ``folder_path``. An output file left
    over from an earlier run is never used as input.

    Args:
        folder_path: Directory containing the PDF files.

    Returns:
        List of paths of the PDFs that were merged, in merge order. Empty when
        the folder holds no input PDFs, in which case nothing is saved.
    """
    output_name = 'concatenated_output.pdf'

    # Regular expression to match the "(n)" suffix of duplicate files
    duplicate_pattern = re.compile(r'\((\d+)\)\.pdf$')

    # base name -> file chosen to represent it
    unique_files = {}
    # Sorted so the kept file and the merge order do not depend on os.listdir order
    for pdf in sorted(os.listdir(folder_path)):
        if not pdf.endswith('.pdf') or pdf == output_name:
            continue
        base_name = duplicate_pattern.sub('.pdf', pdf)
        # Keep the first candidate, but let the un-suffixed original win
        if base_name not in unique_files or pdf == base_name:
            unique_files[base_name] = pdf

    unique_pdf_paths = [os.path.join(folder_path, pdf) for pdf in unique_files.values()]

    if not unique_pdf_paths:
        # Nothing to merge, so no output document is written
        print(f"No PDF files found in {folder_path}")
        return unique_pdf_paths

    # Create a new PDF document to concatenate all PDFs
    output_pdf = pymupdf.open()
    try:
        for pdf_path in unique_pdf_paths:
            input_pdf = pymupdf.open(pdf_path)
            try:
                # With no page range given, insert_pdf copies the whole document
                output_pdf.insert_pdf(input_pdf)
            finally:
                input_pdf.close()

        # Save the concatenated PDF
        output_pdf.save(os.path.join(folder_path, output_name))
    finally:
        output_pdf.close()

    print(f"Concatenated PDF saved as '{output_name}' in {folder_path}")
    return unique_pdf_paths

# Folder path containing the PDF files
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
folder_path = r"C:\Users\Manuel Meier\Downloads\viac1transactions"

# Call the function to remove duplicates and concatenate PDFs.
# It returns the list of PDF paths that were kept.
remove_duplicates_and_concatenate_pdfs(folder_path)

In [5]:
# Quick check of the duplicate-suffix pattern: a trailing "(n)" directly
# before ".pdf" is stripped, the rest of the path is left alone.
duplicate_pattern = re.compile(r'\((\d+)\)\.pdf$')

base_name = duplicate_pattern.sub(
    '.pdf',
    'C:\\Users\\Manuel Meier\\Downloads\\viac1transactions\\21V-KCE-D9V(2).pdf',
)
print(base_name)

C:\Users\Manuel Meier\Downloads\viac1transactions\21V-KCE-D9V.pdf


In [8]:
# modify securities


import csv
import xml.etree.ElementTree as ET

# Load securities.csv into a dict: ISIN -> {'name': ..., 'currency': ...}
securities_csv = 'securities.csv'
securities_data = {}

with open(securities_csv, mode='r') as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        isin, name, currency = row['ISIN'], row['Security Name'], row['Currency']
        securities_data[isin] = dict(name=name, currency=currency)

# Parse the portfolio file; the tree object is kept so it can be written back later
portfolio_xml = 'test_portfolio.xml'
portfolio_tree = ET.parse(portfolio_xml)
portfolio_root = portfolio_tree.getroot()

# Look up a <security> element by the text of its <isin> child
def find_security_by_isin(root, isin):
    """Return the first ``security`` descendant of ``root`` whose ``isin`` child
    has text equal to ``isin``, or ``None`` when there is no such element.

    ``security`` elements without an ``isin`` child are skipped.
    """
    def _has_isin(node):
        isin_node = node.find('isin')
        return isin_node is not None and isin_node.text == isin

    candidates = (node for node in root.findall('.//security') if _has_isin(node))
    return next(candidates, None)

# Check each security from the CSV against the portfolio XML. Securities that
# are missing from the portfolio are copied over from the bundled catalogue.
all_securities_xml = 'data/pp_all_viac_securities.xml'
all_securities_root = None  # parsed lazily, at most once
portfolio_modified = False

for isin, data in securities_data.items():
    name_csv = data['name']
    currency_csv = data['currency']

    security_in_portfolio = find_security_by_isin(portfolio_root, isin)

    if security_in_portfolio is not None:
        # Tolerate a security without a <name> element instead of crashing
        name_node = security_in_portfolio.find('name')
        name_portfolio = name_node.text if name_node is not None else None
        if name_csv == name_portfolio:
            print(f"{isin} {name_csv} ok")
        else:
            print(f"\033[1;31mWarning: {isin} is already in the portfolio but name is {name_portfolio} but viac calls it {name_csv}. This may require you to select the security manually when importing transactions.\033[0m")
    else:
        # Not in the portfolio: look it up in the catalogue of known securities
        if all_securities_root is None:
            all_securities_root = ET.parse(all_securities_xml).getroot()

        security_in_all_securities = find_security_by_isin(all_securities_root, isin)

        if security_in_all_securities is not None:
            # Copy the security to the portfolio XML
            portfolio_root.find('.//securities').append(security_in_all_securities)
            portfolio_modified = True
            print(f"added {isin} {name_csv} to portfolio")
        else:
            print(f"\033[1;31mError: {isin} {name_csv} {currency_csv} is not in our database, please add it manually to the securities before adding transactions. You may also send this info to us through github so that we can add it to the list.\033[0m")

# Save once, and only if something was added. UTF-8 with an explicit XML
# declaration, because ElementTree otherwise writes US-ASCII without one.
if portfolio_modified:
    portfolio_tree.write(portfolio_xml, encoding='utf-8', xml_declaration=True)

CH0033782431 UBS SMI ok
IE00B5BMR087 iShares Core S&P500 ok
IE00B1FZSF77 iShares US Property Yield ok
LU0439730705 UBS Global Quality Dividend ok
CH0036599816 UBS Real Estate CH ok
CH0032912732 UBS ETF SLI ok
CH0032044684 CSIF Europe ex CH Real Estate ok
CH0037606552 UBS Europe ex CH (old) ok
CH0032044791 CSIF Asia Real Estate ok
CH0214967314 CSIF World ex CH Small Cap - Pension Fund ok
CH0030849647 CSIF Japan ok
CH0030849654 UBS Pacific ex Japan (old) ok
CH0032400639 CSIF World ex CH - Pension Fund ok
CH0217837456 CSIF Real Estate World ex CH - Pension Fund (old) ok
CH0017844686 UBS Emerging Markets (old) ok
CH0030849613 UBS Canada ok
CH0030849712 CSIF US - Pension Fund ok
CH0357515474 UBS Japan - Pension Fund ok
CH0429081620 CSIF World ex CH - Pension Fund Plus ok
CH0209106761 CSIF Gold ok
CH0214968353 CSIF World ex CH Small Cap hedged - Pension Fund ok
CH0429081638 CSIF World ex CH hedged - Pension Fund Plus ok
CH0597394516 UBS SPI ESG ok
IE00B53SZB19 iShares Nasdaq 100 ok
CH0215804