# Stock capital gain calculation
## First in First out law (FIFO)

### - Import libraries

In [2]:
import pandas as pd
import numpy as np

### - read excel files

In [11]:
df = pd.read_excel("tax_2022.xlsx", sheet_name="Sheet3")
df['Code'] = df['SYMBOL']
df['Date'] = df['DATE (US)']
df['Type'] = df['SIDE']
df['Quantity'] = df['UNIT']
df['Total Value ($)'] = df['TOTAL']

df.drop(columns = ['SYMBOL', 'SIDE', 'DATE (US)', 'UNITS', 'UNIT', 'LOCAL CURRENCY VALUE', 'TOTAL'], inplace=True)
df['Unit Value ($)'] = df['Total Value ($)'] / df['Quantity']
                  
df.head()

Unnamed: 0,Code,Date,Type,Quantity,Total Value ($),Unit Value ($)
0,AAPL,2021-11-30,B,4,917.97408,229.49352
1,AAPL,2021-12-21,B,6,1433.0484,238.8414
2,AAPL,2021-12-21,B,6,1432.28555,238.714258
3,AAPL,2022-01-07,B,10,2397.44848,239.744848
4,AAPL,2022-07-11,S,-16,-3423.129282,213.94558


## Concatenate 2020-2022 Data 

In [12]:
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True) # Date : String to datetime format
df.sort_values(['Code', 'Date'], ascending=True, inplace=True) # Sorting multiple names [Code, Date]
df.reset_index(drop=True, inplace=True) # Reset index from 0
df.head()

Unnamed: 0,Code,Date,Type,Quantity,Total Value ($),Unit Value ($)
0,AAPL,2021-11-30,B,4,917.97408,229.49352
1,AAPL,2021-12-21,B,6,1433.0484,238.8414
2,AAPL,2021-12-21,B,6,1432.28555,238.714258
3,AAPL,2022-01-07,B,10,2397.44848,239.744848
4,AAPL,2022-07-11,S,-16,-3423.129282,213.94558


## Capital Gain Calculator: FIFO

In [14]:
import logging
from collections import deque
import math

class Trans:
    datetime=None
    amount=None
    price=None
 
    def __init__(self, datetime, amount, price):
        self.datetime=datetime
        self.amount=amount
        self.price=price
     
    def getInfo(self):
        return(str(self.datetime)+"; "+
                str(self.amount)+"; "+
                str(self.price))+"; "
 
def balanceFifo(all_trans):
 
    qTransactions = deque() 
    trans_result = list()
 
    for t in all_trans:
        #Add first element to the queue
        if len(qTransactions)==0:
            #logging.debug('Added the first element: %s',t.getInfo())
            qTransactions.append(t)
            continue
 
        while (t.amount!=0 and len(qTransactions)>0):
            #investigate the first element from the queue
            tq=qTransactions.popleft()
            #the same type of transaction: both sell or both buy
            if tq.amount*t.amount>0:
                #return the first element back to the same place
                qTransactions.appendleft(tq)
                #add the new element to the list
                qTransactions.append(t)
                #logging.debug('Added: %s',t.getInfo())
                break
             
            #contrary transactions: (sell and buy) or (buy and sell) 
            if tq.amount*t.amount<0:
                #logging.debug('Transaction : %s',t.getInfo())
                #logging.debug('... try to balance with: %s',tq.getInfo())
 
                #The element in the queue have more units and takes in the current transaction
                if abs(tq.amount)>abs(t.amount):
                    result = insertTransaction(tq.datetime,t.datetime,\
                            math.copysign(t.amount,tq.amount), tq.price,t.price)
                    trans_result.append(result)
                    
                    #update the amount of the element in the queue
                    tq.amount=tq.amount+t.amount
                    #return the element back to the same place
                    qTransactions.appendleft(tq)
                    #logging.debug('Removed transaction: %s',t.getInfo())
                    #the transaction has been balanced, take a new transaction
                    break
                 
                #The element from the queue and transaction have the same amount of units
                if abs(tq.amount)==abs(t.amount):
                    result = insertTransaction(tq.datetime,t.datetime,\
                                math.copysign(t.amount,tq.amount), tq.price,t.price)
                    trans_result.append(result)
                    
                    #update the amount in the transaction 
                    t.amount=0
                    #logging.debug('Balanced, removed transaction: %s',t.getInfo())
                    #logging.debug('Balanced, removed from the queue: %s',tq.getInfo())
                    #the transaction has been balanced, take a new transaction
                    continue
                    
                #The transaction has more units
                if abs(tq.amount)<abs(t.amount):
                    #update the units in transaction, (remove element from the queue)
                    t.amount=t.amount+tq.amount
                    result = insertTransaction(tq.datetime,t.datetime,tq.amount,tq.price,t.price)
                    trans_result.append(result)
                    #logging.debug('Removed from queue: %s',tq.getInfo())
                     
                    #the transaction has not been balanced, 
                    #take a new element from the queue (t.amount>0)
                    continue
                 
        #We have unbalanced transaction but the queue is empty            
        if (t.amount!=0 and len(qTransactions)==0):
            #Add unbalanced transaction to the queue
            #The queue changes polarisation
            qTransactions.append(t)
            #logging.debug('Left element: %s',t.getInfo())
     
     
    #If something remained in the queue, treat it as open or part-open transactions
    while (len(qTransactions)>0):
        tq=qTransactions.popleft()
        #logging.debug('Remained on list transaction: %s',tq.getInfo())
        
    return trans_result
 
def insertTransaction(dateStart,dateEnd,amount,priceStart,priceEnd):
    #print("Bought={}, sold={},  amount={}, buy price={}, sell_price={}, gain={}".\
    #        format(dateStart,dateEnd,amount,priceStart,priceEnd, amount*(priceEnd-priceStart)))
    result = [dateStart,dateEnd,amount,priceStart,priceEnd, amount*(priceEnd-priceStart)]
    return result

## Divide the dataframe by Stock Code

In [15]:
code = df['Code'].value_counts()
print("size: ", code.size, "index: ", code.index, "value: ", code.values)

size:  9 index:  Index(['SQQQ', 'TQQQ', 'AAPL', 'QYLD', 'CVX', 'OXY', 'DBA', 'JNJ', 'SNOW'], dtype='object') value:  [24 15  7  6  5  5  2  2  2]


In [16]:
code.index[0]

'SQQQ'

In [23]:
sum_df = []

for j in range(0, code.size):
    # Seperate dataframe by Code name
    mask_code = df['Code'] == code.index[j]
    df_mask = df[mask_code]
    df_mask.reset_index(drop=True, inplace=True)
    print("All Transactions for", code.index[j])
    print(df_mask)
    
    # Save dataframe to an excel file
    #with pd.ExcelWriter('tax_return_2022.xlsx') as writer:
    #    df_mask.to_excel(writer, sheet_name = str(code.index[j]))
    
    # Capital gain caluculation FIFO
    trans_list=list()
    for i in range(0, df_mask.shape[0]):
        trans = Trans(df_mask['Date'][i].date(), df_mask['Quantity'][i], df_mask['Unit Value ($)'][i])
        trans_list.append(trans)
    trans_result = balanceFifo(trans_list)
    df_capital = pd.DataFrame(trans_result, columns = ['Date Purchased', 'Date Sold', 'Quantity', 'Buy Price', 'Sell Price', 'Capital Gain'])

    # Filtering sold date for 2021-2022 Capital Gain
    mask_time = (df_capital['Date Sold'] > pd.to_datetime("2021-06-30")) & (df_capital['Date Sold'] < pd.to_datetime("2022-07-01"))
    df_filtered = df_capital[mask_time]
    #print("All Time Capital gain for", code.index[j])
    #print(df_capital)
    print("Financial Year Capital gain for", code.index[j])
    print(df_filtered)
    
    # Summation of capita gains (Total: any loses, discounts not included, Net: including loses or discounts)
    sum_total = np.sum(x for x in df_filtered['Capital Gain'] if x > 0)
    sum_net = np.sum(x for x in df_filtered['Capital Gain'])
    #For net capital gain, discounts for 'over 1 year stocks' should be applied
    diff = df_filtered['Date Sold'] - df_filtered['Date Purchased'] # How many days holding stocks
    discount = 0 # Amount of discount
    
    # If you hold stocks more than a year, you get half of discount on capital gain for taxation
    for y in range(0, diff.size):
        if (int(diff.values[y]/8.64e+13) > 365): 
            discount = discount + df_filtered['Capital Gain'][y] / 2
            sum_net = sum_net - discount
        else: 
            sum_net = sum_net
        
    print("Total capital gain for", code.index[j], ": ", round(sum_total, 2), "AUD")
    print("Net capital gain for", code.index[j], ": ", round(sum_net, 2), "AUD")
    
    # Append data into Summary dataframe
    a = [code.index[j], sum_total, sum_net]
    sum_df.append(a)
    

All Transactions for SQQQ
    Code       Date Type  Quantity  Total Value ($)  Unit Value ($)
0   SQQQ 2022-07-19    B         2       143.301744       71.650872
1   SQQQ 2022-07-20    B         2       138.921120       69.460560
2   SQQQ 2022-07-20    B         2       138.950080       69.475040
3   SQQQ 2022-07-20    S        -6      -412.607600       68.767933
4   SQQQ 2022-07-21    B         5       332.670416       66.534083
5   SQQQ 2022-07-21    S        -5      -325.679474       65.135895
6   SQQQ 2022-07-21    B         5       335.000730       67.000146
7   SQQQ 2022-07-21    B         5       337.244200       67.448840
8   SQQQ 2022-07-21    B         5       336.520500       67.304100
9   SQQQ 2022-07-22    B         5       314.960184       62.992037
10  SQQQ 2022-07-22    S       -20     -1280.010600       64.000530
11  SQQQ 2022-08-01    B        20      1171.066400       58.553320
12  SQQQ 2022-08-01    B        20      1141.328236       57.066412
13  SQQQ 2022-08-03   

  result = libops.scalar_compare(x.ravel(), y, op)
  sum_total = np.sum(x for x in df_filtered['Capital Gain'] if x > 0)
  sum_net = np.sum(x for x in df_filtered['Capital Gain'])
  result = libops.scalar_compare(x.ravel(), y, op)
  sum_total = np.sum(x for x in df_filtered['Capital Gain'] if x > 0)
  sum_net = np.sum(x for x in df_filtered['Capital Gain'])
  result = libops.scalar_compare(x.ravel(), y, op)
  sum_total = np.sum(x for x in df_filtered['Capital Gain'] if x > 0)
  sum_net = np.sum(x for x in df_filtered['Capital Gain'])
  result = libops.scalar_compare(x.ravel(), y, op)
  sum_total = np.sum(x for x in df_filtered['Capital Gain'] if x > 0)
  sum_net = np.sum(x for x in df_filtered['Capital Gain'])
  result = libops.scalar_compare(x.ravel(), y, op)
  sum_total = np.sum(x for x in df_filtered['Capital Gain'] if x > 0)
  sum_net = np.sum(x for x in df_filtered['Capital Gain'])
  result = libops.scalar_compare(x.ravel(), y, op)
  sum_total = np.sum(x for x in df_filtered['Capi

In [18]:
# Print summary dataframe for total and net capital gain
summary = pd.DataFrame(sum_df, columns = ['Code', 'Total gain', 'Net gain'])
print(summary)
print("Total Capital Gain: ", summary['Total gain'].sum().round(), "AUD")
print("Net Capital Gain: ", summary['Net gain'].sum().round(), "AUD")

   Code  Total gain    Net gain
0  SQQQ    0.000000    0.000000
1  TQQQ    0.000000    0.000000
2  AAPL    0.000000    0.000000
3  QYLD   24.289431   20.598511
4   CVX  330.440798  330.440798
5   OXY    0.000000    0.000000
6   DBA    0.833036    0.833036
7   JNJ    0.000000    0.000000
8  SNOW    0.000000    0.000000
Total Capital Gain:  356.0 AUD
Net Capital Gain:  352.0 AUD


In [26]:
summary.to_excel('tax_return_2022_US.xlsx', sheet_name = 'US_CG')

In [19]:
df_d = pd.read_excel("Stake_transaction_summary_report.xlsx", sheet_name="Dividends")
df_d.head()

Unnamed: 0,DATE (US),REFERENCE,SYMBOL,DIV AMOUNT,DIV VALUE (USD),TAX WITHHELD (USD),TAX RATE,NET DIV RECEIVED (USD),FX RATE,DIV VALUE LOCAL CURRENCY,TAX WITHHELD LOCAL CURRENCY,NET DIV RECEIVED LOCAL CURRENCY
0,2021-10-26,QYLD000291-58430,QYLD,0.1966,6.09,-0.91,0.15,5.18,1.3307,8.103963,-1.210937,6.893026
1,2021-12-01,QYLD000326-26920,QYLD,0.2246,6.96,-1.04,0.15,5.92,1.3949,9.708504,-1.450696,8.257808
2,2021-12-10,CVX000321-37868,CVX,1.34,34.84,-5.23,0.15,29.61,1.3984,48.720256,-7.313632,41.406624
3,2022-02-14,AAPL00035-54740,AAPL,0.22,5.72,-0.86,0.15,4.86,1.3937,7.971964,-1.198582,6.773382
4,2022-05-13,AAPL000126-84487,AAPL,0.23,5.98,-0.9,0.15,5.08,1.4537,8.693126,-1.30833,7.384796


In [21]:
mask = (pd.to_datetime(df_d['DATE (US)']) > pd.to_datetime("2021-06-30")) & (pd.to_datetime(df_d['DATE (US)']) < pd.to_datetime("2022-07-01"))
print("2021-2022 US stock dividend (GROSS AUD): ", df_d[mask]['DIV VALUE LOCAL CURRENCY'].sum().round())
print("2021-2022 US stock dividend (TAX AUD): ", df_d[mask]['TAX WITHHELD LOCAL CURRENCY'].sum().round())
print("2021-2022 US stock dividend (NET AUD): ", df_d[mask]['NET DIV RECEIVED LOCAL CURRENCY'].sum().round())

2021-2022 US stock dividend (GROSS AUD):  84.0
2021-2022 US stock dividend (TAX AUD):  -13.0
2021-2022 US stock dividend (NET AUD):  71.0
