In [1]:
import pandas as pd
from datetime import date, timedelta
from sqlalchemy import create_engine

engine = create_engine('mysql+pymysql://root:@localhost:3306/portfolio_development')
conpf = engine.connect()

engine = create_engine("sqlite:///c:\\ruby\\portlt\\db\\development.sqlite3")
conlt = engine.connect()

engine = create_engine("mysql+pymysql://root:@localhost:3306/stock")
const = engine.connect()

In [2]:
sql = '''
SELECT *
FROM dividends
LIMIT 1'''
struct = pd.read_sql(sql, conpf)
struct.dtypes

id            int64
stock_id      int64
name         object
year          int64
quarter       int64
number        int64
ppu         float64
amt         float64
net         float64
x_date       object
p_date       object
dtype: object

In [3]:
sql = '''
SELECT YEAR(p_date) AS p_year, MONTH(p_date) AS p_month, name, year, quarter, 
number, ppu, amt, net, x_date, p_date
FROM dividends
'''
df = pd.read_sql(sql, conpf)
df.dtypes

p_year       int64
p_month      int64
name        object
year         int64
quarter      int64
number       int64
ppu        float64
amt        float64
net        float64
x_date      object
p_date      object
dtype: object

In [4]:
df['x_date'] = pd.to_datetime(df['x_date'])
df['p_date'] = pd.to_datetime(df['p_date'])
df.dtypes

p_year              int64
p_month             int64
name               object
year                int64
quarter             int64
number              int64
ppu               float64
amt               float64
net               float64
x_date     datetime64[ns]
p_date     datetime64[ns]
dtype: object

In [5]:
df_groupby_year = df.groupby('p_year')
type(df_groupby_year)

pandas.core.groupby.generic.DataFrameGroupBy

In [6]:
df_groupby_year.ngroups

7

In [7]:
df_groupby_year.size()

p_year
2017     30
2018     48
2019     51
2020     54
2021    106
2022     71
2023     54
dtype: int64

In [8]:
df_cur_yr = df_groupby_year.get_group(2023)
df_cur_yr.columns

Index(['p_year', 'p_month', 'name', 'year', 'quarter', 'number', 'ppu', 'amt',
       'net', 'x_date', 'p_date'],
      dtype='object')

In [9]:
output = df_cur_yr.groupby('p_month').net.sum()
output

p_month
3     58267.70
4     28512.00
5    168108.72
6     70133.00
8      1350.00
9    108470.20
Name: net, dtype: float64

In [10]:
data_path = "../data/"
file_name = 'tmp-file-of-dividends.csv'
data_file = data_path + file_name
output.to_csv(data_file, index=True)

In [11]:
df_cur_yr.net.sum()

434841.62

### Old method

In [12]:
mask = df.p_year == 2023

In [13]:
ttl_by_month = df[mask].groupby(['p_year','p_month','name'], as_index=True).agg(
    {
        'net':['sum','count'],
    }
)
ttl_by_month

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,net,net
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum,count
p_year,p_month,name,Unnamed: 3_level_2,Unnamed: 4_level_2
2023,3,CPNREIT,2185.2,1
2023,3,DIF,9126.0,1
2023,3,GVREIT,7128.0,1
2023,3,JASIF,29900.0,1
2023,3,TFFIF,1032.0,1
2023,3,WHAIR,5481.0,1
2023,3,WHART,3415.5,1
2023,4,ASK,5832.0,1
2023,4,BANPU,20250.0,1
2023,4,PTTGC,1350.0,1


In [14]:
ttl_by_month.groupby(level='p_month').sum()

Unnamed: 0_level_0,net,net
Unnamed: 0_level_1,sum,count
p_month,Unnamed: 1_level_2,Unnamed: 2_level_2
3,58267.7,7
4,28512.0,4
5,168108.72,15
6,70133.0,8
8,1350.0,1
9,108470.2,19


In [15]:
ttl_by_month.sum()

net  sum      434841.62
     count        54.00
dtype: float64

In [16]:
ttl_by_month.sort_values(['p_month','name'],ascending=[True,True])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,net,net
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum,count
p_year,p_month,name,Unnamed: 3_level_2,Unnamed: 4_level_2
2023,3,CPNREIT,2185.2,1
2023,3,DIF,9126.0,1
2023,3,GVREIT,7128.0,1
2023,3,JASIF,29900.0,1
2023,3,TFFIF,1032.0,1
2023,3,WHAIR,5481.0,1
2023,3,WHART,3415.5,1
2023,4,ASK,5832.0,1
2023,4,BANPU,20250.0,1
2023,4,PTTGC,1350.0,1


In [17]:
ttl_by_month['net'].sum()

sum      434841.62
count        54.00
dtype: float64

In [18]:
sql = '''
SELECT YEAR(p_date) AS pay_year, SUM(amt) AS grs_amt, SUM(net) AS net_amt, SUM(amt-net) AS refund
FROM dividends
GROUP BY pay_year
ORDER BY pay_year DESC'''
ttl_by_year = pd.read_sql(sql, conpf)
ttl_by_year

Unnamed: 0,pay_year,grs_amt,net_amt,refund
0,2023,468013.0,434841.62,33171.38
1,2022,708367.91,661087.04,47280.87
2,2021,752315.1,700874.5,51440.6
3,2020,616228.94,576030.84,40198.1
4,2019,552424.34,510266.46,42157.88
5,2018,351895.6,309725.58,42170.02
6,2017,245981.1,219514.23,26466.87


In [30]:
ttl_by_year.net_amt.sum()

3412340.27

In [19]:
sql = '''
SELECT name, COUNT(*) AS qtrs, SUM(net) AS ttl_net
FROM dividends
GROUP BY name
ORDER BY SUM(net) DESC'''
total = pd.read_sql(sql, conpf)
total.sample(10)

Unnamed: 0,name,qtrs,ttl_net
110,TKN,1,1530.0
64,CPALL,2,6390.0
4,CPNREIT,14,138062.3
33,PTTEP,4,19485.0
101,HMPRO,1,2340.0
55,SMPC,1,7740.0
107,SQ,1,1800.0
36,TPIPP,6,15960.47
95,LPF,2,2778.75
62,POPF,3,6669.0


### Summarized profit by name

In [20]:
sql = '''
SELECT name, sum(profit) AS profit
FROM sells JOIN buys ON sells.buy_id = buys.id
JOIN stocks ON buys.stock_id = stocks.id
GROUP BY stocks.name
ORDER BY sum(profit) DESC LIMIT 10'''
profits_by_stock = pd.read_sql(sql, conpf)
profits_by_stock

Unnamed: 0,name,profit
0,CPNREIT,547131.19
1,THANI,237046.32
2,BEAUTY,192974.08
3,JMT,187296.16
4,MTC,147177.71
5,ASIAN,141685.61
6,TU,125061.53
7,SAWAD,115506.59
8,SIS,110986.74
9,SINGER,103973.22


In [21]:
sql = '''
SELECT YEAR(date) AS year, MONTH(date) AS month, profit
FROM sells 
WHERE YEAR(date) = 2023
ORDER BY YEAR(date) DESC, MONTH(date) DESC
'''
sells = pd.read_sql(sql, conpf)
sells

Unnamed: 0,year,month,profit
0,2023,9,2793.14
1,2023,9,1311.9
2,2023,8,4398.53
3,2023,8,6418.09
4,2023,8,1687.04
5,2023,8,2146.35
6,2023,8,1093.15
7,2023,8,1312.41
8,2023,8,532.66
9,2023,7,1480.86


In [22]:
grouped = sells.groupby(['year','month'])
grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001E405EB6250>

In [23]:
profit_by_month = grouped['profit'].sum()
profit_by_month

year  month
2023  1       -81338.26
      2       -77891.90
      3        -7863.28
      5         3680.06
      6       -75562.92
      7         1379.68
      8        17588.23
      9         4105.04
Name: profit, dtype: float64

In [24]:
grouped.agg(['sum','mean', 'max', 'min','count'])

Unnamed: 0_level_0,Unnamed: 1_level_0,profit,profit,profit,profit,profit
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean,max,min,count
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2023,1,-81338.26,-8133.826,14372.89,-33691.45,10
2023,2,-77891.9,-12981.983333,-3749.18,-34467.49,6
2023,3,-7863.28,-2621.093333,4594.67,-14620.17,3
2023,5,3680.06,3680.06,3680.06,3680.06,1
2023,6,-75562.92,-5397.351429,2857.81,-43758.16,14
2023,7,1379.68,344.92,4514.72,-4689.38,4
2023,8,17588.23,2512.604286,6418.09,532.66,7
2023,9,4105.04,2052.52,2793.14,1311.9,2


### Temporary process to calculate dividend portion of profit

In [25]:
year = 2023
quarter = 4

In [26]:
sql = """
SELECT name, year, quarter, aq_eps, ay_eps
FROM epss 
WHERE year = %s AND quarter = %s
"""
sql = sql % (year, quarter)
df_epss = pd.read_sql(sql, conlt)
df_epss.head()

Unnamed: 0,name,year,quarter,aq_eps,ay_eps
0,MC,2023,4,0,0


In [27]:
sql = '''
SELECT name, dividend
FROM dividend 
'''
df_dividend = pd.read_sql(sql, const)
df_dividend

Unnamed: 0,name,dividend
0,WHART,0.7663
1,QH,0.1500
2,ADVANC,8.2400
3,CPNREIT,0.8977
4,MCS,0.2000
...,...,...
65,GFPT,0.1000
66,PTTEP,9.2500
67,MC,0.7700
68,RCL,1.2500


In [28]:
df_merge = pd.merge(df_epss, df_dividend, on='name', how='inner')
df_merge['xxx_pct'] = df_merge.dividend / df_merge.aq_eps * 100
df_merge.set_index('name',inplace=True)
df_merge.sort_values(['xxx_pct'],ascending=[True])

Unnamed: 0_level_0,year,quarter,aq_eps,ay_eps,dividend,xxx_pct
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MC,2023,4,0,0,0.77,inf


In [29]:
df_merge.loc['TFFIF']

KeyError: 'TFFIF'