In [22]:
# default_exp data.finance

In [23]:
from IPython.core.interactiveshell import InteractiveShell
# "all" makes IPython display the value of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [24]:
# Reload edited modules automatically before each cell runs (mode 2 = all modules).
# NOTE(review): re-running this cell prints an "already loaded" notice; `%reload_ext autoreload`
# would avoid it.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
from libs import is_lib_exists

In [26]:
# Dependencies of this notebook as (import name, pip package name) pairs.
# The two differ only where the distribution is published under another name.
required_libs = [
    ("numpy", "numpy"),
    ("pandas", "pandas"),
    ("seaborn", "seaborn"),
    ("matplotlib", "matplotlib"),
    ("investpy", "investpy"),
    ("dateutil", "python-dateutil"),
]

In [27]:
# Walk the (clz, lib) pairs of `required_libs`: `clz` is handed to `is_lib_exists`
# (presumably an importability check — confirm in `libs`), `lib` is the name given to pip.
for (clz,lib) in required_libs:
    if not is_lib_exists(clz):
        print(f"Installing {lib}")
        # NOTE(review): `!pip` runs whichever pip is first on PATH, which may not belong to
        # the kernel's environment, and no version is pinned — consider `%pip install`.
        !pip install {lib}
    else:
        print(f"{lib} exists")

numpy exists
pandas exists
seaborn exists
matplotlib exists
investpy exists
python-dateutil exists


In [28]:
#export
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import investpy
from pathlib import Path
from dateutil.relativedelta import *
from datetime import *
import calendar
import traceback

In [29]:
%matplotlib inline

In [30]:
# % - line, %% - cell
# %run, %%timeit, %%writefile, %pycat
# %who, %store
# %html 
# %env, %pinfo
# %lsmagic
# %history
# %prun
# %%heat
# https://github.com/lmmx/devnotes/wiki/IPython-'magic'-function-documentation#edit
# %quickref

In [31]:
from IPython.display import display, Image

# Finance Data

> Functionality to gather finance data.

The classes here provide base classes used to gather finance data for model training.

In [32]:
#hide
from nbdev.showdoc import *

In [33]:
# Directory that holds the CSV files, relative to this notebook
DATASET_PATH = Path("../../data")

# Market whose stocks are requested from investpy
COUNTRY = "malaysia"

# CSV receiving the stock list
DATASET = DATASET_PATH / "ftse_klse_investing.csv"

# CSV receiving the per-year dividend summary
DATASET_STOCKS_SUMMARY = DATASET_PATH / "stocks_summary.csv"

## FTSE KLSE

In [34]:
# Retrieve the stock list investpy provides for COUNTRY and print a summary of its columns.
# `dataset` is the frame that `generate_dataset` iterates over (columns `name` and `symbol`).
dataset = investpy.get_stocks(country=COUNTRY)
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   country    929 non-null    object
 1   name       929 non-null    object
 2   full_name  929 non-null    object
 3   isin       929 non-null    object
 4   currency   929 non-null    object
 5   symbol     929 non-null    object
dtypes: object(6)
memory usage: 43.7+ KB


In [35]:
# Save the stock list to DATASET (without the index column) so it can be reloaded offline
dataset.to_csv(DATASET, index=False)

# To reuse the saved copy instead of querying investpy again:
#dataset = pd.read_csv(DATASET, header=0, sep=',', quotechar='"')

In [36]:
# Preview the first three rows of the stock list
display(dataset.head(3))

Unnamed: 0,country,name,full_name,isin,currency,symbol
0,malaysia,Media Chinese Int,Media Chinese International Ltd,BMG5959D1048,MYR,MDCH
1,malaysia,AMMB,AMMB Holdings Bhd,MYL1015OO006,MYR,AMMB
2,malaysia,CIMB Group,CIMB Group Holdings Bhd,MYL1023OO000,MYR,CIMB


In [48]:
def calculate_yield(idx, name, symbol):
    """Calculate stock yield based on latest close price.

    Downloads the dividend history and the recent price data of ``symbol`` (in
    ``COUNTRY``) through ``investpy``, totals the dividends per calendar year and
    expresses each yearly total as a percentage of the most recent close price.

    Parameters
    ----------
    idx
        Identifier of the stock; used only in the progress messages.
    name
        Display name of the stock, copied into the ``Name`` column.
    symbol
        Ticker handed to ``investpy``, copied into the ``Symbol`` column.

    Returns
    -------
    pandas.DataFrame
        One row per dividend year with the columns ``Year``, ``Dividend``,
        ``Historical Yield``, ``Date`` (comma-joined payment dates), ``Name``,
        ``Symbol``, ``Last Close Price`` and ``Calculated Yield``.
        An empty frame is returned when any step fails; the reason is printed
        so skipped stocks can be diagnosed.
    """
    try:
        df_dividends = investpy.get_stock_dividends(stock=symbol, country=COUNTRY)
        df_dividends['Year'] = pd.DatetimeIndex(df_dividends['Date']).year

        # Convert the yield text to a number: decimal comma -> dot, drop the
        # percent sign, and treat the "-" placeholder as 0.
        yield_cleanup = str.maketrans({',': '.', '%': '', '-': '0'})
        df_dividends['Historical Yield'] = df_dividends['Yield'].apply(
            lambda y: float(y.translate(yield_cleanup)))

        # Aggregate the dividends by year; sort=False keeps the years in the
        # order in which they first appear in the dividend history.
        df_dividends_summary = df_dividends.groupby(by=['Year'], sort=False, as_index=False).agg(
            {'Dividend': 'sum',
             'Historical Yield': 'sum',
             'Date': lambda d: ','.join(d.dt.strftime('%Y-%m-%d'))})

        # Close price of the most recent trading day in the recent-data window.
        df_recent_data = investpy.get_stock_recent_data(stock=symbol, country=COUNTRY).reset_index()
        last_close_price = df_recent_data.loc[df_recent_data['Date'].idxmax(), 'Close']

        df_dividends_summary['Name'] = name
        df_dividends_summary['Symbol'] = symbol
        df_dividends_summary['Last Close Price'] = last_close_price
        # Yearly dividend total as a percentage of the latest close price.
        df_dividends_summary['Calculated Yield'] = df_dividends_summary['Dividend'] / last_close_price * 100

        print(f'{idx}. {symbol} - {name}')
        return df_dividends_summary

    except Exception as exception:
        # Deliberately best-effort: one failing stock must not abort a long download
        # run, but the cause is reported instead of being silently discarded.
        print(f"{idx}. Skipping {name} - {symbol} ({type(exception).__name__}: {exception})")
        return pd.DataFrame()

In [49]:
def summarize_stocks(df_summary, df):
    """Summarize the stock dividends details.

    Adds the rows of ``df`` below those of ``df_summary``.

    Parameters
    ----------
    df_summary : pandas.DataFrame
        Rows accumulated so far (may be empty).
    df : pandas.DataFrame or None
        Rows of one more stock; ``None`` or an empty frame adds nothing.

    Returns
    -------
    pandas.DataFrame
        ``df_summary`` itself when ``df`` has nothing to add, ``df`` itself when
        ``df_summary`` is empty, otherwise a new frame with the rows of both
        (original index labels are kept).
    """
    if df is None or df.empty:
        return df_summary
    if df_summary.empty:
        return df
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent call.
    return pd.concat([df_summary, df])

In [52]:
import time

def generate_dataset(stocks=None, output_path=None, start_index=0, batch_size=10, pause_seconds=3):
    """Download the dividend summary of every stock and append it to a CSV file.

    Calls ``calculate_yield`` for each row and writes the collected rows to disk
    in batches, so an interrupted run keeps what was already fetched and can be
    resumed with ``start_index``.

    Parameters
    ----------
    stocks : pandas.DataFrame, optional
        Frame with ``name`` and ``symbol`` columns. Defaults to the notebook-level
        ``dataset`` as it is bound at call time.
    output_path : path-like, optional
        CSV file to append to. Defaults to ``DATASET_STOCKS_SUMMARY``.
    start_index : int
        Position of the first row of ``stocks`` to process.
    batch_size : int
        Number of processed stocks between two writes.
    pause_seconds : float
        Delay between two stocks, to avoid hammering the data provider.

    Notes
    -----
    Rows are appended to an existing file; delete the file first for a fresh run.
    The header is written only when the file is new or empty, so the result
    can be read back with ``pd.read_csv`` without stray header lines in the data.
    """
    # Imported locally: `from datetime import *` in the imports cell also exports a
    # name `time` (the datetime.time class), which shadows the module when that cell
    # is run after this one.
    from time import sleep

    if stocks is None:
        stocks = dataset
    output_path = Path(DATASET_STOCKS_SUMMARY if output_path is None else output_path)

    remaining = stocks.iloc[start_index:]
    total = len(remaining)
    pending = []  # frames of the current batch, concatenated once per write

    for position, (idx, row) in enumerate(remaining.iterrows(), start=1):
        df = calculate_yield(idx, row['name'], row['symbol'])
        if df is not None and not df.empty:
            pending.append(df)

        if position % batch_size == 0 or position == total:
            # Skip the write when every stock of the batch was skipped.
            if pending:
                write_header = not output_path.exists() or output_path.stat().st_size == 0
                pd.concat(pending).to_csv(output_path, mode='a', index=False, header=write_header)
            pending = []

        # No need to wait after the last stock.
        if pause_seconds and position < total:
            sleep(pause_seconds)

# Generate the dataset again?



In [71]:
# Load the saved per-year dividend summary (comma-separated, header on the first line).
# NOTE: this rebinds `dataset`, which earlier held the stock list from investpy.
dataset = pd.read_csv(DATASET_STOCKS_SUMMARY)
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2551 entries, 0 to 2550
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Year              2551 non-null   int64  
 1   Dividend          2551 non-null   float64
 2   Historical Yield  2551 non-null   float64
 3   Date              2551 non-null   object 
 4   Name              2551 non-null   object 
 5   Symbol            2551 non-null   object 
 6   Last Close Price  2551 non-null   float64
 7   Calculated Yield  2551 non-null   float64
dtypes: float64(4), int64(1), object(3)
memory usage: 159.6+ KB


In [102]:
# Reference date for the screening
TODAY = date.today()

# The three calendar years before the current one, most recent first
PAST_3_YEARS = [TODAY.year - years_back for years_back in (1, 2, 3)]

# Minimum yield (in percent) a stock must reach to be kept
EXPECTED_YIELD_PERCENTAGE = 12

In [103]:
# Keep the rows of the past three years whose calculated yield reaches the
# expected percentage, ordered by stock name and then by year.
df_dividend_stocks = (
    dataset
    .loc[lambda frame: frame['Year'].isin(PAST_3_YEARS)
                       & frame['Calculated Yield'].ge(EXPECTED_YIELD_PERCENTAGE)]
    .sort_values(by=['Name', 'Year'])
)

In [108]:
# Names of the stocks that passed the screening, followed by the first 30 matching rows
print(df_dividend_stocks['Name'].unique())
display(df_dividend_stocks.head(30))
#print(df_dividend_stocks[df[]])

['AMMB' 'APB Resources' 'APM Automotive' 'AYS Ventures' 'Airasia'
 'Al-Salam Real Estate' 'Alliance Bank Malaysia' 'Aluminium Malaysia'
 'AmFIRST' 'Amanah Harta Tanah' 'AmanahRaya Real Estate' 'Amway'
 'Ann Joo Resources' 'Asia File Corp' 'Astro Malaysia' 'Atlan' 'Atrium'
 'BAT Malaysia' 'BP Plastics' 'Berjaya Food' 'Berjaya Sports Toto'
 'Bermaz Auto' 'Bina Darulaman' 'Bintulu Port' 'Boustead'
 'Boustead Plantations' 'CB Industrial Product' 'CIMB Group'
 'CJ Century Logistics' 'CSC Steel' 'CYL Corp' 'CapitaLand Malaysia Mall'
 'Carlsberg Brewery' 'Chemical Co Malaysia' 'Chin Well' 'Classic Scenic'
 'Country View' 'Crescendo Corp' 'Deleum' 'DiGi.Com' 'Dominant Enterprise'
 'ELK-Desa Resources' 'Elsoft Research' 'Eng Kah Corp' 'Esthetics Int'
 'FCW' 'Faber' 'Far East' 'Fima Corp' 'Focus Lumber' 'Freight Management'
 'Gas Malaysia' 'George Kent' 'Glomac' 'Gromutual' 'Hai O Enterprise'
 'Hap Seng Plantations' 'Hektar REIT' 'HeveaBoard' 'Homeritz Corp'
 'Hua Yang' 'Hup Seng Industries' 'I-

Unnamed: 0,Year,Dividend,Historical Yield,Date,Name,Symbol,Last Close Price,Calculated Yield
7,2019,0.41,11.59,"2019-12-13,2019-06-25",AMMB,AMMB,3.17,12.933754
556,2017,0.065,6.84,2017-03-01,APB Resources,APBS,0.34,19.117647
574,2017,0.295,8.49,"2017-09-06,2017-06-06",APM Automotive,APMA,1.71,17.251462
573,2018,0.265,8.15,"2018-09-18,2018-06-06",APM Automotive,APMA,1.71,15.497076
572,2019,0.24,12.14,"2019-09-18,2019-06-11",APM Automotive,APMA,1.71,14.035088
606,2017,0.025,5.95,2017-08-02,AYS Ventures,AYSV,0.175,14.285714
605,2018,0.05,14.74,"2018-08-01,2018-03-14",AYS Ventures,AYSV,0.175,28.571429
284,2017,0.3,9.13,"2017-09-13,2017-05-23",Airasia,AIRA,0.9,33.333333
283,2018,0.24,7.74,2018-06-12,Airasia,AIRA,0.9,26.666667
282,2019,0.24,28.24,2019-03-11,Airasia,AIRA,0.9,26.666667
