In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import credentials
import requests
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from pandas import DataFrame
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
import datetime as dt
#pulling data API

key =credentials.login['key_isaham']  # extract password

In [2]:
# looping through all stocks

def strategy_label(wl):
    
    ori_data = {}
    all_ori_stocks = []
    # Scale dictionary of dataframes for training set(scaling by stock)
    ss={}
    d = {}

    # Scale dictionary of dataframes for prediction set(scaling by stock)
    sp={}
    nsp={}

    for m,stock in enumerate (wl):

        print("----------------PROCESSING {}------------------".format(stock))
        symbol = stock    # Which stock to pull    
        response = requests.get("https://admin.isaham.my/api/chart?stock={}&key={}".format(symbol,key))
        data = response.json()

        ori = pd.DataFrame.from_dict(data)
        ori = ori[['c','d','h','l','o','v']]
        ori['d'] = pd.to_datetime(ori['d'], dayfirst= True )
        # adding the stock names
        ori["Stock"] = stock
        ori_data[f"{symbol}"] = ori
        all_ori_stocks.append(ori_data[stock])
        
        df = ori.reset_index(drop=False)
        
        
        # Tuning OB and OS level
        ob_cross=85
        os_cross=35
        
        # Identify MA50
        n=50
        ma50=[] 

        for i in range(len(df["c"])-(n-1)):
            ma = round(sum(df["c"][i:i+(n)])/n,4)
            ma50.append(ma)

        df["MA50"] = pd.Series(ma50)
        df["MA50"] = df["MA50"].shift(n-1)

        # Filter uptrend price above ma50
        # Calculating the gap of both MA
        df["MAgap"] = df["c"] -  df["MA50"] 

        # Set date as index
        df.set_index('d',inplace=True,drop=False)
        df.index.name = None

        # Create signal for uptrend price above ma50 
        # Result not accurate for stochastic when just filter uptrend only
        df["Uptrend"]=df['MAgap']>0
        Uptrend=df["Uptrend"]
        Uptrend_signal=Uptrend[Uptrend==True]

        # Signal stochastic
        # Create the "L14" column in the DataFrame
        df['L14'] = df['l'].rolling(window=14).min()

        # Create the "H14" column in the DataFrame
        df['H14'] = df['h'].rolling(window=14).max()

        # Create the "%K" column in the DataFrame
        df['%K'] = 100*((df['c'] - df['L14']) / (df['H14'] - df['L14']) )

        # Create the "%D" column in the DataFrame
        df['%D'] = df['%K'].rolling(window=3).mean()

        # Create a column in the DataFrame showing "TRUE" if buy entry signal is given and "FALSE" otherwise. 
        # A buy is initiated when the %K line crosses up through the %D line and the value of the oscillator is below 20 
        df['Buy OS'] = ((df['%K'] > df['%D']) & (df['%K'].shift(1) < df['%D'].shift(1))) & (df['%D'] < os_cross) & (Uptrend_signal)

        # Create a column in the DataFrame showing "TRUE" if sell entry signal is given and "FALSE" otherwise. 
        # A sell is initiated when the %K line crosses down through the %D line and the value of the oscillator is above 80 
        df['Sell OB'] = ((df['%K'] < df['%D']) & (df['%K'].shift(1) > df['%D'].shift(1))) & (df['%D'] > ob_cross) 

        # Create a column in the DataFrame for name of stock
        df['Stock_Name'] = symbol

        # Define overbought and oversold
        overbought =df['Sell OB']
        oversold =df['Buy OS']

        # Use index(date) for create strategy using overbought and oversold
        date_os=list(oversold[oversold == True].index)
        date_ob=list(overbought[overbought == True].index)

        # Scaling certain features only
        col_names_pred = ['c', 'l', 'o', 'v','h']  
        features_pred = df[col_names_pred]

        # Scaling for prediction data
        scaler_pred = StandardScaler().fit(features_pred.values)
        features_pred = scaler_pred.transform(features_pred.values)

        # Convert the array back to a dataframe
        dataset_pred = DataFrame(features_pred)

        # Prepare prediction data that no need to scale
        col_notscale_pred=['d','Stock_Name','Buy OS']
        features_notscale_pred=df[col_notscale_pred]

        # Saving in multiple variable
        sp["{}".format(stock)] = dataset_pred
        nsp["{}".format(stock)] = features_notscale_pred

        # Transaction recording
        buy = []
        sell = []
        sdate = []
        bdate = []
        wr = []
        pct = []
        vol = []
        ope =[]
        high = []
        low = []

        # Record close,open,vol,high,low for buy transaction
        for n,i in enumerate(date_os):
            buy.append(df['c'].loc[i])
            vol.append(df['v'].loc[i])
            ope.append(df['o'].loc[i])
            high.append(df['h'].loc[i])
            low.append(df['l'].loc[i])
            bdate.append(i)
            r=[]

            for m,k in enumerate(date_ob):

                if k>i:
                    r.append(m)
                    if len(r) ==1:
                        # Record close,percentage change for sell transaction
                        sell.append(df['c'].loc[k])
                        sdate.append(k)
                        pct_change=((df['c'].loc[k]/df['c'].loc[i])-1)*100
                        pct.append(((df['c'].loc[k]/df['c'].loc[i])-1)*100)
                    elif len(r)>1:
                        pass
        diff =len(buy)-len(sell)
        # Checking for final transaction, make sure it is sell, as we want to close the position
        if len(buy) == len(sell):
            pass   
        elif len(buy) != len(sell):
            # Deleting excess buy transaction after we close position
            del(buy[-diff:])
            del(bdate[-diff:])
            del(ope[-diff:])
            del(high[-diff:])
            del(vol[-diff:])
            del(low[-diff:])

        dct = {
            "Buy_Date": bdate,
            "Buy": buy,
            "Sell": sell,
            "Sell_Date": sdate,
            "Open":ope,
            'Volume': vol,
            "Low": low,
            "high": high,
            "Pct_change": pct    
        }  

        # Save document for stochastic strategy
        trans = pd.DataFrame(dct)

        # Define variable for winning rate
        wr = []
        for i in range(len(trans)):
            if trans["Pct_change"][i] > 0:
                wr.append(1)
            else:
                wr.append(0)

        # Define variable for name of stock
        Name_stock = []
        for s in range(len(trans)):
            Name_stock.append(symbol)

        # Add new column
        trans["wr"] = pd.Series(wr)
        trans['Stock'] = pd.Series(Name_stock)

        # Scaling certain features only for training data
        col_names = ['Buy', 'Low', 'Open', 'Volume','high']  
        features = trans[col_names]

        # Prepare prediction data that no need to scale
        scaler = StandardScaler().fit(features.values)
        features = scaler.transform(features.values)

        # Convert the array back to a dataframe
        dataset = DataFrame(features)    
        col_notscale = ['Buy_Date','Stock','wr']
        features_notscale = trans[col_notscale]

        # Saving in multiple variable
        d["{}".format(stock)] = dataset
        ss["{}".format(stock)] = features_notscale
        print("----------------DONE {}------------------".format(symbol))

    # Combining all training data generated
    df_notscale = pd.concat(list(ss.values()),ignore_index=True)
    df_scale = pd.concat(list(d.values()),ignore_index=True)
    df_scale = df_scale.rename(columns={0: 'Buy', 1: 'Low',2: 'Open', 3: 'Volume',4: 'High'})
    result_scaler = pd.concat([df_notscale, df_scale], axis=1)

    # Setting Dates as index
    df_all = result_scaler.sort_values(by="Buy_Date")
    df_all.set_index('Buy_Date', inplace=True)
    original = pd.concat(all_ori_stocks)
    
    print('############################### DONE STRATEGY LABEL ##############################')
    
    return df_all
    #return(original, df_all, nsp, sp)

In [3]:
def data_prep(df):
    # Encoding the stock names
    le = LabelEncoder()
    df["Stock"] = le.fit_transform(df["Stock"])

    # Define the splitting date (for training we use 2 years data)
    # Splitting train test
    day = dt.datetime.today().day
    month = dt.datetime.today().month
    year = dt.datetime.today().year - 5
    date1 = "{}-{}-01".format(year, str(month).zfill(2), str(day).zfill(2))
    date2 = str(dt.datetime.today().date())  # until yesterday only

    # Separate input features and target
    X = df.drop(['wr'], axis=1)
    y = df.wr
    
    # Splitting train test
    X_train, y_train = X[date1: date2], y[date1: date2]
    X_trainss, y_train = X_train.to_numpy(), y_train.to_numpy()
    
    print('############################## DONE DATA PREP #################################')

    return(X,y)

In [4]:
def walk_forward(X,y,df_all):
    start_y = 16  
    end_y = 21

    test_prec = []
    train_acc = []
    test_acc = []
    for i in range((end_y + 1) - start_y):  # years remaining
        for j in range(1,12,1):  # iteratting from 1 - 12

            # Fill the string with zeros until it is 2 characters long  
            date1 = "20{}-{}-01".format(start_y + i, str(j).zfill(2))  # starting of the test set
            date2 = "20{}-{}-01".format(start_y + i, str(j + 2).zfill(2))  # contain in 1 month only

            if j == 11:
                date2 = "20{}-{}-01".format(start_y + (i + 1), str((j + 2) - j).zfill(2))  # contain in 1 month only

            if date2 < str(df_all.index[-1]):  # limiting the splitting for duration below the last date

                #print("Start:", date1)
                #print("End:", date2)

                # splitting train test
                X_train, y_train = X[:date1], y[:date1]
                X_test, y_test = X[date1:date2], y[date1:date2]
                #print("#### {}, {} ####".format(len(X_train), len(X_test)))



                # defining the model
                model = XGBClassifier(verbosity=0)
                # training the model
                model.fit(X_train, y_train)

                # predicting the train and test set
                # train
                y_pred = model.predict(X_train)
                pred1 = [round(value) for value in y_pred]
                train_accuracy = accuracy_score(y_train, pred1)
                train_acc.append(train_accuracy)

                # test
                y_pred = model.predict(X_test)
                pred2 = [round(value) for value in y_pred]
                test_accuracy = accuracy_score(y_test, pred2)
                test_acc.append(test_accuracy)
                #print("Test Accuracy: %.2f%%" % (test_accuracy * 100.0))

                precision = precision_score(y_test, pred2)
                test_prec.append(precision)
                #print("Test Precision: %.2f%%" % (precision * 100.0))

                #if i == 0:
                    #print("----------------------------- DONE ROUND {} -----------------------------".format(i*10 + j))
                #else:
                    #print("----------------------------- DONE ROUND {} ----------------------------- \n".format(i*10 + j+(2*i)))
    # remove nan values for list test_acc
    new_test_acc = [x for x in test_acc if np.isnan(x) == False]
    avg_1 = sum(train_acc)/len(train_acc)  # calculating the average accuracy of the train set
    avg_2 = round(sum(new_test_acc)/len(new_test_acc),4)  # calculating the average accuracy of the test set
    print("Overall Train accuracy is: {}".format(avg_1))
    print("Overall Test accuracy is: {}".format(avg_2)) 

    avg_3 = sum(test_prec)/len(test_prec)  # calculating the average accuracy of the train set
    print("Overall Test precision is: {}".format(avg_3))
    return avg_3

    
    

In [5]:
def sector_filter(sector):

  # pulling data from api
  URL = 'https://admin.isaham.my/api/stocks/list'
  r = requests.get(url = URL) 
  data = r.json() 
  df = pd.DataFrame(data['stocks'],columns = ['code','symbol','name','primary-sector','secondary-sector'])

  wl = df[df["primary-sector"]==sector]
  wl = list(wl.symbol)

  return(wl)

In [6]:
def precision(wl):
    df_all=strategy_label(wl)
    X,y=data_prep(df_all)
    avg_3=walk_forward(X,y,df_all)
    
    return X,y,df_all,avg_3

In [7]:
sectors = ["Technology", "Healthcare", "Construction"]


for sector in sectors:
    wl = sector_filter(sector)
    print(f"{sector}:")
    X,y,df_all,avg_3= precision(wl)

Technology:
----------------PROCESSING WILLOW------------------
----------------DONE WILLOW------------------
----------------PROCESSING LAMBO------------------
----------------DONE LAMBO------------------
----------------PROCESSING NETX------------------
----------------DONE NETX------------------
----------------PROCESSING GHLSYS------------------
----------------DONE GHLSYS------------------
----------------PROCESSING IFCAMSC------------------
----------------DONE IFCAMSC------------------
----------------PROCESSING NOVAMSC------------------
----------------DONE NOVAMSC------------------
----------------PROCESSING DIGISTA------------------
----------------DONE DIGISTA------------------
----------------PROCESSING MMAG------------------
----------------DONE MMAG------------------
----------------PROCESSING KGROUP------------------
----------------DONE KGROUP------------------
----------------PROCESSING OPENSYS------------------
----------------DONE OPENSYS------------------
----------

----------------DONE ITRONIC------------------
############################### DONE STRATEGY LABEL ##############################
############################## DONE DATA PREP #################################




Overall Train accuracy is: 0.9140832056878221
Overall Test accuracy is: 0.5717
Overall Test precision is: 0.565087218285991
Healthcare:
----------------PROCESSING SCOMNET------------------
----------------DONE SCOMNET------------------
----------------PROCESSING KOTRA------------------
----------------DONE KOTRA------------------
----------------PROCESSING LYC------------------
----------------DONE LYC------------------
----------------PROCESSING TMCLIFE------------------
----------------DONE TMCLIFE------------------
----------------PROCESSING MGRC------------------
----------------DONE MGRC------------------
----------------PROCESSING CAREPLS------------------
----------------DONE CAREPLS------------------
----------------PROCESSING LKL------------------
----------------DONE LKL------------------
----------------PROCESSING TDM------------------
----------------DONE TDM------------------
----------------PROCESSING HARTA------------------
----------------DONE HARTA------------------
--

  _warn_prf(average, modifier, msg_start, len(result))


Overall Train accuracy is: 0.9836516037992673
Overall Test accuracy is: 0.5808
Overall Test precision is: 0.6253838553885548
Construction:
----------------PROCESSING SCBUILD------------------
----------------DONE SCBUILD------------------
----------------PROCESSING WIDAD------------------
----------------DONE WIDAD------------------
----------------PROCESSING INTA------------------
----------------DONE INTA------------------
----------------PROCESSING GDB------------------
----------------DONE GDB------------------
----------------PROCESSING NADIBHD------------------
----------------DONE NADIBHD------------------
----------------PROCESSING TCS------------------
----------------DONE TCS------------------
----------------PROCESSING MRCB------------------
----------------DONE MRCB------------------
----------------PROCESSING ZELAN------------------
----------------DONE ZELAN------------------
----------------PROCESSING GKENT------------------
----------------DONE GKENT------------------
-

  _warn_prf(average, modifier, msg_start, len(result))


Overall Train accuracy is: 0.927878639336509
Overall Test accuracy is: 0.5384
Overall Test precision is: 0.5223442128817541
