In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import credentials
import requests
#pulling data API
key =credentials.login['key_isaham']  # extract password
# Define the instruments to download.
#tech = ["GREATEC", 
       #"INARI"]

tech = ["GREATEC",
        "UWC",
        "D&O", 
       "GENETEC", 
       "UNISEM", 
       "FRONTKN", 
       "ATAIMS", 
       "FPI", 
       "PENTA", 
       "DUFU", 
       "VS", 
       "SKPRES", 
        "MPI", 
        "VITROX"
         ]
       


for m,stock in enumerate (tech):
        
    print("----------------PROCESSING {}------------------".format(stock))
    #symbol = "UNISEM"  # which stock to pull
    symbol = stock    
    
    response = requests.get("https://admin.isaham.my/api/chart?stock={}&key={}".format(symbol,key))
    data = response.json()
    #print(data)
    plt.style.use('fivethirtyeight')

    %config InlineBackend.figure_format = 'retina'
    %matplotlib inline

    # makes plots bigger
    plt.rcParams["figure.figsize"] = (20,8)
    
    #Tuning OB and OS level
    ob_cross=85
    os_cross=35

    df= pd.DataFrame.from_dict(data)
    df= df[['c','d','h','l','o','v']]
    df['d'] = pd.to_datetime(df['d'], dayfirst= True )
    #identify MA50
    n=50
    ma50=[] 

    for i in range(len(df["c"])-(n-1)):
        ma = round(sum(df["c"][i:i+(n)])/n,4)
        #print(ma)
        ma50.append(ma)
    
    df["MA50"] = pd.Series(ma50)
    df["MA50"] = df["MA50"].shift(n-1)

    #filter uptrend price above ma50
    # Calculating the gap of both MA
    df["MAgap"] = df["c"] -  df["MA50"] 

    df.set_index('d',inplace=True,drop=True)
    df.index.name = None

    #create signal for uptrend price above ma50 
    #result not accurate for stochastic when just filter uptrend only
    df["Uptrend"]=df['MAgap']>0
    Uptrend=df["Uptrend"]
    Uptrend_signal=Uptrend[Uptrend==True]
    #signal stochastic
    #Create the "L14" column in the DataFrame
    df['L14'] = df['l'].rolling(window=14).min()

    #Create the "H14" column in the DataFrame
    df['H14'] = df['h'].rolling(window=14).max()

    #Create the "%K" column in the DataFrame
    df['%K'] = 100*((df['c'] - df['L14']) / (df['H14'] - df['L14']) )

    #Create the "%D" column in the DataFrame
    df['%D'] = df['%K'].rolling(window=3).mean()

    #Create a column in the DataFrame showing "TRUE" if buy entry signal is given and "FALSE" otherwise. 
    #A buy is initiated when the %K line crosses up through the %D line and the value of the oscillator is below 20 
    df['Buy OS'] = ((df['%K'] > df['%D']) & (df['%K'].shift(1) < df['%D'].shift(1))) & (df['%D'] < os_cross) & (Uptrend_signal)

    #Create a column in the DataFrame showing "TRUE" if sell entry signal is given and "FALSE" otherwise. 
    #A sell is initiated when the %K line crosses down through the %D line and the value of the oscillator is above 80 
    df['Sell OB'] = ((df['%K'] < df['%D']) & (df['%K'].shift(1) > df['%D'].shift(1))) & (df['%D'] > ob_cross) 

    overbought =df['Sell OB']
    oversold =df['Buy OS']
    date_os=list(oversold[oversold == True].index)
    date_ob=list(overbought[overbought == True].index)
    
    # transaction recording

    
    # transaction recording
    buy = []
    sell = []
    sdate = []
    bdate = []
    wr = []
    pct = []
    vol = []
    ope =[]
    high = []
    low = []


    for n,i in enumerate(date_os):
        #print(n,"Buy:", i, "Price:", df['c'].loc[i])
        buy.append(df['c'].loc[i])
        vol.append(df['v'].loc[i])
        ope.append(df['o'].loc[i])
        high.append(df['h'].loc[i])
        low.append(df['l'].loc[i])

        bdate.append(i)
        r=[]
        #print("before", len(r))
        for m,k in enumerate(date_ob):

            if k>i:
                r.append(m)
                #print('after',len(r))
                if len(r) ==1:
                    #print(m,"Sell :", k, "Price:",df['c'].loc[k])
                    #print("Percentage change:",(((df['c'].loc[k]/df['c'].loc[i])-1)*100) )
                    sell.append(df['c'].loc[k])
                    sdate.append(k)
                    pct_change=((df['c'].loc[k]/df['c'].loc[i])-1)*100
                    pct.append(((df['c'].loc[k]/df['c'].loc[i])-1)*100)
                    wr =pct_change>0

                    #if pct_change >0:
                        #print("1")
                    #else:
                        #print('0')

                elif len(r)>1:
                    pass       
        #print('........')
    #print('Number of trade:',len(date_os))

    diff =len(buy)-len(sell)
    #print('difference:',diff)

    # Checking for final transaction, make sure it is sell, as we want to close the position
    if len(buy) == len(sell):
        print("Yes \n", "Buy:", len(buy), "Sell:", len(sell))
        print("Position closed")

    elif len(buy) != len(sell):
        #print("No, \n", "Buy:{}, Sell:{}, Closing the position".format(len(buy),len(sell)))
        del(buy[-diff:])
        del(bdate[-diff:])
        del(ope[-diff:])
        del(high[-diff:])
        del(vol[-diff:])
        del(low[-diff:])
        #print("Position closed")
    dct = {
        "Buy_Date": bdate,
        "Buy": buy,
        "Sell": sell,
        "Sell_Date": sdate,
        "Open":ope,
        'Volume': vol,
        "Low": low,
        "high": high,
        "Pct_change": pct    
    }

    trans = pd.DataFrame(dct)

    wr = []
    for i in range(len(trans)):
        if trans["Pct_change"][i] > 0:
            wr.append(1)
        else:
            wr.append(0)
    trans["wr"] = pd.Series(wr)
    
    Name_stock = []
    for s in range(len(trans)):
        Name_stock.append(symbol)
        
    
    trans['Stock'] = pd.Series(Name_stock)
    #print(trans)
    
    trans.to_csv('new_result {}.csv'.format(symbol), index=False)
    #C:\Users\Mo Syamil\Documents\data scientiest trial\strategy stho\result
            
    print("----------------DONE {}------------------".format(symbol))

----------------PROCESSING GREATEC------------------
----------------DONE GREATEC------------------
----------------PROCESSING UWC------------------
Yes 
 Buy: 9 Sell: 9
Position closed
----------------DONE UWC------------------
----------------PROCESSING D&O------------------
Yes 
 Buy: 90 Sell: 90
Position closed
----------------DONE D&O------------------
----------------PROCESSING GENETEC------------------
----------------DONE GENETEC------------------
----------------PROCESSING UNISEM------------------
Yes 
 Buy: 106 Sell: 106
Position closed
----------------DONE UNISEM------------------
----------------PROCESSING FRONTKN------------------
----------------DONE FRONTKN------------------
----------------PROCESSING ATAIMS------------------
Yes 
 Buy: 182 Sell: 182
Position closed
----------------DONE ATAIMS------------------
----------------PROCESSING FPI------------------
----------------DONE FPI------------------
----------------PROCESSING PENTA------------------
----------------DON

In [117]:
# combine all df
df1 = pd.read_csv("new_result GREATEC.csv", parse_dates = True)
df2 = pd.read_csv("new_result UWC.csv", parse_dates = True)
df3 = pd.read_csv("new_result D&O.csv", parse_dates = True)
df4 = pd.read_csv("new_result GENETEC.csv", parse_dates = True)
df5 = pd.read_csv("new_result UNISEM.csv", parse_dates = True)
df6 = pd.read_csv("new_result FRONTKN.csv", parse_dates = True)
df7 = pd.read_csv("new_result ATAIMS.csv", parse_dates = True)

df9 = pd.read_csv("new_result FPI.csv", parse_dates = True)
df10 = pd.read_csv("new_result PENTA.csv", parse_dates = True)
df11 = pd.read_csv("new_result DUFU.csv", parse_dates = True)
df12 = pd.read_csv("new_result VS.csv", parse_dates = True)
df13 = pd.read_csv("new_result SKPRES.csv", parse_dates = True)
df14 = pd.read_csv("new_result MPI.csv", parse_dates = True)
df15 = pd.read_csv("new_result VITROX.csv", parse_dates = True)

# Merging all df
df_all = pd.concat([df1, df2, df3, df4, df5, df6, df7, df9, df10, df11, df12, df13, df14, df15], axis=0)
# sorting all values by dates
df_all = df_all.sort_values(by="Buy_Date")

# setting Dates as index
df_all.set_index('Buy_Date', inplace=True)



In [118]:
# Encoding the stock names
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df_all["Stock"] = le.fit_transform(df_all["Stock"])
df_all

Unnamed: 0_level_0,Buy,Sell,Sell_Date,Open,Volume,Low,high,Pct_change,wr,Stock
Buy_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1994-09-07,4.58,4.46,1995-01-03,4.62,386000.0,4.58,4.72,-2.620087,0,7
1994-09-27,4.72,4.46,1995-01-03,4.68,312000.0,4.66,4.86,-5.508475,0,7
1994-09-30,4.80,4.46,1995-01-03,4.80,85000.0,4.60,4.80,-7.083333,0,7
1994-09-30,2.64,2.25,1994-12-22,2.63,204000.0,2.63,2.71,-14.772727,0,0
1994-10-04,2.66,2.25,1994-12-22,2.61,71000.0,2.61,2.66,-15.413534,0,0
...,...,...,...,...,...,...,...,...,...,...
2021-03-02,6.53,5.80,2021-03-30,6.55,2972600.0,6.43,6.68,-11.179173,0,11
2021-03-09,2.78,3.01,2021-03-29,2.55,5793800.0,2.48,2.78,8.273381,1,0
2021-03-10,35.54,38.90,2021-03-31,36.50,975200.0,34.64,36.82,9.454136,1,7
2021-03-12,2.73,3.01,2021-03-29,2.72,2065700.0,2.69,2.77,10.256410,1,0


In [119]:
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [120]:
# Separate input features and target
X = df_all.drop(['wr','Pct_change','Sell_Date','Sell'], axis=1)
y = df_all.wr

In [121]:
df_all["wr"].value_counts()

1    845
0    492
Name: wr, dtype: int64

In [122]:
# setting up testing and training sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40, random_state=0, shuffle=False)

In [123]:
# concatenate our training data back together
X = pd.concat([X_train, y_train], axis=1)

In [124]:
# separate minority and majority classes
not_win = X[X.wr==0]
win = X[X.wr==1]

In [125]:
# upsample minority
not_win_upsampled = resample(not_win,
                          replace=True, # sample with replacement
                          n_samples=len(win), # match number in majority class
                          random_state=0) # reproducible results

In [126]:
# combine majority and upsampled minority
upsampled = pd.concat([win, not_win_upsampled]).sort_values(by="Buy_Date")

In [127]:
upsampled.wr.value_counts()

1    442
0    442
Name: wr, dtype: int64

In [128]:
upsampled

Unnamed: 0_level_0,Buy,Open,Volume,Low,high,Stock,wr
Buy_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1994-09-07,4.5800,4.6200,386000.0,4.5800,4.7200,7,0
1994-09-30,2.6400,2.6300,204000.0,2.6300,2.7100,0,0
1994-09-30,4.8000,4.8000,85000.0,4.6000,4.8000,7,0
1994-10-04,2.6600,2.6100,71000.0,2.6100,2.6600,0,0
1994-10-04,2.6600,2.6100,71000.0,2.6100,2.6600,0,0
...,...,...,...,...,...,...,...
2013-11-22,0.0658,0.0658,121588.0,0.0642,0.0658,2,1
2013-12-04,0.2149,0.2180,98109.0,0.2149,0.2180,13,1
2013-12-09,3.2700,3.2200,277000.0,3.2200,3.3300,7,1
2013-12-13,3.2000,3.1800,134700.0,3.1800,3.2200,7,1


In [129]:
# trying logistic regression again with the balanced dataset
y_train = upsampled.wr
X_train = upsampled.drop('wr', axis=1)

upsampled = LogisticRegression(solver='liblinear').fit(X_train, y_train)

In [130]:
from sklearn.metrics import accuracy_score

In [131]:
# predicting the trainset
upsampled_pred_train = upsampled.predict(X_train)
trainscore = accuracy_score(y_train, upsampled_pred_train)
print("training score:", trainscore)

training score: 0.503393665158371


In [132]:
# validating using test_set
upsampled_pred_test = upsampled.predict(X_test)
testscore = accuracy_score(y_test, upsampled_pred_test)
print("test score:", testscore)

test score: 0.24485981308411214


In [133]:
#check 
if trainscore - testscore < -0.01:
    print("Model Overfitted")
else:
    print("Model Ok")

Model Ok


In [134]:
trainscore - testscore

0.2585338520742589

In [135]:
# classification report
from sklearn.metrics import classification_report
print("Training model score:\n {}\n".format(classification_report(y_train, upsampled_pred_train)))
print("Test model score:\n {}".format(classification_report(y_test, upsampled_pred_test)))

Training model score:
               precision    recall  f1-score   support

           0       0.50      0.99      0.67       442
           1       0.62      0.02      0.04       442

    accuracy                           0.50       884
   macro avg       0.56      0.50      0.35       884
weighted avg       0.56      0.50      0.35       884


Test model score:
               precision    recall  f1-score   support

           0       0.24      0.98      0.39       132
           1       0.33      0.00      0.00       403

    accuracy                           0.24       535
   macro avg       0.29      0.49      0.20       535
weighted avg       0.31      0.24      0.10       535

