In [4]:
import numpy as np
import math 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pymysql                        # for getting data from a SQL database
from sqlalchemy import create_engine  # for establishing the connection and authentication

from getpass import getpass           # To get the password without showing the input

### Project topic: Approach to disprove Random-Walk theory with foreign exchange market data

#### Definition Random-Walk-Theory:
Says basically that it is impossible to predict the further progress and price of market data and therefore profits with specific trading strategies are not possible

#### Trading strategy:

- If low lower than low of 5 previous candles + candle is green (open < close), next day go long at opening
- If high higher than high of 5 previous candles + candle is red (open > close), next day go short at opening

--> reversed candle strategy

Rules:
- Positions will be opened always at open and closed at close of the day (no positions over night)

#### Setting up connection to SQL trading database

In [5]:
#Storing my SQL password in variable
password = getpass()

········


In [6]:
#Build the connection string to trading database and the engine
connection_string = 'mysql+pymysql://root:'+password+'@localhost/trading'
engine = create_engine(connection_string)

### Retrieving required data from trading database:
USDJPY daily will again be used as first trading pair

In [7]:
USDJPY = pd.read_sql_query('SELECT * FROM trading.usdjpy_day_new', engine)
USDJPY

Unnamed: 0,date,open,high,low,close
0,2005-01-03,20548.0,20690.0,20464.0,20564.0
1,2005-01-04,20564.0,20958.0,20486.0,20888.0
2,2005-01-05,20888.0,21004.0,20752.0,20820.0
3,2005-01-06,20820.0,21036.0,20776.0,20984.0
4,2005-01-07,20984.0,21024.0,20772.0,20964.0
...,...,...,...,...,...
5076,2023-02-27,27285.0,27312.0,27182.0,27231.0
5077,2023-02-28,27224.0,27383.0,27146.0,27225.0
5078,2023-03-01,27211.0,27293.0,27051.0,27230.0
5079,2023-03-02,27228.0,27420.0,27204.0,27350.0


In [8]:
def calcOpenCloseDiff(df):
    df['diff_open_close'] = df.apply(lambda x: x['close'] - x['open'], axis=1)
    return df

In [9]:
USDJPY = calcOpenCloseDiff(USDJPY)
USDJPY

Unnamed: 0,date,open,high,low,close,diff_open_close
0,2005-01-03,20548.0,20690.0,20464.0,20564.0,16.0
1,2005-01-04,20564.0,20958.0,20486.0,20888.0,324.0
2,2005-01-05,20888.0,21004.0,20752.0,20820.0,-68.0
3,2005-01-06,20820.0,21036.0,20776.0,20984.0,164.0
4,2005-01-07,20984.0,21024.0,20772.0,20964.0,-20.0
...,...,...,...,...,...,...
5076,2023-02-27,27285.0,27312.0,27182.0,27231.0,-54.0
5077,2023-02-28,27224.0,27383.0,27146.0,27225.0,1.0
5078,2023-03-01,27211.0,27293.0,27051.0,27230.0,19.0
5079,2023-03-02,27228.0,27420.0,27204.0,27350.0,122.0


#### Create function:
- compare low of current candle with lows of 5 previous candles
- if current candle low lower and open < close (green candle), insert in new column value '-1' --> signal for next day to go long
- if current candle high higher and open > close (red candle), insert in new column value '1' --> signal for next day to go short
- else: insert 0 --> no trade on next day

In [10]:
def classLongShort(df):
    list_class = []
    for index, row in df.iterrows():
        if (row['low'] < df['low'].shift(periods=1)[index]) & (row['low'] < df['low'].shift(periods=2)[index]) & (row['low'] < df['low'].shift(periods=3)[index]) & (row['low'] < df['low'].shift(periods=4)[index]) & (row['low'] < df['low'].shift(periods=5)[index]) & (row['diff_open_close'] > 0):
                list_class.append(-1)
        elif (row['high'] > df['high'].shift(periods=1)[index]) & (row['high'] > df['high'].shift(periods=2)[index]) & (row['high'] > df['high'].shift(periods=3)[index]) & (row['high'] > df['high'].shift(periods=4)[index]) & (row['high'] > df['high'].shift(periods=5)[index]) & (row['diff_open_close'] < 0):
                list_class.append(1)
        else:
            list_class.append(0)
    df['classification'] = list_class
    return df

In [11]:
%%time
classLongShort(USDJPY)
USDJPY

Wall time: 2.51 s


Unnamed: 0,date,open,high,low,close,diff_open_close,classification
0,2005-01-03,20548.0,20690.0,20464.0,20564.0,16.0,0
1,2005-01-04,20564.0,20958.0,20486.0,20888.0,324.0,0
2,2005-01-05,20888.0,21004.0,20752.0,20820.0,-68.0,0
3,2005-01-06,20820.0,21036.0,20776.0,20984.0,164.0,0
4,2005-01-07,20984.0,21024.0,20772.0,20964.0,-20.0,0
...,...,...,...,...,...,...,...
5076,2023-02-27,27285.0,27312.0,27182.0,27231.0,-54.0,1
5077,2023-02-28,27224.0,27383.0,27146.0,27225.0,1.0,0
5078,2023-03-01,27211.0,27293.0,27051.0,27230.0,19.0,0
5079,2023-03-02,27228.0,27420.0,27204.0,27350.0,122.0,0


In [12]:
USDJPY['classification'].value_counts()

 0    4494
 1     325
-1     262
Name: classification, dtype: int64

#### Function to create column for order type for each day

In [13]:
def createOrderType(df):
    list_class = list(df['classification'])
    list_orderType = []
    for value in range(0, len(list_class)):
        if list_class[value-1] == -1:
            list_orderType.append('long')
        elif list_class[value-1] == 1:
            list_orderType.append('short')
        else:
            list_orderType.append('none')
    df['order_type'] = list_orderType
    return df

In [14]:
createOrderType(USDJPY)

Unnamed: 0,date,open,high,low,close,diff_open_close,classification,order_type
0,2005-01-03,20548.0,20690.0,20464.0,20564.0,16.0,0,none
1,2005-01-04,20564.0,20958.0,20486.0,20888.0,324.0,0,none
2,2005-01-05,20888.0,21004.0,20752.0,20820.0,-68.0,0,none
3,2005-01-06,20820.0,21036.0,20776.0,20984.0,164.0,0,none
4,2005-01-07,20984.0,21024.0,20772.0,20964.0,-20.0,0,none
...,...,...,...,...,...,...,...,...
5076,2023-02-27,27285.0,27312.0,27182.0,27231.0,-54.0,1,none
5077,2023-02-28,27224.0,27383.0,27146.0,27225.0,1.0,0,short
5078,2023-03-01,27211.0,27293.0,27051.0,27230.0,19.0,0,none
5079,2023-03-02,27228.0,27420.0,27204.0,27350.0,122.0,0,none


#### Create column with actual profit per day comparing order_type with diff_open_close

In [15]:
def createProfit(df):
    list_profit = []
    for index, row in df.iterrows():
        if row['order_type'] == 'long':
            list_profit.append(row['diff_open_close'])
        elif row['order_type'] == 'short':
            profit = (row['diff_open_close'])*(-1.0)
            list_profit.append(profit)
        else:
            list_profit.append(0)
    df['profit'] = list_profit
    return df

In [16]:
createProfit(USDJPY)

Unnamed: 0,date,open,high,low,close,diff_open_close,classification,order_type,profit
0,2005-01-03,20548.0,20690.0,20464.0,20564.0,16.0,0,none,0.0
1,2005-01-04,20564.0,20958.0,20486.0,20888.0,324.0,0,none,0.0
2,2005-01-05,20888.0,21004.0,20752.0,20820.0,-68.0,0,none,0.0
3,2005-01-06,20820.0,21036.0,20776.0,20984.0,164.0,0,none,0.0
4,2005-01-07,20984.0,21024.0,20772.0,20964.0,-20.0,0,none,0.0
...,...,...,...,...,...,...,...,...,...
5076,2023-02-27,27285.0,27312.0,27182.0,27231.0,-54.0,1,none,0.0
5077,2023-02-28,27224.0,27383.0,27146.0,27225.0,1.0,0,short,-1.0
5078,2023-03-01,27211.0,27293.0,27051.0,27230.0,19.0,0,none,0.0
5079,2023-03-02,27228.0,27420.0,27204.0,27350.0,122.0,0,none,0.0


#### Calculate average loss, average win, number of trades and sum of profit

In [17]:
def loss_win_trades_profit(df):
    sum_of_profit = df['profit'].sum()
    number_of_trades = df['order_type'][(df['order_type'] == 'long') | (df['order_type'] == 'short')].count()
    average_loss = df['profit'][df['profit'] < 0].mean()
    average_win = df['profit'][df['profit'] > 0].mean()
    
    return sum_of_profit, number_of_trades, average_loss, average_win

In [18]:
loss_win_trades_profit(USDJPY)

(-2471.0000000000127, 587, -78.57114754098359, 76.76142857142851)

#### Hypothesis testing
Question: How possible is it that the results of my trading strategy are out of coincidence?
It is clear that it is impossible to answer this question with 100% certainty. Therefore, one has to set up a significance level as a decision point to accept or reject the strategy.

H0: Random-Walk theory is true, market price is unpredictable and therefore expected mean value for profit is 0.
    expected profit value = 0
    
H1: Random-Walk theory is not true, with this trading strategy the mean profit is bigger or lower 0
    expected profit value <> 0

significance level: the resulting profit of this trading strategy has to exceed 4.9 standard deviations. This means that the occured mean profit of this strategy has to be 4.9 standard deviations different from the expected mean profit of 0 from the H0 to accept H1 and reject H1. In math: Sigma > 4.9

Reference for my considerations: https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2474755

In [19]:
def testStrategy(df):
    sum_of_profit = loss_win_trades_profit(df)[0]
    number_of_trades = loss_win_trades_profit(df)[1]
    average_loss = loss_win_trades_profit(df)[2]
    average_win = loss_win_trades_profit(df)[3]
    percentage_avg_win = abs(average_loss)/(abs(average_loss)+abs(average_win))
    percentage_avg_loss = 1 - percentage_avg_win
    exp_profit = 0
    variance_of_trades = (percentage_avg_loss*(average_loss-exp_profit)**2) + (percentage_avg_win*(average_win-exp_profit)**2)
    stand_dev_of_profit = math.sqrt(number_of_trades)*math.sqrt(variance_of_trades)
    stand_dev_final = sum_of_profit/stand_dev_of_profit
    
    return stand_dev_final

In [20]:
std_USDJPY = testStrategy(USDJPY)
std_USDJPY

-1.3132597155786911

#### Conclusion: 
The result....

In [21]:
#Store created USDJPY_daily with profit column as csv
#USDJPY.to_csv('./cleaned_datafiles/usdjpy_rev_candle.csv', index=False)

### Testing the strategy with minutes data for USDJPY to check if the performance is better

In [22]:
USDJPY_min = pd.read_sql_query('SELECT * FROM trading.usdjpy_minute', engine)
USDJPY_min

Unnamed: 0,date,open,high,low,close
0,2007-01-02 00:31:00,23752.0,23753.0,23743.0,23746.0
1,2007-01-02 00:32:00,23746.0,23746.0,23742.0,23744.0
2,2007-01-02 00:33:00,23744.0,23744.0,23740.0,23742.0
3,2007-01-02 00:34:00,23742.0,23742.0,23722.0,23734.0
4,2007-01-02 00:35:00,23734.0,23738.0,23734.0,23734.0
...,...,...,...,...,...
5999380,2023-03-03 15:00:00,27208.0,27209.0,27197.0,27201.0
5999381,2023-03-03 15:01:00,27201.0,27252.0,27201.0,27223.0
5999382,2023-03-03 15:02:00,27223.0,27244.0,27223.0,27242.0
5999383,2023-03-03 15:03:00,27242.0,27266.0,27242.0,27252.0


In [23]:
USDJPY_min = calcOpenCloseDiff(USDJPY_min)
USDJPY_min

Unnamed: 0,date,open,high,low,close,diff_open_close
0,2007-01-02 00:31:00,23752.0,23753.0,23743.0,23746.0,-6.0
1,2007-01-02 00:32:00,23746.0,23746.0,23742.0,23744.0,-2.0
2,2007-01-02 00:33:00,23744.0,23744.0,23740.0,23742.0,-2.0
3,2007-01-02 00:34:00,23742.0,23742.0,23722.0,23734.0,-8.0
4,2007-01-02 00:35:00,23734.0,23738.0,23734.0,23734.0,0.0
...,...,...,...,...,...,...
5999380,2023-03-03 15:00:00,27208.0,27209.0,27197.0,27201.0,-7.0
5999381,2023-03-03 15:01:00,27201.0,27252.0,27201.0,27223.0,22.0
5999382,2023-03-03 15:02:00,27223.0,27244.0,27223.0,27242.0,19.0
5999383,2023-03-03 15:03:00,27242.0,27266.0,27242.0,27252.0,10.0


In [36]:
def classLongShort2(df):
    df['low_shifted'] = df['low'].shift(periods=5)
    df['high_shifted'] = df['high'].shift(periods=5)

    mask1 = df['low'] < df['low_shifted']
    mask2 = df['high'] > df['high_shifted']
    mask3 = df['diff_open_close'] > 0
    mask4 = df['diff_open_close'] < 0

    list_class = np.where(mask1 & mask3, -1, np.where(mask2 & mask4, 1, 0))

    df['classification'] = list_class
    
    df_new = df.drop(['low_shifted', 'high_shifted'], axis=1)
    
    return df_new

In [37]:
%%time
classLongShort2(USDJPY_min)

Wall time: 394 ms


Unnamed: 0,date,open,high,low,close,diff_open_close,classification
0,2007-01-02 00:31:00,23752.0,23753.0,23743.0,23746.0,-6.0,0
1,2007-01-02 00:32:00,23746.0,23746.0,23742.0,23744.0,-2.0,0
2,2007-01-02 00:33:00,23744.0,23744.0,23740.0,23742.0,-2.0,0
3,2007-01-02 00:34:00,23742.0,23742.0,23722.0,23734.0,-8.0,0
4,2007-01-02 00:35:00,23734.0,23738.0,23734.0,23734.0,0.0,0
...,...,...,...,...,...,...,...
5999380,2023-03-03 15:00:00,27208.0,27209.0,27197.0,27201.0,-7.0,0
5999381,2023-03-03 15:01:00,27201.0,27252.0,27201.0,27223.0,22.0,-1
5999382,2023-03-03 15:02:00,27223.0,27244.0,27223.0,27242.0,19.0,0
5999383,2023-03-03 15:03:00,27242.0,27266.0,27242.0,27252.0,10.0,0


In [38]:
createOrderType(USDJPY_min)
createProfit(USDJPY_min)
loss_win_trades_profit(USDJPY_min)

(-23480.0, 1679402, -3.0176687347473985, 3.0539297970755)

In [39]:
std_USDJPY_min = testStrategy(USDJPY_min)
std_USDJPY_min

-5.96836607632827