<a href="https://colab.research.google.com/github/roni762583/NEAT/blob/main/NEAT_FX.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install neat-python
!pip install visualize==0.5.1

import neat
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random

# Upload the NEAT configuration file from the local machine (Colab file picker).
from google.colab import files
uploaded = files.upload()
if not uploaded:
    # Presumably an empty mapping means the upload dialog was cancelled; fail
    # with a clear message instead of an IndexError on the lookup below.
    raise RuntimeError("No config file was uploaded; re-run this cell and choose a file.")
# `uploaded` is keyed by file name; the first key is the config file's name.
config_file = next(iter(uploaded))
# Make sure pandas doesn't hide any columns when displaying wide frames.
# Use the canonical option name rather than relying on pattern matching.
pd.set_option("display.max_columns", None)

In [None]:
# import price data
!ls

from google.colab import files

# Upload the price CSV and read it; the file has no header row, so column
# names are supplied explicitly.
uploaded = files.upload()
filename = list(uploaded)[0]
colnames = ['TIME', 'bid', 'ask', 'tradable']
df = pd.read_csv(filename, header=None, names=colnames)
print(df.head())
print(df.dtypes)

# Parse the TIME text into timestamps, use them as the index (named
# 'datetime'), and keep only the bid/ask columns.
df['datetime'] = pd.to_datetime(df['TIME'], format='%Y%m%d %H%M%S%f')
df = df.set_index('datetime').drop(columns=['TIME', 'tradable'])

print("Shape: ",df.shape)
print(df.head())
print("count: ",df.count())

config-feedforward  sample_data


In [None]:
# build Hull Moving Avg. as features of environment
def WMA(s, period):
    """Linearly weighted moving average of `s` over a rolling window.

    Within each window of `period` values the oldest value gets weight 1 and
    the newest gets weight `period`; the weighted sum is divided by the sum of
    the weights.

    Parameters
    ----------
    s : object with a pandas-style ``rolling(...).apply(...)`` (e.g. a Series)
    period : int
        Window length.

    Returns
    -------
    Same type as ``s.rolling(period).apply(...)`` returns; the first
    ``period - 1`` entries are NaN because the window is not yet full.
    """
    # The weights depend only on `period`, so build them once instead of
    # re-creating them (twice) for every window inside the lambda.
    weights = np.arange(period) + 1
    weight_total = weights.sum()
    return s.rolling(period).apply(lambda x: (weights * x).sum() / weight_total, raw=True)

def HMA(s, period):
    """Hull moving average of `s` for the given `period`.

    Computed as WMA(2 * WMA(s, period // 2) - WMA(s, period), int(sqrt(period))).
    """
    half_span = WMA(s, period // 2)
    full_span = WMA(s, period)
    raw_hull = half_span.multiply(2).sub(full_span)
    smoothing_period = int(np.sqrt(period))
    return WMA(raw_hull, smoothing_period)

#df["hma5"] = HMA(df.bid, 5) # took >32 sec. to run on 1.08MM rows
#df['deltahma5'] = df.hma5.diff()
#df['deltahma5_5'] = df.hma5.diff(5)
#df['hma5hma5'] = HMA(df.hma5, 5)
#df['deltahma5hma5'] = df.hma5hma5.diff()
#df['deltahma5hma5_5'] = df.hma5hma5.diff(5) # divide by n+1 ticks to get avg. 
#df["hma20"] = HMA(df.bid, 20)
#df['bidhma20diff'] = df.bid - df.hma20
#df['bidhma50diff'] = df.bid - df.hma50

# Bid/ask spread per row.
df['spread'] = df['ask'] - df['bid']

# Hull moving averages of the bid at increasing look-back periods, stored as
# hma50, hma200, hma1000 and hma5000. The 5000 period is slow: the original
# author measured about 50 sec. for that single column.
for period in (50, 200, 1000, 5000):
    df['hma{}'.format(period)] = HMA(df['bid'], period)

# log returns lr_
def log_return(list_stock_prices):
    """Return the row-to-row difference of the natural log of the prices.

    The input must support ``.diff()`` after ``np.log`` is applied (e.g. a
    pandas Series); the first element of the result is NaN.
    """
    log_prices = np.log(list_stock_prices)
    return log_prices.diff()

# Log returns of each HMA (lr_ prefix). The hma* columns already hold these
# moving averages, so reuse them instead of calling HMA() a second time for
# every period (the 5000 period alone was measured at about 50 sec.).
for period in (50, 200, 1000, 5000):
    df['lr_hma{}'.format(period)] = log_return(df['hma{}'.format(period)])

# MA momentums: change of each HMA over the previous 10 rows.
for period in (50, 200, 1000, 5000):
    df['mom10_hma{}'.format(period)] = df['hma{}'.format(period)].diff(10)

# Distance between MAs: small-period average minus large-period average.
for short_period, long_period in ((50, 5000), (50, 200), (200, 1000), (1000, 5000), (200, 5000)):
    df['delta_hma{}_hma{}'.format(short_period, long_period)] = (
        df['hma{}'.format(short_period)] - df['hma{}'.format(long_period)]
    )

# Get rid of rows that contain NaN in any column (the rolling windows leave
# the first rows of every feature empty).
print('before removing Nans',df.count())
# `thresh` is deliberately not passed: recent pandas versions raise TypeError
# when both `how` and `thresh` are given, even if `thresh` is None.
df.dropna(axis=0, how='any', inplace=True)


In [None]:
# plot 

# Row positions (after the NaN rows were dropped) shown in every panel.
rowsList = list(range(4000,6800))

# One entry per panel, top to bottom: (column in df, panel title).
panels = [
    ('bid', 'Bid'),
    ('spread', 'spread'),
    # log returns of MAs
    ('lr_hma50', 'lr_hma50'),  # this filters sufficiently, yet retains bid line shape with enough detail
    ('lr_hma200', 'lr_hma200'),
    ('lr_hma1000', 'lr_hma1000'),
    ('lr_hma5000', 'lr_hma5000'),
    # MA momentums
    ('mom10_hma50', 'mom10_hma50'),
    ('mom10_hma200', 'mom10_hma200'),
    ('mom10_hma1000', 'mom10_hma1000'),
    ('mom10_hma5000', 'mom10_hma5000'),
    # distance between MAs - small period avg. minus large period => (+) is leading upwards
    ('delta_hma50_hma5000', 'delta_hma50_hma5000'),
    ('delta_hma200_hma5000', 'delta_hma200_hma5000'),
    # adjacent MAs (neighbors by period)
    ('delta_hma50_hma200', 'delta_hma50_hma200'),
    ('delta_hma200_hma1000', 'delta_hma200_hma1000'),
    ('delta_hma1000_hma5000', 'delta_hma1000_hma5000'),
]

# The number of subplots follows the panel list, so adding or removing a
# feature cannot get out of sync with a hard-coded count.
fig, axs = plt.subplots(len(panels), figsize=(20,50))
fig.suptitle('Signal Features')

for ax, (column, title) in zip(axs, panels):
    ax.plot(df[column].iloc[rowsList])
    ax.set_title(title)

# Disabled panels, kept as comments rather than a bare string literal (a
# string as the last expression of a cell is echoed as the cell's output).
# The ratio_* columns are not created in the visible cells; to enable these,
# create the columns and append the entries to `panels` above.
#
# Ratio of MA differences to largest difference (between the two extreme ma periods)
# axs[15].plot(df.ratio_50delta200_tofullrange.iloc[rowsList])
# axs[15].set_title("ratio_50delta200_tofullrange")
#
# axs[16].plot(df.ratio_200delta1000_tofullrange.iloc[rowsList])
# axs[16].set_title("ratio_200delta1000_tofullrange")
#
# axs[17].plot(df.ratio_1000delta5000_tofullrange.iloc[rowsList])
# axs[17].set_title("ratio_1000delta5000_tofullrange")

In [None]:
# Independent copy of the model-input columns, so normalizing `features`
# later does not touch `df`.
features = df.loc[:, [
    'spread',
    'lr_hma50', 'lr_hma200', 'lr_hma1000', 'lr_hma5000',
    'mom10_hma50', 'mom10_hma200', 'mom10_hma1000', 'mom10_hma5000',
    'delta_hma50_hma5000', 'delta_hma50_hma200', 'delta_hma200_hma1000',
    'delta_hma1000_hma5000', 'delta_hma200_hma5000',
]].copy()

# Normalize features df
def min_max_scaling(series):
    """Rescale `series` linearly so its minimum maps to 0 and its maximum to 1.

    A constant input (max == min) has no range to scale by; it is returned as
    all zeros instead of the NaN values that 0/0 would produce.
    """
    lowest = series.min()
    value_range = series.max() - lowest
    if value_range == 0:
        # Constant column: every value equals the minimum, so it maps to 0.
        return (series - lowest).astype(float)
    return (series - lowest) / value_range

# Scale every feature column independently with min_max_scaling.
features = features.apply(min_max_scaling)


# Summary statistics of the scaled features.
print('Describe after normalization')
print('')
features.describe()

In [None]:
# examine data slices

# Row window to inspect.
rowsList = list(range(5000,6800))

# Column positions 1-4 of `features` are the four lr_hma* log-return columns.
# Other position sets noted by the author: 4,5,6,14, [15,16,17], [0,3,4,5,6], [7,8,9,10]
column_positions = [1,2,3,4]
sl = features.iloc[rowsList, column_positions]
#sl = df.iloc[rowsList, [7,8,9,10] ]  # same idea on the un-normalized df (lr_ columns)

# Plotting parameters: figure size and no grid.
mpl.rcParams.update({'figure.figsize': (20, 7), 'axes.grid': False})

# Draw points only (no connecting line).
sl.plot(linestyle='none', marker='.', markersize=3)

plt.show()

In [None]:
# make X Y for NEAT training

In [None]:
"""
2-input XOR example -- this is most likely the simplest possible example.
from: https://github.com/CodeReclaimers/neat-python/blob/master/examples/xor/evolve-minimal.py
"""

# Truth table for 2-input XOR: every (a, b) combination of 0.0/1.0, paired
# position-by-position with a 1-tuple holding the expected output.
xor_inputs = [(float(a), float(b)) for a in (0, 1) for b in (0, 1)]
xor_outputs = [(float(a != b),) for a, b in xor_inputs]


def eval_genomes(genomes, config):
    """Set `fitness` on every genome from its error on the XOR cases.

    `genomes` is iterated as (genome_id, genome) pairs. Each genome starts at
    4.0 and loses the squared difference between the first network output and
    the expected value for each pair in xor_inputs / xor_outputs.
    """
    for _, genome in genomes:
        genome.fitness = 4.0
        network = neat.nn.FeedForwardNetwork.create(genome, config)
        for inputs, expected in zip(xor_inputs, xor_outputs):
            error = network.activate(inputs)[0] - expected[0]
            genome.fitness -= error ** 2


# Load configuration from the file uploaded in the first cell. Using
# `config_file` instead of a hard-coded name keeps this working when the
# uploaded file is not called exactly 'config-feedforward'.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     config_file)

# Create the population, which is the top-level object for a NEAT run.
p = neat.Population(config)

# Add a stdout reporter to show progress in the cell output.
p.add_reporter(neat.StdOutReporter(False))

# Run until a solution is found.
# NOTE(review): no generation limit is passed here, so termination presumably
# depends on the criteria in the config file -- confirm it cannot run forever.
winner = p.run(eval_genomes)

# Display the winning genome.
print('\nBest genome:\n{!s}'.format(winner))

# Show output of the most fit genome against training data.
print('\nOutput:')
winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
for xi, xo in zip(xor_inputs, xor_outputs):
    output = winner_net.activate(xi)
    # Round the first output to the nearest whole number for comparison with xo.
    outputRounded = round(output[0], 0)
    print("  input {!r}, expected output {!r}, got {!r}, rounded = {!r}".format(xi, xo, output, outputRounded))


 ****** Running generation 0 ****** 

Population's average fitness: 2.19354 stdev: 0.35020
Best fitness: 2.98963 - size: (1, 2) - species 1 - id 11
Average adjusted fitness: 0.537
Mean genetic distance 0.982, standard deviation 0.395
Population of 150 members in 1 species
Total extinctions: 0
Generation time: 0.014 sec

 ****** Running generation 1 ****** 

Population's average fitness: 2.35397 stdev: 0.32104
Best fitness: 2.98963 - size: (1, 2) - species 1 - id 11
Average adjusted fitness: 0.533
Mean genetic distance 1.175, standard deviation 0.444
Population of 150 members in 1 species
Total extinctions: 0
Generation time: 0.013 sec (0.014 average)

 ****** Running generation 2 ****** 

Population's average fitness: 2.30390 stdev: 0.35456
Best fitness: 2.99639 - size: (1, 2) - species 1 - id 360
Average adjusted fitness: 0.597
Mean genetic distance 1.310, standard deviation 0.442
Population of 150 members in 1 species
Total extinctions: 0
Generation time: 0.017 sec (0.015 average)

