In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("ibm.us.txt")
df.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt
0,1962-01-02,6.413,6.413,6.3378,6.3378,467056,0
1,1962-01-03,6.3378,6.3963,6.3378,6.3963,350294,0
2,1962-01-04,6.3963,6.3963,6.3295,6.3295,314365,0
3,1962-01-05,6.3211,6.3211,6.1958,6.2041,440112,0
4,1962-01-08,6.2041,6.2041,6.0373,6.087,655676,0
5,1962-01-09,6.1208,6.2376,6.1208,6.1621,592806,0
6,1962-01-10,6.1707,6.2041,6.1707,6.1707,359274,0
7,1962-01-11,6.1875,6.2376,6.1875,6.2376,386220,0
8,1962-01-12,6.2543,6.2962,6.2543,6.2543,529933,0
9,1962-01-15,6.2708,6.2962,6.2708,6.2792,305383,0


## Feature Engineering

* Historique des prix 
* la volatilité 
* la moyenne mobile
* le retour sur investissement

$$ R_t = \frac{P_t - P_{t-1}}{P_{t-1}} $$

$$ V_t(n) = std(R_{t-n},...,R_{t}) $$

In [13]:
# Retour sur investissement
R = [1,5,10,15]

for r in R:
    df['R_'+str(r)] = df['Open'].pct_change(r)

# Moyenne mobile
M = [5,10,15,20]

for m in M:
    df['MA_' + str(m)] = df['Open'].rolling(m).mean()

# Historique des prix
P = range(1,11)

for p in P:
    df['Prix_'+str(p)] = df['Open'].shift(p)
    
# La volatilité
V = [5,10,15]

for v in V:
    df['V_'+str(v)] = df['R_5'].rolling(v).std()

df['target'] = df['V_5'].shift(-5)
seuil = df['target'].median()
df['target'] = df['target'].apply(lambda x: 1 if x> seuil else 0)

df.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt,R_1,R_5,R_10,...,Prix_5,Prix_6,Prix_7,Prix_8,Prix_9,Prix_10,V_5,V_10,V_15,target
0,1962-01-02,6.413,6.413,6.3378,6.3378,467056,0,,,,...,,,,,,,,,,0
1,1962-01-03,6.3378,6.3963,6.3378,6.3963,350294,0,-0.011726,,,...,,,,,,,,,,0
2,1962-01-04,6.3963,6.3963,6.3295,6.3295,314365,0,0.00923,,,...,,,,,,,,,,0
3,1962-01-05,6.3211,6.3211,6.1958,6.2041,440112,0,-0.011757,,,...,,,,,,,,,,0
4,1962-01-08,6.2041,6.2041,6.0373,6.087,655676,0,-0.018509,,,...,,,,,,,,,,1
5,1962-01-09,6.1208,6.2376,6.1208,6.1621,592806,0,-0.013427,-0.045564,,...,6.413,,,,,,,,,1
6,1962-01-10,6.1707,6.2041,6.1707,6.1707,359274,0,0.008153,-0.026366,,...,6.3378,6.413,,,,,,,,1
7,1962-01-11,6.1875,6.2376,6.1875,6.2376,386220,0,0.002723,-0.032644,,...,6.3963,6.3378,6.413,,,,,,,0
8,1962-01-12,6.2543,6.2962,6.2543,6.2543,529933,0,0.010796,-0.010568,,...,6.3211,6.3963,6.3378,6.413,,,,,,1
9,1962-01-15,6.2708,6.2962,6.2708,6.2792,305383,0,0.002638,0.010751,,...,6.2041,6.3211,6.3963,6.3378,6.413,,0.021706,,,1
