In [1]:
# Install a blanket "ignore" filter: from this point on no warning of any
# category (deprecation notices included) is shown in the notebook output.
import warnings
warnings.filterwarnings(action='ignore')

# Handle table-like data and matrices
import numpy as np
import pandas as pd

# Modelling Algorithms
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier , GradientBoostingClassifier
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
import lightgbm as lgb

# Modelling Helpers
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer , scale, StandardScaler
from sklearn.feature_selection import RFECV, SelectKBest, SelectFromModel
from sklearn.metrics import classification_report,precision_recall_curve,roc_curve,make_scorer
from sklearn.model_selection import KFold

# `Imputer` was removed from sklearn.preprocessing in scikit-learn 0.22.
# Keep the legacy class where it still exists; otherwise expose SimpleImputer
# under the old name so the rest of the notebook can still refer to `Imputer`.
# NOTE(review): SimpleImputer has no `axis` argument -- check any call sites.
try:
    from sklearn.preprocessing import Imputer
except ImportError:
    from sklearn.impute import SimpleImputer as Imputer

# sklearn.cross_validation and sklearn.grid_search were removed in
# scikit-learn 0.20 in favour of sklearn.model_selection. The legacy modules
# are tried first so behaviour is unchanged on installations that still ship
# them; newer installations fall back to model_selection instead of failing
# on import.
# NOTE(review): the model_selection versions have a different API (e.g.
# StratifiedKFold(n_splits=...).split(X, y), GridSearchCV.cv_results_) --
# confirm downstream cells before relying on the fallback.
try:
    from sklearn.cross_validation import train_test_split , StratifiedKFold, cross_val_score
    from sklearn.grid_search import GridSearchCV
except ImportError:
    from sklearn.model_selection import train_test_split , StratifiedKFold, cross_val_score
    from sklearn.model_selection import GridSearchCV


# Deep learning tools
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense,Activation,Dropout
from keras.utils import to_categorical

# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns

# Configure visualisations
%matplotlib inline
mpl.style.use( 'ggplot' )
sns.set_style( 'white' )
pylab.rcParams[ 'figure.figsize' ] = 8 , 6

# Others
import datetime
import os
import time

Using TensorFlow backend.


In [2]:
pwd

'/home/chenhao/proj/data_proj/stock-data-process'

In [3]:
# Read the raw CSV export into a DataFrame (path is relative to the
# notebook's working directory).
train_original = pd.read_csv(filepath_or_buffer='../stock-data/data-1533016425769.csv')

# Preview the first ten rows as the cell output.
train_original.head(n=10)

Unnamed: 0,id,symbol,frequency,begin_of_bar,end_of_bar,open_price,close_price,low_price,hi_price,volume,amount,is_peak,is_valley,y,gmt_created
0,212566,SHSE.600497,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,5.403103,5.403103,5.348826,5.442578,2556000,27901144,0,0,0,2018-07-30 17:02:36.261914
1,212567,SHSE.600503,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,13.777168,13.429009,13.369324,13.777168,11300800,154109520,0,0,0,2018-07-30 17:02:36.376703
2,212568,SHSE.601238,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,15.434888,15.489597,15.434888,15.619532,215600,4893348,0,0,0,2018-07-30 17:02:36.460579
3,212569,SHSE.600570,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,59.887718,57.990288,57.328163,60.00631,3464800,205674736,0,0,0,2018-07-30 17:02:36.622501
4,212570,SHSE.600737,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,13.956138,13.888854,13.821573,13.956138,1953000,28227520,0,0,0,2018-07-30 17:02:36.766972
5,212571,SHSE.600466,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,9.768019,9.589167,9.547894,9.768019,2468800,34581656,0,0,0,2018-07-30 17:02:36.869707
6,212572,SHSE.600482,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,46.995907,46.112709,46.024387,47.437504,1765600,84095440,0,0,0,2018-07-30 17:02:36.992882
7,212573,SHSE.600276,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,26.07629,25.720463,25.587692,26.07629,935600,45382620,0,0,0,2018-07-30 17:02:37.11213
8,212574,SHSE.600827,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,17.433502,17.268576,17.181263,17.433502,1189200,21200996,0,0,0,2018-07-30 17:02:37.265169
9,212575,SHSE.600016,900s,2016-01-04 09:30:00,2016-01-04 09:45:00,7.23641,7.198244,7.182976,7.28221,16974100,160790736,0,0,0,2018-07-30 17:02:37.355675


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the percentiles listed are pandas' defaults, spelled out explicitly.
train_original.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,id,open_price,close_price,low_price,hi_price,volume,amount,is_peak,is_valley,y
count,1652926.0,1652926.0,1652926.0,1652926.0,1652926.0,1652926.0,1652926.0,1652926.0,1652926.0,1652926.0
mean,1039028.0,18.02252,18.02389,17.9643,18.08221,2638859.0,30661760.0,0.01493533,0.02673441,-0.002010374
std,477158.8,38.81074,38.81405,38.70823,38.91727,5542340.0,59160630.0,0.1212942,0.1613062,0.1040415
min,212566.0,1.455166,1.455166,1.455166,1.465132,1.0,9.0,0.0,0.0,-1.0
25%,625797.2,6.982121,6.983147,6.960654,7.004458,516190.0,7291982.0,0.0,0.0,0.0
50%,1039028.0,11.67235,11.67242,11.63231,11.71537,1171300.0,15051770.0,0.0,0.0,0.0
75%,1452260.0,18.91984,18.92,18.85312,18.98446,2716151.0,32172680.0,0.0,0.0,0.0
max,1865491.0,792.11,791.9424,790.2761,792.258,1109784000.0,10013950000.0,1.0,1.0,1.0
