In [1]:
%run _common.ipynb

Cuda available: True


In [2]:
from src import model_data as md

In [3]:
__file__ = Path("./create_training_data.ipynb")

In [4]:
__name__ = "create_training_data"

In [5]:
print(md.SingleTickerPipeline.__init__.__doc__)


        Parameters
        ----------
        target: str
            target can be "price", "return" or "log_return"
        target_type: str
            target type can be "single" for single point-in-time prediction 
            or "sequence" for sequence prediction (predicts a sequence of target shifted one day into the future)
            if single, output y shape is (N, 1)
            if sequence, output y shape is (N, model_seq_len, 1)
        model_seq_len: int
            model sequence length specifies the sequence length of each input sample. 
            E.g. 30 means using the past 30 days's historical data to predict the next day
        max_overlap: int
            maximum number of overlapping days between two sequences. Will be capped at model_seq_len - 1
            if it is larger than model_seq_len
        train_periods: list(tuple(str, str))
            training periods is a list of tuples, each tuple has a start date and an end date. 
            Data from all tr

In [6]:
single_ticker_pipeline = md.SingleTickerPipeline(
    target="price",
    target_type="single",
    model_seq_len=30,
    max_overlap=20,
    train_periods=[
        ("2000-01-01", "2006-12-31"),
        ("2009-01-01", "2018-12-31"),
    ],
    test_periods=[
        ("2007-01-01", "2008-12-31"),
        ("2019-01-01", "2021-04-01"),
    ],
    normalization_method="log",
#     lookback_period=200,
    cross_validation_folds=5,)

In [7]:
single_ticker_pipeline.prepare_data("TEAM")

INFO:src.model_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/TEAM.csv...
INFO:src.model_data:Making training arrays...
INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
INFO:src.model_data:  Training has 73 sequences of length 30.
INFO:src.model_data:Making 5 validation folds...
INFO:src.model_data:  Generating folds with fold_size=11 and distance between train and validation being 3
INFO:src.model_data:    Fold 0 shapes:
INFO:src.model_data:      x: (11, 30, 53), y: (11, 1)
INFO:src.model_data:      x: (11, 30, 53), y: (11, 1)
INFO:src.model_data:    Fold 1 shapes:
INFO:src.model_data:      x: (22, 30, 53), y: (22, 1)
INFO:src.model_data:      x: (11, 30, 53), y: (11, 1)
INFO:src.model_data:    Fold 2 shapes:
INFO:src.model_data:      x: (33, 30, 53), y: (33, 1)
INFO:src.model_data:      x: (11, 30, 53), 

In [8]:
# If loading existing data:
single_ticker_pipeline.load_data("TEAM")

INFO:src.model_data:Loading generated data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/model_data/TEAM...
INFO:src.model_data:  Loading train folds...
INFO:src.model_data:  Loading test arrays...


In [11]:
single_ticker_pipeline.print_train_shapes()

{
    "0": {
        "train": {
            "x": "(11, 30, 53)",
            "y": "(11, 1)",
            "prediction_date": "(11,)"
        },
        "valid": {
            "x": "(11, 30, 53)",
            "y": "(11, 1)",
            "prediction_date": "(11,)"
        }
    },
    "1": {
        "train": {
            "x": "(22, 30, 53)",
            "y": "(22, 1)",
            "prediction_date": "(22,)"
        },
        "valid": {
            "x": "(11, 30, 53)",
            "y": "(11, 1)",
            "prediction_date": "(11,)"
        }
    },
    "2": {
        "train": {
            "x": "(33, 30, 53)",
            "y": "(33, 1)",
            "prediction_date": "(33,)"
        },
        "valid": {
            "x": "(11, 30, 53)",
            "y": "(11, 1)",
            "prediction_date": "(11,)"
        }
    },
    "3": {
        "train": {
            "x": "(44, 30, 53)",
            "y": "(44, 1)",
            "prediction_date": "(44,)"
        },
        "valid": {
       

In [12]:
single_ticker_pipeline.print_test_shapes()

{
    "N": 537,
    "prediction_date": "(537,)",
    "x": "(537, 30, 53)",
    "y": "(537, 1)"
}


In [9]:
pipeline = md.MultiTickerPipeline(
    train_periods=[
        ("2012-01-01", "2019-12-31"),
    ],
    test_periods=[
        ("2020-01-01", "2021-04-01"),
    ],
)

In [131]:
pipeline.prepare_data(['_all_'])

INFO:create_training_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/TEAM.csv...
INFO:create_training_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/CDNS.csv...
INFO:create_training_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/CTAS.csv...
INFO:create_training_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/SGEN.csv...
INFO:create_training_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/FISV.csv...
INFO:create_training_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/CERN.csv...
INFO:create_training_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/CMCS

In [132]:
pipeline.print_test_shapes()

{
    "N": 27360,
    "prediction_date": "(27360, 1)",
    "ticker": "(27360,)",
    "x": "(27360, 30, 53)",
    "y": "(27360, 30, 1)"
}


In [133]:
pipeline.print_train_shapes()

{
    "0": {
        "train": {
            "x": "(2734, 2, 30, 53)",
            "y": "(2734, 2, 30, 1)",
            "prediction_date": "(2734, 2, 1)",
            "ticker": "(2734, 2)"
        },
        "valid": {
            "x": "(2803, 2, 30, 53)",
            "y": "(2803, 2, 30, 1)",
            "prediction_date": "(2803, 2, 1)",
            "ticker": "(2803, 2)"
        }
    },
    "1": {
        "train": {
            "x": "(5450, 2, 30, 53)",
            "y": "(5450, 2, 30, 1)",
            "prediction_date": "(5450, 2, 1)",
            "ticker": "(5450, 2)"
        },
        "valid": {
            "x": "(2829, 2, 30, 53)",
            "y": "(2829, 2, 30, 1)",
            "prediction_date": "(2829, 2, 1)",
            "ticker": "(2829, 2)"
        }
    },
    "2": {
        "train": {
            "x": "(8187, 2, 30, 53)",
            "y": "(8187, 2, 30, 1)",
            "prediction_date": "(8187, 2, 1)",
            "ticker": "(8187, 2)"
        },
        "valid": {
    

### Train dictionary structure

In [9]:
single_ticker_pipeline.print_train_shapes()

{
    "0": {
        "train": {
            "x": "(11, 30, 53)",
            "y": "(11, 1)",
            "target_date": "(11, 1)"
        },
        "valid": {
            "x": "(11, 30, 53)",
            "y": "(11, 1)",
            "target_date": "(11, 1)"
        }
    },
    "1": {
        "train": {
            "x": "(22, 30, 53)",
            "y": "(22, 1)",
            "target_date": "(22, 1)"
        },
        "valid": {
            "x": "(11, 30, 53)",
            "y": "(11, 1)",
            "target_date": "(11, 1)"
        }
    },
    "2": {
        "train": {
            "x": "(33, 30, 53)",
            "y": "(33, 1)",
            "target_date": "(33, 1)"
        },
        "valid": {
            "x": "(11, 30, 53)",
            "y": "(11, 1)",
            "target_date": "(11, 1)"
        }
    },
    "3": {
        "train": {
            "x": "(44, 30, 53)",
            "y": "(44, 1)",
            "target_date": "(44, 1)"
        },
        "valid": {
            "x": "(11

### Test dictionary structure

In [15]:
single_ticker_pipeline.print_test_shapes()

{
    "N": 537,
    "target_date": "(537, 1)",
    "x": "(537, 30, 53)",
    "y": "(537, 1)"
}


# Drafts

In [5]:
data_path = Path(__file__).absolute().parent.parent.joinpath("data/feature_selected")

In [6]:
data_path

PosixPath('/home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected')

In [7]:
data_files = list(data_path.glob("*.csv"))

## Config

In [8]:
# target can be "price", "return" or "log_return"
target = "price"

# target type can be single prediction or sequence prediction
target_type = "sequence"

# model sequence length specifies the sequence length of each input sample. E.g. 30 means using the past 30 days's historical data to predict the next day
model_seq_len = 30

# maximum number of overlapping days of historical data between two records
max_overlap = 20

# training periods is a list of tuples, each tuple has a start date and an end date. Data from all training periods are put together
# Note that training periods will be further divided into training and valiation, or time series cross validation
train_periods = [
    ("2000-01-01", "2006-12-31"),
    ("2010-01-01", "2018-12-31")
]

# testing periods - similar to training periods
test_periods = [
    ("2007-01-01", "2009-12-31"),
    ("2019-01-01", "2021-04-01")
]

cross_validation_folds = 5

## Single Ticker

In [9]:
import numpy as np

In [10]:
x = np.random.randint(0, 10, size=(10, 1))

In [11]:
# copy of model_data.py to modify

In [6]:
pipeline = md.MultiTickerPipeline(
    target="price",
    target_type="sequence",
    model_seq_len=30,
    max_overlap=20,
    train_periods=[
        ("2012-01-01", "2019-12-31"),
    ],
    test_periods=[
        ("2020-01-01", "2021-04-01"),
    ],
    normalization_method="log",
    cross_validation_folds=5
)

In [6]:
pipeline.prepare_data(['_all_'])

INFO:src.model_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/TEAM.csv...
INFO:src.model_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/CDNS.csv...
INFO:src.model_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/CTAS.csv...
INFO:src.model_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/SGEN.csv...
INFO:src.model_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/FISV.csv...
INFO:src.model_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/CERN.csv...
INFO:src.model_data:Reading data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/feature_selected/CMCSA.csv...
INFO:src.model_data:Reading data 

Traceback (most recent call last):
  File "/home/rluo/anaconda3/envs/trade/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-5dfb3cae15cf>", line 1, in <module>
    pipeline.prepare_data(['_all_'])
  File "/home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/src/model_data.py", line 493, in prepare_data
    self.create_arrays()
  File "/home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/src/model_data.py", line 397, in create_arrays
    test_xy_arrays[ticker] = single_pipeline.get_xy_arr(test_dfs, seq_dist=1)
  File "/home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/src/model_data.py", line 126, in get_xy_arr
    feature_subdf = df[self._feature_cols].iloc[(N - (i * seq_dist + self.model_seq_len)):(N - i * seq_dist)]
  File "/home/rluo/anaconda3/envs/trade/lib/python3.8/site-packages/pandas/core/frame.py", line 2806, in __getitem_

TypeError: object of type 'NoneType' has no len()

In [21]:
train_dates = np.array(
    sorted(list(set(pipeline._train_xy_arrays['prediction_date'].reshape(-1))))
)

In [25]:
train_val_distance = int(np.ceil(pipeline.model_seq_len / pipeline._seq_dist))
fold_size = (len(train_dates) - train_val_distance) // (pipeline.cross_validation_folds + 1)
i = 2

In [26]:
train_end_dt = train_dates[fold_size * (i + 1)]

In [32]:
pipeline._train_xy_arrays["prediction_date"].shape

(17359, 1)

In [27]:
train_inds = np.argwhere(
    pipeline._train_xy_arrays["prediction_date"] <= train_end_dt
)

In [30]:
pipeline._train_xy_arrays["prediction_date"] <= train_end_dt

array([[ True],
       [ True],
       [ True],
       ...,
       [False],
       [False],
       [False]])

In [31]:
train_inds

array([[    0,     0],
       [    1,     0],
       [    2,     0],
       ...,
       [17241,     0],
       [17242,     0],
       [17243,     0]])

In [20]:
pipeline._train_xy_arrays["x"][train_inds]

(17359, 1)

In [15]:
pipeline.print_train_shapes()

{
    "0": {
        "train": {
            "x": "(2734, 2, 30, 53)",
            "y": "(2734, 2, 30, 1)",
            "prediction_date": "(2734, 2, 1)",
            "ticker": "(2734, 2)"
        },
        "valid": {
            "x": "(2803, 2, 30, 53)",
            "y": "(2803, 2, 30, 1)",
            "prediction_date": "(2803, 2, 1)",
            "ticker": "(2803, 2)"
        }
    },
    "1": {
        "train": {
            "x": "(5450, 2, 30, 53)",
            "y": "(5450, 2, 30, 1)",
            "prediction_date": "(5450, 2, 1)",
            "ticker": "(5450, 2)"
        },
        "valid": {
            "x": "(2829, 2, 30, 53)",
            "y": "(2829, 2, 30, 1)",
            "prediction_date": "(2829, 2, 1)",
            "ticker": "(2829, 2)"
        }
    },
    "2": {
        "train": {
            "x": "(8187, 2, 30, 53)",
            "y": "(8187, 2, 30, 1)",
            "prediction_date": "(8187, 2, 1)",
            "ticker": "(8187, 2)"
        },
        "valid": {
    

In [None]:
pipeline._train_out[4]['train']

In [51]:
pipeline.load_data("TEAM")

INFO:__main__:Loading generated data from /home/rluo/raid/classes/gatech/cs7643/GATech-CS7643-Project-Group/data/model_data/TEAM...
INFO:__main__:  Loading train folds...
INFO:__main__:  Loading test arrays...


In [12]:
train_dfs = get_period_data(pipeline._df, pipeline.train_periods)

In [None]:
means = 

In [15]:
train_concat = pd.concat(train_dfs, axis=0)

In [48]:
np.sign(train_concat[pipeline._feature_cols])

Unnamed: 0,adj_close,ev,marketcap,pb,pe,evebit,retearn,accoci,ps,shareswa,...,revenueusd,revenue,divyield,sgna,cor,receivables,gp,taxliabilities,invcap,currentratio
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
764,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
765,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
766,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
767,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,0.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [35]:
train_describe = train_concat.describe().T

In [28]:
stats = train_concat[pipeline._feature_cols].agg(['mean', 'std'])

In [32]:
(train_concat[pipeline._feature_cols] - stats.loc['mean', :]) / stats.loc['std', :]

Unnamed: 0,adj_close,ev,marketcap,pb,pe,evebit,retearn,accoci,ps,shareswa,...,revenueusd,revenue,divyield,sgna,cor,receivables,gp,taxliabilities,invcap,currentratio
0,-0.771824,-0.782292,-0.799935,2.209687,1.020279,1.992171,1.178283,,0.294485,-2.947541,...,-1.689038,-1.689038,,-1.737656,-1.433170,-0.727768,-1.782124,-1.273314,-1.749773,1.509293
1,-0.785770,-0.794417,-0.811850,2.178333,1.010464,1.971751,1.178283,,0.232390,-2.947541,...,-1.689038,-1.689038,,-1.737656,-1.433170,-0.727768,-1.782124,-1.273314,-1.749773,1.509293
2,-0.847530,-0.848147,-0.864652,1.990209,0.966577,1.881138,1.178283,,0.015059,-2.947541,...,-1.689038,-1.689038,,-1.737656,-1.433170,-0.727768,-1.782124,-1.273314,-1.749773,1.509293
3,-0.835576,-0.837746,-0.854430,2.021563,0.974990,1.898686,1.178283,,0.046107,-2.947541,...,-1.689038,-1.689038,,-1.737656,-1.433170,-0.727768,-1.782124,-1.273314,-1.749773,1.509293
4,-0.833584,-0.836022,-0.852737,2.021563,0.976392,1.901717,1.178283,,0.077154,-2.947541,...,-1.689038,-1.689038,,-1.737656,-1.433170,-0.727768,-1.782124,-1.273314,-1.749773,1.509293
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
764,1.762335,1.770585,1.807062,2.084271,-0.036935,-0.070557,-0.830421,,1.287999,0.518039,...,0.872251,0.872251,,0.911815,0.912132,0.802412,0.844046,0.722499,-0.429552,-0.847556
765,2.094047,2.097117,2.127951,2.429166,-0.043244,-0.079810,-0.830421,,1.815803,0.518039,...,0.872251,0.872251,,0.911815,0.912132,0.802412,0.844046,0.722499,-0.429552,-0.847556
766,2.206112,2.207421,2.236349,2.554582,-0.045488,-0.083000,-0.830421,,1.971040,0.518039,...,0.872251,0.872251,,0.911815,0.912132,0.802412,0.844046,0.722499,-0.429552,-0.847556
767,2.207606,2.208895,2.237798,2.554582,-0.045488,-0.083160,-0.830421,,1.971040,0.518039,...,0.872251,0.872251,,0.911815,0.912132,0.802412,0.844046,0.722499,-0.429552,-0.847556


In [27]:
stats[stats['std'] == 0]

Unnamed: 0,mean,std
accoci,0.0,0.0
ncfdiv,0.0,0.0
investmentsnc,0.0,0.0
dps,0.0,0.0
debtc,0.0,0.0
inventory,0.0,0.0
divyield,0.0,0.0


In [254]:
pipeline._df.columns

Index(['target', 'adj_close', 'ev', 'marketcap', 'pb', 'pe', 'evebit',
       'retearn', 'accoci', 'ps', 'shareswa', 'de', 'taxassets', 'ncfdiv',
       'shareswadil', 'sharesbas', 'debt', 'ps1', 'evebitda', 'bvps',
       'ppnenet', 'investmentsnc', 'equity', 'sps', 'rnd', 'debtusd',
       'equityusd', 'payables', 'assets', 'liabilities', 'assetsnc', 'depamor',
       'tangibles', 'debtnc', 'dps', 'liabilitiesnc', 'debtc', 'tbvps',
       'intangibles', 'opex', 'sbcomp', 'grossmargin', 'inventory',
       'revenueusd', 'revenue', 'divyield', 'sgna', 'cor', 'receivables', 'gp',
       'taxliabilities', 'invcap', 'currentratio', 'date'],
      dtype='object')

In [255]:
df = pipeline._df.copy()

In [257]:
train_dfs = get_period_data(pipeline._df, pipeline.train_periods)
train_xy_arrs = pipeline.get_xy_arr(train_dfs)

In [279]:
train_arrays = {"x": [], "y": [], "N": 0}
for train_df in train_dfs:
    N = train_df.shape[0]
    step = max_overlap
    if N >= model_seq_len:
        for i in range((N - model_seq_len) // (model_seq_len - max_overlap)):
            train_arrays["x"].append(train_df[pipeline._feature_cols].iloc[(N - (i * (model_seq_len - max_overlap) + model_seq_len)):(N - i * (model_seq_len - max_overlap))].values)
            train_arrays["y"].append([train_df["target"].iloc[(N - i * (model_seq_len - max_overlap)) - 1]])
            train_arrays["N"] += 1
train_arrays["x"] = np.array(train_arrays['x'][::-1])
train_arrays["y"] = np.array(train_arrays['y'][::-1])

In [280]:
train_val_distance = int(np.ceil(model_seq_len / (model_seq_len - max_overlap)))
fold_size = (train_arrays["N"] - train_val_distance) // cross_validation_folds

In [281]:
fold_size

14

In [282]:
folds = {}

In [283]:
for i in range(cross_validation_folds):
    train_end_ind = fold_size * (i + 1)
    val_begin_ind = fold_size * (i + 1) + train_val_distance
    val_end_ind = val_begin_ind + fold_size
    fold_arrs = {
        "train":{
            "x": train_arrays["x"][:train_end_ind],
            "y": train_arrays["y"][:train_end_ind],
        },
        "valid":{
            "x": train_arrays["x"][val_begin_ind:val_end_ind],
            "y": train_arrays["y"][val_begin_ind:val_end_ind],
        },
    }
    folds[i] = fold_arrs

In [14]:
date_diff = pd.to_datetime(df['date']) - pd.to_datetime(df['date'].shift(-1))

In [22]:
df.loc[1315:1323]

Unnamed: 0,date,adj_close,ev,marketcap,pb,pe,evebit,retearn,accoci,ps,...,revenueusd,revenue,divyield,sgna,cor,receivables,gp,taxliabilities,invcap,currentratio
1315,2016-01-25,23.67,4731.0,4939.3,22.9,597.0,1038.0,25049000.0,0.0,14.0,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741
1316,2016-01-22,24.61,4927.2,5135.5,23.8,620.7,1081.0,25049000.0,0.0,14.5,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741
1317,2016-01-21,24.24,4850.0,5058.3,23.4,611.3,1064.1,25049000.0,0.0,14.3,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741
1318,2016-01-20,22.63,4514.0,4722.3,21.9,570.7,990.3,25049000.0,0.0,13.4,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741
1319,2016-01-19,24.02,4804.1,5012.4,23.2,605.8,1054.0,25049000.0,0.0,14.2,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741
1320,2016-01-15,25.19,5048.2,5256.5,24.3,635.3,1107.5,25049000.0,0.0,14.9,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741
1321,2016-01-14,26.84,5392.5,5600.8,25.9,676.9,1183.1,25049000.0,0.0,15.8,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741
1322,2016-01-13,26.73,5369.6,5577.9,25.8,674.1,1178.1,25049000.0,0.0,15.8,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741
1323,2016-01-12,26.82,5388.4,5596.7,25.9,676.4,1182.2,25049000.0,0.0,15.8,...,109706000.0,109706000.0,0.0,40020000.0,18473000.0,18273000.0,91233000.0,5584000.0,130095000.0,3.741


In [8]:
df['ps1']

0       31.945
1       31.945
2       31.945
3       31.945
4       31.945
         ...  
1340    11.881
1341    11.881
1342    11.881
1343    11.881
1344    11.881
Name: ps1, Length: 1345, dtype: float64