# Data for trading bots
### 2022/06/07, AJ Zerouali
### Updated: 2022/06/14

Notebook for data collection and cleaning. Mainly collect stock price data using pd_datareader and convert to CSVs.



In [1]:
import numpy as np
import pandas as pd
import pandas_datareader as reader
import matplotlib.pyplot as plt
import datetime as datetime

In [2]:
from sklearn.preprocessing import StandardScaler

In [2]:
%matplotlib inline

In [4]:
# A long list of tickers
lng_list_tickers = ['MMM','ABT','ABBV','ACN','ATVI','AYI','ADBE','AMD','AAP','AES','AET',
    'AMG','AFL','A','APD','AKAM','ALK','ALB','ARE','ALXN','ALGN','ALLE',
    'AGN','ADS','LNT','ALL','GOOGL','GOOG','MO','AMZN','AEE','AAL','AEP',
    'AXP','AIG','AMT','AWK','AMP','ABC','AME','AMGN','APH','APC','ADI','ANDV',
    'ANSS','ANTM','AON','AOS','APA','AIV','AAPL','AMAT','APTV','ADM','ARNC',
    'AJG','AIZ','T','ADSK','ADP','AZO','AVB','AVY','BHGE','BLL','BAC','BK',
    'BAX','BBT','BDX','BRK.B','BBY','BIIB','BLK','HRB','BA','BWA','BXP','BSX',
    'BHF','BMY','AVGO','BF.B','CHRW','CA','COG','CDNS','CPB','COF','CAH','CBOE',
    'KMX','CCL','CAT','CBG','CBS','CELG','CNC','CNP','CTL','CERN','CF','SCHW',
    'CHTR','CHK','CVX','CMG','CB','CHD','CI','XEC','CINF','CTAS','CSCO','C','CFG',
    'CTXS','CLX','CME','CMS','KO','CTSH','CL','CMCSA','CMA','CAG','CXO','COP',
    'ED','STZ','COO','GLW','COST','COTY','CCI','CSRA','CSX','CMI','CVS','DHI',
    'DHR','DRI','DVA','DE','DAL','XRAY','DVN','DLR','DFS','DISCA','DISCK','DISH',
    'DG','DLTR','D','DOV','DWDP','DPS','DTE','DRE','DUK','DXC','ETFC','EMN','ETN',
    'EBAY','ECL','EIX','EW','EA','EMR','ETR','EVHC','EOG','EQT','EFX','EQIX','EQR',
    'ESS','EL','ES','RE','EXC','EXPE','EXPD','ESRX','EXR','XOM','FFIV','FB','FAST',
    'FRT','FDX','FIS','FITB','FE','FISV','FLIR','FLS','FLR','FMC','FL','F','FTV',
    'FBHS','BEN','FCX','GPS','GRMN','IT','GD','GE','GGP','GIS','GM','GPC','GILD',
    'GPN','GS','GT','GWW','HAL','HBI','HOG','HRS','HIG','HAS','HCA','HCP','HP','HSIC',
    'HSY','HES','HPE','HLT','HOLX','HD','HON','HRL','HST','HPQ','HUM','HBAN','HII',
    'IDXX','INFO','ITW','ILMN','IR','INTC','ICE','IBM','INCY','IP','IPG','IFF','INTU',
    'ISRG','IVZ','IQV','IRM','JEC','JBHT','SJM','JNJ','JCI','JPM','JNPR','KSU','K','KEY',
    'KMB','KIM','KMI','KLAC','KSS','KHC','KR','LB','LLL','LH','LRCX','LEG','LEN','LUK',
    'LLY','LNC','LKQ','LMT','L','LOW','LYB','MTB','MAC','M','MRO','MPC','MAR','MMC','MLM',
    'MAS','MA','MAT','MKC','MCD','MCK','MDT','MRK','MET','MTD','MGM','KORS','MCHP','MU',
    'MSFT','MAA','MHK','TAP','MDLZ','MON','MNST','MCO','MS','MOS','MSI','MYL','NDAQ',
    'NOV','NAVI','NTAP','NFLX','NWL','NFX','NEM','NWSA','NWS','NEE','NLSN','NKE','NI',
    'NBL','JWN','NSC','NTRS','NOC','NCLH','NRG','NUE','NVDA','ORLY','OXY','OMC','OKE',
    'ORCL','PCAR','PKG','PH','PDCO','PAYX','PYPL','PNR','PBCT','PEP','PKI','PRGO','PFE',
    'PCG','PM','PSX','PNW','PXD','PNC','RL','PPG','PPL','PX','PCLN','PFG','PG','PGR',
    'PLD','PRU','PEG','PSA','PHM','PVH','QRVO','PWR','QCOM','DGX','RRC','RJF','RTN','O',
    'RHT','REG','REGN','RF','RSG','RMD','RHI','ROK','COL','ROP','ROST','RCL','CRM','SBAC',
    'SCG','SLB','SNI','STX','SEE','SRE','SHW','SIG','SPG','SWKS','SLG','SNA','SO','LUV',
    'SPGI','SWK','SBUX','STT','SRCL','SYK','STI','SYMC','SYF','SNPS','SYY','TROW','TPR',
    'TGT','TEL','FTI','TXN','TXT','TMO','TIF','TWX','TJX','TMK','TSS','TSCO','TDG','TRV',
    'TRIP','FOXA','FOX','TSN','UDR','ULTA','USB','UAA','UA','UNP','UAL','UNH','UPS','URI',
    'UTX','UHS','UNM','VFC','VLO','VAR','VTR','VRSN','VRSK','VZ','VRTX','VIAB','V','VNO',
    'VMC','WMT','WBA','DIS','WM','WAT','WEC','WFC','HCN','WDC','WU','WRK','WY','WHR','WMB',
    'WLTW','WYN','WYNN','XEL','XRX','XLNX','XL','XYL','YUM','ZBH','ZION','ZTS']
len(lng_list_tickers)

505

## 1) Trading bot -  Version 1

A simplified version with an environment of 3 stocks only. Our training data will be 10 years, starting 2010-01-01, and our test data will be 2020 and 2021.
Will start by creating a list of tickers:

In [5]:
list_0 = [ 'AAPL', 'MSFT', 'AMZN',
          'GOOG', 'TSLA', 'NVDA',
          'JNJ', 'JPM', 'CVX',
          'BAC', 'INTC', 'AMD'
         ]

In [6]:
len(list_0)

12

### 1.a - Training dataset

#### 1.a.1 - Building 10 year dataset

Now onto downloading the data. Here's the function that we'll use:

In [7]:
help(reader.data.DataReader)

Help on function DataReader in module pandas_datareader.data:

DataReader(name, data_source=None, start=None, end=None, retry_count=3, pause=0.1, session=None, api_key=None)
    Imports data from a number of online sources.
    
    Currently supports Google Finance, St. Louis FED (FRED),
    and Kenneth French's data library, among others.
    
    Parameters
    ----------
    name : str or list of strs
        the name of the dataset. Some data sources (IEX, fred) will
        accept a list of names.
    data_source: {str, None}
        the data source ("iex", "fred", "ff")
    start : string, int, date, datetime, Timestamp
        left boundary for range (defaults to 1/1/2010)
    end : string, int, date, datetime, Timestamp
        right boundary for range (defaults to today)
    retry_count : {int, 3}
        Number of times to retry query request.
    pause : {numeric, 0.001}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given 

In [10]:
data_training_start = datetime.datetime(2010, 1, 1)
data_training_end = datetime.datetime(2019, 12, 31)

In [11]:
df_data_training = reader.data.DataReader(name = list_0, data_source = "yahoo", \
                                          start = data_training_start, end = data_training_end)

In [12]:
df_data_training.to_csv("Training_DataSet_01.csv")

In [13]:
df_data_training

Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Symbols,AAPL,MSFT,AMZN,GOOG,TSLA,NVDA,JNJ,JPM,CVX,BAC,...,AMZN,GOOG,TSLA,NVDA,JNJ,JPM,CVX,BAC,INTC,AMD
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2010-01-04,6.535086,23.800219,6.695000,312.204773,,4.245554,44.863743,31.292093,48.064461,13.329647,...,151998000.0,3927065.0,,80020400.0,9506200.0,35460500.0,10173800.0,180845200.0,47800900.0,18748700.0
2010-01-05,6.546385,23.807919,6.734500,310.829926,,4.307549,44.343533,31.898205,48.404949,13.762925,...,177038000.0,6031925.0,,72864800.0,10673100.0,41208300.0,10593700.0,209521300.0,52357700.0,22145700.0
2010-01-06,6.442255,23.661806,6.612500,302.994293,,4.335103,44.704216,32.073475,48.411011,13.924343,...,143576000.0,7987226.0,,64916800.0,13171500.0,27729000.0,11014600.0,205257900.0,40037400.0,18643400.0
2010-01-07,6.430344,23.415730,6.500000,295.940735,,4.250144,44.385155,32.708813,48.228630,14.383107,...,220604000.0,12876685.0,,54779200.0,10901600.0,44864700.0,9626900.0,320868400.0,54041500.0,26806800.0
2010-01-08,6.473097,23.577223,6.676000,299.885956,,4.259330,44.537754,32.628487,48.313728,14.255668,...,196610000.0,9484016.0,,47816800.0,7898000.0,33110100.0,5624300.0,220104700.0,48234700.0,13752800.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-24,69.835594,153.758926,89.460503,1343.560059,85.050003,59.494614,136.812378,127.787819,106.216789,33.296745,...,17626000.0,347500.0,40273500.0,13886400.0,4238900.0,3324300.0,1640100.0,13805400.0,6313200.0,44432200.0
2019-12-26,71.221161,155.019241,93.438499,1360.400024,86.188004,59.636734,136.718643,129.143875,106.446274,33.580360,...,120108000.0,667500.0,53169500.0,18285200.0,3019300.0,6770900.0,3648600.0,26992100.0,11480300.0,57562800.0
2019-12-27,71.194122,155.302612,93.489998,1351.890015,86.075996,59.058292,136.643661,129.236771,106.181480,33.419647,...,123732000.0,1038400.0,49728500.0,25464400.0,3432700.0,7868200.0,4138300.0,28111300.0,14085800.0,36581300.0
2019-12-30,71.616684,153.964096,92.344498,1336.140015,82.940002,57.923855,136.221786,128.763077,105.784286,33.230568,...,73494000.0,1050900.0,62932000.0,25805600.0,2923300.0,6963000.0,4580200.0,30705100.0,12750500.0,41149700.0


In [17]:
df_data_training.info

<bound method DataFrame.info of Attributes  Adj Close                                                 \
Symbols          AAPL        MSFT       AMZN         GOOG       TSLA   
Date                                                                   
2010-01-04   6.535086   23.800219   6.695000   312.204773        NaN   
2010-01-05   6.546385   23.807919   6.734500   310.829926        NaN   
2010-01-06   6.442255   23.661806   6.612500   302.994293        NaN   
2010-01-07   6.430344   23.415730   6.500000   295.940735        NaN   
2010-01-08   6.473097   23.577223   6.676000   299.885956        NaN   
...               ...         ...        ...          ...        ...   
2019-12-24  69.835594  153.758926  89.460503  1343.560059  85.050003   
2019-12-26  71.221161  155.019241  93.438499  1360.400024  86.188004   
2019-12-27  71.194122  155.302612  93.489998  1351.890015  86.075996   
2019-12-30  71.616684  153.964096  92.344498  1336.140015  82.940002   
2019-12-31  72.139954  154.07157

In [18]:
len(df_data_training)

2516

In [28]:
help(pd.DataFrame.attrs)

Help on property:

    Dictionary of global attributes of this dataset.
    
    
    
    See Also
    --------
    DataFrame.flags : Global flags applying to this object.



#### 1.a.2 - Reduction

Next we clean-up our data frame. We want to keep the adjusted close prices and convert the dates into numbers (We don't really need the dates once we have the start date and the end date). One way to do this is to simply append series.

In [57]:
series_list = []
for tckr in list_0:
    temp_data_array = df_data_training[('Adj Close', tckr)].to_numpy()
    temp_series = pd.Series(data = temp_data_array, name = tckr)
    series_list.append(temp_series)
df_data_training_adjclose = pd.concat(series_list, ignore_index=True, axis = 1)
df_data_training_adjclose.columns = list_0

In [58]:
df_data_training_adjclose

Unnamed: 0,AAPL,MSFT,AMZN,GOOG,TSLA,NVDA,JNJ,JPM,CVX,BAC,INTC,AMD
0,6.535086,23.800219,6.695000,312.204773,,4.245554,44.863743,31.292093,48.064461,13.329647,14.353070,9.700000
1,6.546385,23.807919,6.734500,310.829926,,4.307549,44.343533,31.898205,48.404949,13.762925,14.346198,9.710000
2,6.442255,23.661806,6.612500,302.994293,,4.335103,44.704216,32.073475,48.411011,13.924343,14.298076,9.570000
3,6.430344,23.415730,6.500000,295.940735,,4.250144,44.385155,32.708813,48.228630,14.383107,14.160592,9.470000
4,6.473097,23.577223,6.676000,299.885956,,4.259330,44.537754,32.628487,48.313728,14.255668,14.318704,9.430000
...,...,...,...,...,...,...,...,...,...,...,...,...
2511,69.835594,153.758926,89.460503,1343.560059,85.050003,59.494614,136.812378,127.787819,106.216789,33.296745,55.636517,46.540001
2512,71.221161,155.019241,93.438499,1360.400024,86.188004,59.636734,136.718643,129.143875,106.446274,33.580360,56.020473,46.630001
2513,71.194122,155.302612,93.489998,1351.890015,86.075996,59.058292,136.643661,129.236771,106.181480,33.419647,56.263962,46.180000
2514,71.616684,153.964096,92.344498,1336.140015,82.940002,57.923855,136.221786,128.763077,105.784286,33.230568,55.833183,45.520000


In [59]:
df_data_training_adjclose.to_csv("Training_DataSet_01_AdjClose.csv")

### Small subset

In [49]:
df_X = pd.read_csv("Training_DataSet_01_AdjClose.csv", usecols=["AAPL", "MSFT", "GOOG"])
df_X = df_X.iloc[0:20]

In [50]:
df_X

Unnamed: 0,AAPL,MSFT,GOOG
0,6.535086,23.800219,312.204773
1,6.546385,23.807919,310.829926
2,6.442255,23.661806,302.994293
3,6.430344,23.41573,295.940735
4,6.473097,23.577223,299.885956
5,6.415993,23.277309,299.432648
6,6.34301,23.123512,294.137512
7,6.432483,23.338833,292.448822
8,6.395229,23.807919,293.823669
9,6.288352,23.731016,288.917053


In [51]:
df_X.to_numpy()

array([[  6.53508568,  23.80021858, 312.20477295],
       [  6.54638481,  23.80791855, 310.82992554],
       [  6.44225502,  23.66180611, 302.99429321],
       [  6.4303441 ,  23.41572952, 295.94073486],
       [  6.47309685,  23.57722282, 299.88595581],
       [  6.41599274,  23.27730942, 299.43264771],
       [  6.34300995,  23.12351227, 294.13751221],
       [  6.43248272,  23.33883286, 292.44882202],
       [  6.39522886,  23.80791855, 293.82366943],
       [  6.28835249,  23.73101616, 288.91705322],
       [  6.56653786,  23.91557503, 292.71282959],
       [  6.46546221,  23.52338219, 289.12130737],
       [  6.35369825,  23.07737541, 290.40148926],
       [  6.03856421,  22.26993561, 273.97805786],
       [  6.20101738,  22.5467701 , 268.99176025],
       [  6.28865671,  22.68518257, 270.19723511],
       [  6.34789944,  22.81592178, 270.0378418 ],
       [  6.08558989,  22.42373657, 266.1473999 ],
       [  5.86481237,  21.67012405, 263.98052979],
       [  5.94634295,  21.84699

#### 1.a.3 - Standard scaler for dataset

Will build a specific standard scaler for a subset our working dataset "Training_DataSet_01_AdjClose.csv". Will serve as a first example. Will take only tickers "AAPL", "AMZN", and "JNJ".

**IMPORTANT:** 

1) Won't work, you actually need "states" here, including cash and no.'s of shares.

2) For future reference, you call *pickle* to save data structures created in Python. In the case of *sklearn* scalers, can find example code at: https://machinelearningmastery.com/how-to-save-and-load-models-and-data-preparation-in-scikit-learn-for-later-use/. You can save a "trained" scaler using *pickle.dump*, and load one using *pickle.load*.

In [4]:
df_X = pd.read_csv("Training_DataSet_01_AdjClose.csv", usecols = ["AAPL", "AMZN", "JNJ"])

In [5]:
df_X

Unnamed: 0,AAPL,AMZN,JNJ
0,6.535086,6.695000,44.863743
1,6.546385,6.734500,44.343533
2,6.442255,6.612500,44.704216
3,6.430344,6.500000,44.385155
4,6.473097,6.676000,44.537754
...,...,...,...
2511,69.835594,89.460503,136.812378
2512,71.221161,93.438499,136.718643
2513,71.194122,93.489998,136.643661
2514,71.616684,92.344498,136.221786


In [6]:
X = df_X.to_numpy()

In [7]:
X

array([[  6.53508568,   6.69500017,  44.86374283],
       [  6.54638481,   6.73449993,  44.34353256],
       [  6.44225502,   6.61250019,  44.704216  ],
       ...,
       [ 71.19412231,  93.48999786, 136.6436615 ],
       [ 71.61668396,  92.34449768, 136.2217865 ],
       [ 72.13995361,  92.39199829, 136.75614929]])

In [None]:
Ex01_scaler = Standardscaler()

### 1.b - Testing data set

#### 1.b.1 - Creating the CSV files

The training data spanned 10 years. Now we'll build the dataframe corresponding to the tickers in list_0 from 2020-01-01 to 2021-12-31.

In [3]:
list_0 = [ 'AAPL', 'MSFT', 'AMZN',
          'GOOG', 'TSLA', 'NVDA',
          'JNJ', 'JPM', 'CVX',
          'BAC', 'INTC', 'AMD'
         ]

In [4]:
data_testing_start = datetime.datetime(2020, 1, 1)
data_testing_end = datetime.datetime(2021, 12, 31)

Load dataframes from Yahoo:

* The full price histories;
* Adjusted close only.

First we download


In [7]:
# All features
df_data_testing = reader.data.DataReader(name = list_0, data_source = "yahoo", \
                                          start = data_testing_start, end = data_testing_end)

In [8]:
df_data_testing

Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Symbols,AAPL,MSFT,AMZN,GOOG,TSLA,NVDA,JNJ,JPM,CVX,BAC,...,AMZN,GOOG,TSLA,NVDA,JNJ,JPM,CVX,BAC,INTC,AMD
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-02,73.785919,156.924393,94.900497,1367.369995,86.052002,59.803608,136.849915,131.047989,107.178879,33.693813,...,80580000.0,1406600.0,47660500.0,23753600.0,5777000.0,10803700.0,5205000.0,37614200.0,18056000.0,80331100.0
2020-01-03,73.068573,154.970383,93.748497,1360.660034,88.601997,58.846390,135.265503,129.318619,106.808144,32.994217,...,75288000.0,1186400.0,88892500.0,20538400.0,5752400.0,10386800.0,6360900.0,50357900.0,15293900.0,73127400.0
2020-01-06,73.650795,155.370987,95.143997,1394.209961,90.307999,59.093166,135.096725,129.215790,106.446266,32.946945,...,81236000.0,1732300.0,50665000.0,26263600.0,7731300.0,10259000.0,9953000.0,42185000.0,17755200.0,47934900.0
2020-01-07,73.304413,153.954330,95.343002,1393.339966,93.811996,59.808594,135.921768,127.019043,105.087006,32.729504,...,80898000.0,1502700.0,89410500.0,31485600.0,7382900.0,10531300.0,7856900.0,34149000.0,21876100.0,58061400.0
2020-01-08,74.483612,156.406586,94.598503,1404.319946,98.428001,59.920769,135.903015,128.009918,103.886620,33.060402,...,70160000.0,1528000.0,155721500.0,27710800.0,6605800.0,9695300.0,7295900.0,45311600.0,23133500.0,53767000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,179.836319,340.949280,169.669495,2961.280029,1093.939941,309.331909,167.511383,156.060837,116.596573,44.168400,...,58688000.0,662800.0,23715300.0,40368600.0,3704800.0,6188100.0,7745500.0,33879200.0,20985400.0,53296400.0
2021-12-28,178.799164,339.754517,170.660995,2928.959961,1088.469971,303.104279,168.182755,156.534454,116.370819,44.227768,...,54638000.0,931200.0,20108000.0,42059100.0,3187000.0,6342300.0,6516000.0,32492400.0,19290400.0,58699100.0
2021-12-29,178.888916,340.451477,169.201004,2930.090088,1086.189941,299.895508,169.367462,156.455521,115.772079,44.158508,...,35754000.0,851100.0,18718000.0,34313900.0,3662000.0,6398900.0,6957400.0,25294900.0,14174800.0,51300200.0
2021-12-30,177.712143,337.833008,168.644501,2920.050049,1070.339966,295.747070,170.117798,156.376587,115.261681,44.059563,...,37584000.0,648900.0,15680300.0,30886400.0,4231800.0,6948200.0,6016000.0,23852200.0,18359400.0,44358000.0


In [9]:
df_data_testing.to_csv("Testing_DataSet_01.csv")

In [10]:
# Creating dataframe with adj closes only
series_list = []
for tckr in list_0:
    temp_data_array = df_data_testing[('Adj Close', tckr)].to_numpy()
    temp_series = pd.Series(data = temp_data_array, name = tckr)
    series_list.append(temp_series)
df_data_testing_adjclose = pd.concat(series_list, ignore_index=True, axis = 1)
df_data_testing_adjclose.columns = list_0

In [11]:
df_data_testing_adjclose

Unnamed: 0,AAPL,MSFT,AMZN,GOOG,TSLA,NVDA,JNJ,JPM,CVX,BAC,INTC,AMD
0,73.785919,156.924393,94.900497,1367.369995,86.052002,59.803608,136.849915,131.047989,107.178879,33.693813,56.975693,49.099998
1,73.068573,154.970383,93.748497,1360.660034,88.601997,58.846390,135.265503,129.318619,106.808144,32.994217,56.282696,48.599998
2,73.650795,155.370987,95.143997,1394.209961,90.307999,59.093166,135.096725,129.215790,106.446266,32.946945,56.123493,48.389999
3,73.304413,153.954330,95.343002,1393.339966,93.811996,59.808594,135.921768,127.019043,105.087006,32.729504,55.187008,48.250000
4,74.483612,156.406586,94.598503,1404.319946,98.428001,59.920769,135.903015,128.009918,103.886620,33.060402,55.224464,47.830002
...,...,...,...,...,...,...,...,...,...,...,...,...
500,179.836319,340.949280,169.669495,2961.280029,1093.939941,309.331909,167.511383,156.060837,116.596573,44.168400,51.143055,154.360001
501,178.799164,339.754517,170.660995,2928.959961,1088.469971,303.104279,168.182755,156.534454,116.370819,44.227768,50.965816,153.149994
502,178.888916,340.451477,169.201004,2930.090088,1086.189941,299.895508,169.367462,156.455521,115.772079,44.158508,51.034748,148.259995
503,177.712143,337.833008,168.644501,2920.050049,1070.339966,295.747070,170.117798,156.376587,115.261681,44.059563,50.946129,145.149994


In [12]:
df_data_testing_adjclose.to_csv("Testing_DataSet_01_AdjClose.csv")

Let's check if we get what we need:

In [13]:
df_X = pd.read_csv("Testing_DataSet_01_AdjClose.csv", usecols = ['AMZN', 'JPM', 'INTC'])

In [14]:
df_X

Unnamed: 0,AMZN,JPM,INTC
0,94.900497,131.047989,56.975693
1,93.748497,129.318619,56.282696
2,95.143997,129.215790,56.123493
3,95.343002,127.019043,55.187008
4,94.598503,128.009918,55.224464
...,...,...,...
500,169.669495,156.060837,51.143055
501,170.660995,156.534454,50.965816
502,169.201004,156.455521,51.034748
503,168.644501,156.376587,50.946129


In [15]:
X = df_X.to_numpy()

In [16]:
X

array([[ 94.90049744, 131.04798889,  56.97569275],
       [ 93.74849701, 129.31861877,  56.28269577],
       [ 95.14399719, 129.21578979,  56.12349319],
       ...,
       [169.20100403, 156.45552063,  51.03474808],
       [168.64450073, 156.37658691,  50.94612885],
       [166.71699524, 156.24832153,  50.70980835]])

In [18]:
X.shape

(505, 3)