# Loading Data and Visualizations
## Goals
- Get the data
- Inspect the data
- Use the `.pipe` method

## Loading Data

In [1]:
from matplotlib import dates # to load dates library from matplotlib
import matplotlib.pyplot as plt # to import general matplotlib plotting library
import numpy as np # numPy
import pandas as pd # pandas
import yfinance as yf # Yfinance

In [3]:
# Download price history for SPY and AAPL in a single call; the result is one
# DataFrame covering both tickers.
# NOTE(review): yfinance documents `end` as exclusive, so 2019-12-31 itself is
# probably not included -- confirm whether that is intended.
raw = yf.download(
    'SPY AAPL',
    start='2010-01-01',
    end='2019-12-31',
)

[*********************100%***********************]  2 of 2 completed


In [4]:
# yf.download gives us a pandas DataFrame; evaluating it as the last
# expression of the cell renders it below.

raw

Unnamed: 0_level_0,Adj Close,Adj Close,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Unnamed: 0_level_1,AAPL,SPY,AAPL,SPY,AAPL,SPY,AAPL,SPY,AAPL,SPY,AAPL,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2010-01-04,6.505280,88.117912,7.643214,113.330002,7.660714,113.389999,7.585000,111.510002,7.622500,112.370003,493729600,118944600
2010-01-05,6.516527,88.351135,7.656429,113.629997,7.699643,113.680000,7.616071,112.849998,7.664286,113.260002,601904800,111579900
2010-01-06,6.412872,88.413345,7.534643,113.709999,7.686786,113.989998,7.526786,113.430000,7.656429,113.519997,552160000,116074400
2010-01-07,6.401018,88.786552,7.520714,114.190002,7.571429,114.330002,7.466071,113.180000,7.562500,113.500000,477131200,131091100
2010-01-08,6.443573,89.082039,7.570714,114.570000,7.571429,114.620003,7.466429,113.660004,7.510714,113.889999,447610800,126402800
...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-23,69.451057,305.051178,71.000000,321.220001,71.062500,321.649994,70.092499,321.059998,70.132500,321.589996,98572000,52990000
2019-12-24,69.517090,305.060699,71.067497,321.230011,71.222504,321.519989,70.730003,320.899994,71.172501,321.470001,48478800,20270000
2019-12-26,70.896332,306.684631,72.477501,322.940002,72.495003,322.950012,71.175003,321.640015,71.205002,321.649994,93121200,30911200
2019-12-27,70.869423,306.608612,72.449997,322.859985,73.492500,323.799988,72.029999,322.279999,72.779999,323.739990,146266000,42528800


In [5]:
# Inspect the column labels: a two-level MultiIndex of (price field, ticker) pairs.
raw.columns

MultiIndex([('Adj Close', 'AAPL'),
            ('Adj Close',  'SPY'),
            (    'Close', 'AAPL'),
            (    'Close',  'SPY'),
            (     'High', 'AAPL'),
            (     'High',  'SPY'),
            (      'Low', 'AAPL'),
            (      'Low',  'SPY'),
            (     'Open', 'AAPL'),
            (     'Open',  'SPY'),
            (   'Volume', 'AAPL'),
            (   'Volume',  'SPY')],
           )

In [6]:
# We are going to use the .pipe method a lot.
# If you put a question mark after a function in Jupyter, it will pull up that function's documentation.
raw.pipe?

[1;31mSignature:[0m
[0mraw[0m[1;33m.[0m[0mpipe[0m[1;33m([0m[1;33m
[0m    [0mfunc[0m[1;33m:[0m [1;34m'Callable[..., T] | tuple[Callable[..., T], str]'[0m[1;33m,[0m[1;33m
[0m    [1;33m*[0m[0margs[0m[1;33m,[0m[1;33m
[0m    [1;33m**[0m[0mkwargs[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m [1;33m->[0m [1;34m'T'[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Apply chainable functions that expect Series or DataFrames.

Parameters
----------
func : function
    Function to apply to the Series/DataFrame.
    ``args``, and ``kwargs`` are passed into ``func``.
    Alternatively a ``(callable, data_keyword)`` tuple where
    ``data_keyword`` is a string indicating the keyword of
    ``callable`` that expects the Series/DataFrame.
args : iterable, optional
    Positional arguments passed into ``func``.
kwargs : mapping, optional
    A dictionary of keyword arguments passed into ``func``.

Returns
-------
object : the return type of ``func``.

See Also
--------
Da

In [7]:
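# Here we define a function, fix_cols, that flattens the two-level column labels
# down to their outer level (the price field); the AAPL columns themselves are
# selected in the chain below.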

def fix_cols(df):
    """Return ``df`` relabelled with only the outer level of its column labels.

    For two-level columns such as ``('Close', 'AAPL')`` the result has the
    single label ``'Close'``. A frame whose columns are already flat keeps its
    labels unchanged.

    The input frame is not modified: a relabelled frame is returned instead,
    so the function is safe to use in a ``.pipe`` chain and the cell can be
    re-run without side effects on the caller's data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be flattened.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and flat column labels.
    """
    # get_level_values(0) works for both a MultiIndex and a flat Index, unlike
    # indexing each label with [0], which would cut a plain string label down
    # to its first character. list() drops any level name so the resulting
    # columns are a plain, unnamed Index.
    outer = list(df.columns.get_level_values(0))
    # set_axis returns a new frame rather than assigning to df.columns in place.
    return df.set_axis(outer, axis=1)

# Select the AAPL columns by ticker label (inner column level) instead of by
# position, so the result does not depend on how many tickers were downloaded
# or on the order the columns come back in. drop_level=False keeps the
# two-level labels so that fix_cols still does the flattening in the .pipe step.
(raw
 .xs('AAPL', axis=1, level=1, drop_level=False)
 .pipe(fix_cols)
)

# At this point we have pulled out all of the Apple (AAPL) values from our data.
# We used .pipe because flattening the column labels is done by our own function
# rather than by a DataFrame method; .pipe lets us slot that function into the
# method chain alongside the built-in pandas steps.


Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,6.505280,7.643214,7.660714,7.585000,7.622500,493729600
2010-01-05,6.516527,7.656429,7.699643,7.616071,7.664286,601904800
2010-01-06,6.412872,7.534643,7.686786,7.526786,7.656429,552160000
2010-01-07,6.401018,7.520714,7.571429,7.466071,7.562500,477131200
2010-01-08,6.443573,7.570714,7.571429,7.466429,7.510714,447610800
...,...,...,...,...,...,...
2019-12-23,69.451057,71.000000,71.062500,70.092499,70.132500,98572000
2019-12-24,69.517090,71.067497,71.222504,70.730003,71.172501,48478800
2019-12-26,70.896332,72.477501,72.495003,71.175003,71.205002,93121200
2019-12-27,70.869423,72.449997,73.492500,72.029999,72.779999,146266000


In [10]:
import yfinance as yf
def fix_cols(df):
    """Return ``df`` relabelled with only the outer level of its column labels.

    For two-level columns such as ``('Close', 'AAPL')`` the result has the
    single label ``'Close'``. A frame whose columns are already flat keeps its
    labels unchanged.

    The input frame is not modified: a relabelled frame is returned instead,
    so the function is safe to use in a ``.pipe`` chain and the cell can be
    re-run without side effects on the caller's data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be flattened.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and flat column labels.
    """
    # get_level_values(0) works for both a MultiIndex and a flat Index, unlike
    # indexing each label with [0], which would cut a plain string label down
    # to its first character. list() drops any level name so the resulting
    # columns are a plain, unnamed Index.
    outer = list(df.columns.get_level_values(0))
    # set_axis returns a new frame rather than assigning to df.columns in place.
    return df.set_axis(outer, axis=1)

def tweak_data(ticker='AAPL', tickers='SPY AAPL', start='2010-01-01', end='2019-12-31'):
    """Download price history and return one ticker's columns with flat labels.

    Called with no arguments it reproduces the original behaviour: download
    SPY and AAPL from 2010-01-01 to 2019-12-31 and return the AAPL columns.

    Parameters
    ----------
    ticker : str
        Ticker whose columns are kept; selected by label on the inner column
        level of the downloaded frame.
    tickers : str
        Space-separated tickers passed to ``yf.download``. Assumes the download
        yields two-level (price field, ticker) columns, as it does for the
        default two-ticker request -- TODO confirm for single-ticker requests.
    start, end : str
        Date bounds passed straight through to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        The selected ticker's columns, relabelled by ``fix_cols``.
    """
    raw = yf.download(tickers, start=start, end=end)

    # Pick the ticker by label rather than by column position (.iloc[:, ::2]),
    # which only worked for exactly two tickers in a particular order.
    # drop_level=False keeps the two-level labels for fix_cols to flatten.
    return (raw
            .xs(ticker, axis=1, level=1, drop_level=False)
            .pipe(fix_cols)
            )
                      
# Run the whole download-and-tidy pipeline; the returned frame is shown as the cell output.
tweak_data()



[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,6.505280,7.643214,7.660714,7.585000,7.622500,493729600
2010-01-05,6.516528,7.656429,7.699643,7.616071,7.664286,601904800
2010-01-06,6.412872,7.534643,7.686786,7.526786,7.656429,552160000
2010-01-07,6.401017,7.520714,7.571429,7.466071,7.562500,477131200
2010-01-08,6.443573,7.570714,7.571429,7.466429,7.510714,447610800
...,...,...,...,...,...,...
2019-12-23,69.451050,71.000000,71.062500,70.092499,70.132500,98572000
2019-12-24,69.517075,71.067497,71.222504,70.730003,71.172501,48478800
2019-12-26,70.896339,72.477501,72.495003,71.175003,71.205002,93121200
2019-12-27,70.869431,72.449997,73.492500,72.029999,72.779999,146266000
