# Getting Financial Data - Google Finance

### Introduction:

This time you will get data from a website.


### Step 1. Import the necessary libraries

In [31]:
import pandas as pd

# package to extract data from various Internet sources into a DataFrame
# make sure you have it installed
from pandas_datareader import data, wb
import pandas_datareader.data as web  # needed to do some debugging here
# package for dates
import datetime as dt

### Step 2. Create your time range (start and end variables). The start date should be 01/01/2015 and the end should today (whatever your today is)

In [4]:
start = dt.datetime(2015, 1, 1)

end = dt.datetime.today()

# Docstring:
# datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])

# The year, month and day arguments are required. tzinfo may be None, or an
# instance of a tzinfo subclass. The remaining arguments may be ints or longs.

print(start)
print(end)

2015-01-01 00:00:00
2016-11-28 15:34:46.085651


### Step 3. Select the Apple, Tesla, Twitter, IBM, LinkedIn stocks symbols and assign them to a variable called stocks

In [5]:
stocks = ['AAPL', 'TSLA', 'IBM', 'LNKD']

### Step 4. Read the data from google, assign to df and print it

In [34]:
df = web.DataReader(stocks, 'google', start, end)  # Needed to do some debugging here

# Signature: web.DataReader(name, data_source=None, start=None, end=None, retry_count=3, pause=0.001, session=None)
# Docstring:
# Imports data from a number of online sources.

# Currently supports Yahoo! Finance, Google Finance, St. Louis FED (FRED)
# and Kenneth French's data library.

# Parameters
# ----------
# name : str or list of strs
#     the name of the dataset. Some data sources (yahoo, google, fred) will
#     accept a list of names.
# data_source: {str, None}
#     the data source ("yahoo", "yahoo-actions", "google", "fred", or "ff")
# start : {datetime, None}
#     left boundary for range (defaults to 1/1/2010)
# end : {datetime, None}
#     right boundary for range (defaults to today)
# retry_count : {int, 3}
#     Number of times to retry query request.
# pause : {numeric, 0.001}
#     Time, in seconds, to pause between consecutive queries of chunks. If
#     single value given for symbol, represents the pause between retries.
# session : Session, default None
#         requests.sessions.Session instance to be used

# Examples
# ----------

# # Data from Yahoo! Finance
# gs = DataReader("GS", "yahoo")

# # Corporate Actions (Dividend and Split Data) with ex-dates from Yahoo! Finance
# gs = DataReader("GS", "yahoo-actions")

# # Data from Google Finance
# aapl = DataReader("AAPL", "google")

# # Data from FRED
# vix = DataReader("VIXCLS", "fred")

# # Data from Fama/French
# ff = DataReader("F-F_Research_Data_Factors", "famafrench")
# ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
# ff = DataReader("6_Portfolios_2x3", "famafrench")
# ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")

df

<class 'pandas.core.panel.Panel'>
Dimensions: 5 (items) x 481 (major_axis) x 4 (minor_axis)
Items axis: Open to Volume
Major_axis axis: 2015-01-02 00:00:00 to 2016-11-28 00:00:00
Minor_axis axis: AAPL to TSLA

### Step 5.  What is the type of structure of df ?

In [40]:
# 'pandas.core.panel.Panel'
pd.core.panel.Panel

pandas.core.panel.Panel

### Step 6. Print all the Items axis values
#### To learn more about the Panel structure go to [documentation](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#panel) 

In [50]:
# For additional details and reference, see http://pandas.pydata.org/pandas-docs/stable/dsintro.html#panel

# Panel is a somewhat less-used, but still important container for 3-dimensional data. The term panel data is derived
# from econometrics and is partially responsible for the name pandas: pan(el)-da(ta)-s. The names for the 3 axes are
#     intended to give some semantic meaning to describing operations involving panel data and, in particular,
#     econometric analysis of panel data. However, for the strict purposes of slicing and dicing a collection of 
#     DataFrame objects, you may find the axis names slightly arbitrary:

# items: axis 0, each item corresponds to a DataFrame contained inside
# major_axis: axis 1, it is the index (rows) of each of the DataFrames
# minor_axis: axis 2, it is the columns of each of the DataFrames

df.items

Index([u'Open', u'High', u'Low', u'Close', u'Volume'], dtype='object')

### Step 7. Good, now we know  the data avaiable. Create a dataFrame called vol, with the Volume values.

In [43]:
vol = df['Volume']
vol.head()

Unnamed: 0_level_0,AAPL,IBM,LNKD,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-02,53204626.0,5525466.0,1203743.0,4764443.0
2015-01-05,64285491.0,4880389.0,1400562.0,5368477.0
2015-01-06,65797116.0,6146712.0,2006546.0,6261936.0
2015-01-07,40105934.0,4701839.0,985016.0,2968390.0
2015-01-08,59364547.0,4241113.0,1293955.0,3442509.0


### Step 8. Aggregate the data of Volume to weekly
#### Hint: Be careful to not sum data from the same week of 2015 and other years.

In [47]:
vol['week'] = vol.index.week
vol['year'] = vol.index.year

# Docstring:
# Immutable ndarray of datetime64 data, represented internally as int64, and
# which can be boxed to Timestamp objects that are subclasses of datetime and
# carry metadata such as frequency information.

# Parameters
# ----------
# data  : array-like (1-dimensional), optional
#     Optional datetime-like data to construct index with
# copy  : bool
#     Make a copy of input ndarray
# freq : string or pandas offset object, optional
#     One of pandas date offset strings or corresponding objects
# start : starting value, datetime-like, optional
#     If data is None, start is used as the start point in generating regular
#     timestamp data.
# periods  : int, optional, > 0
#     Number of periods to generate, if generating index. Takes precedence
#     over end argument
# end   : end time, datetime-like, optional
#     If periods is none, generated index will extend to first conforming
#     time on or just past end argument
# closed : string or None, default None
#     Make the interval closed with respect to the given frequency to
#     the 'left', 'right', or both sides (None)
# tz : pytz.timezone or dateutil.tz.tzfile
# ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
#     - 'infer' will attempt to infer fall dst-transition hours based on
#       order
#     - bool-ndarray where True signifies a DST time, False signifies a
#       non-DST time (note that this flag is only applicable for ambiguous
#       times)
#     - 'NaT' will return NaT where there are ambiguous times
#     - 'raise' will raise an AmbiguousTimeError if there are ambiguous times
# infer_dst : boolean, default False (DEPRECATED)
#     Attempt to infer fall dst-transition hours based on order
# name : object
#     Name to be stored in the index

week = vol.groupby(['week','year']).sum()
week.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,AAPL,IBM,LNKD,TSLA
week,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2015,53204626.0,5525466.0,1203743.0,4764443.0
1,2016,343422014.0,25233098.0,6630485.0,20967926.0
2,2015,283252615.0,24458400.0,7203125.0,22709607.0
2,2016,302072797.0,29379214.0,9160521.0,22997290.0
3,2015,304226647.0,23263206.0,7084168.0,30799137.0
3,2016,242466904.0,43919935.0,8139238.0,16106478.0
4,2015,198737041.0,31244856.0,5549477.0,16215501.0
4,2016,366526430.0,27192107.0,7309853.0,18629438.0
5,2015,465842684.0,32928061.0,5810679.0,15720217.0
5,2016,215137299.0,25468829.0,58241234.0,32726932.0


### Step 9. Find all the volume traded in the year of 2015

In [46]:
del vol['week']
vol['year'] = vol.index.year

year = vol.groupby(['year']).sum()
year

Unnamed: 0_level_0,AAPL,IBM,LNKD,TSLA
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,13019940000.0,1100959000.0,440376163.0,1085839000.0
2016,8952190000.0,939388700.0,551638797.0,1062073000.0


### BONUS: Create your own question and answer it.