In [1]:
import pandas as pd
from pandas_datareader import data

In [2]:
#Ticker symbols to download; they show up as the labels of the panel's minor axis
companies = [
    "MSFT",
    "GOOG",
    "AAPL",
    "YHOO",
    "AMZN",
]

In [3]:
#Download price data for every ticker in `companies`; the result is displayed below as a pandas Panel
#NOTE(review): the recorded output spans 2016-11-22 to 2017-11-20, not the requested
#2010-01-01 to 2016-12-31 range, so the data source appears to ignore start/end - confirm
p = data.DataReader(
    name=companies,
    data_source="google",
    start="2010-01-01",
    end="2016-12-31",
)

## The Axes of Panel Object

In [7]:
#Show the panel's summary: its dimensions and the label range of each axis
p

<class 'pandas.core.panel.Panel'>
Dimensions: 5 (items) x 251 (major_axis) x 5 (minor_axis)
Items axis: Open to Volume
Major_axis axis: 2016-11-22 00:00:00 to 2017-11-20 00:00:00
Minor_axis axis: AAPL to YHOO

In [8]:
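#Dimensions = items x major_axis x minor_axis
#items: the number of DataFrames in the panel (one per price field, Open to Volume); these become the columns when the panel is converted with to_frame()
#major_axis: the number of rows in each DataFrame (the dates)
#minor_axis: the number of columns in each DataFrame (the five company names)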

In [9]:
#Labels of the items axis: one per DataFrame in the panel
p.items

Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')

In [10]:
#Labels of the major axis: the rows (dates) of each DataFrame
p.major_axis

DatetimeIndex(['2016-11-22', '2016-11-23', '2016-11-25', '2016-11-28',
               '2016-11-29', '2016-11-30', '2016-12-01', '2016-12-02',
               '2016-12-05', '2016-12-06',
               ...
               '2017-11-07', '2017-11-08', '2017-11-09', '2017-11-10',
               '2017-11-13', '2017-11-14', '2017-11-15', '2017-11-16',
               '2017-11-17', '2017-11-20'],
              dtype='datetime64[ns]', name='Date', length=251, freq=None)

In [11]:
#Labels of the minor axis: the columns (company tickers) of each DataFrame
p.minor_axis

Index(['AAPL', 'AMZN', 'GOOG', 'MSFT', 'YHOO'], dtype='object')

In [12]:
#All three axes at once, as a list: [items, major_axis, minor_axis]
p.axes

[Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object'),
 DatetimeIndex(['2016-11-22', '2016-11-23', '2016-11-25', '2016-11-28',
                '2016-11-29', '2016-11-30', '2016-12-01', '2016-12-02',
                '2016-12-05', '2016-12-06',
                ...
                '2017-11-07', '2017-11-08', '2017-11-09', '2017-11-10',
                '2017-11-13', '2017-11-14', '2017-11-15', '2017-11-16',
                '2017-11-17', '2017-11-20'],
               dtype='datetime64[ns]', name='Date', length=251, freq=None),
 Index(['AAPL', 'AMZN', 'GOOG', 'MSFT', 'YHOO'], dtype='object')]

## Panel Attributes

In [13]:
#Number of dimensions (axes) of the panel
p.ndim

3

In [14]:
#Data type of each item in the panel
p.dtypes

Open      float64
High      float64
Low       float64
Close     float64
Volume    float64
dtype: object

In [15]:
#Length of each axis: (items, major_axis, minor_axis)
p.shape

(5, 251, 5)

In [17]:
p.size #total number of values stored in the panel object: 5 x 251 x 5 = 6275

6275

In [18]:
#The underlying data as a nested (3-dimensional) NumPy array; the output is long and is truncated by the display
p.values

array([[[  1.11950000e+02,   7.88170000e+02,   7.72630000e+02,
           6.09800000e+01,   4.12000000e+01],
        [  1.11360000e+02,   7.81730000e+02,   7.67730000e+02,
           6.10100000e+01,   4.09100000e+01],
        [  1.11470000e+02,   7.86500000e+02,   7.64260000e+02,
           6.03000000e+01,   4.10800000e+01],
        ..., 
        [  1.71180000e+02,   1.13016000e+03,   1.02252000e+03,
           8.31000000e+01,   6.98000000e+01],
        [  1.71040000e+02,   1.13828000e+03,   1.03401000e+03,
           8.31200000e+01,   7.08100000e+01],
        [  1.70290000e+02,   1.12977000e+03,   1.02026000e+03,
           8.24000000e+01,   7.16000000e+01]],

       [[  1.12420000e+02,   7.92400000e+02,   7.76960000e+02,
           6.12600000e+01,   4.14000000e+01],
        [  1.11510000e+02,   7.81750000e+02,   7.68280000e+02,
           6.11000000e+01,   4.09800000e+01],
        [  1.11870000e+02,   7.86750000e+02,   7.65000000e+02,
           6.05300000e+01,   4.10800000e+01],
   

## Use Bracket Notation to Extract a DataFrame (for Subsetting)

In [6]:
p["Open"] #returns a brand new DataFrame (not shown below: only the last expression of a cell is displayed)
p["Volume"].head()

Unnamed: 0_level_0,AAPL,AMZN,GOOG,MSFT,YHOO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-25,11475922.0,1837068.0,587421.0,8409616.0,2586908.0
2016-11-28,27193983.0,4438828.0,2188151.0,20732619.0,6836364.0
2016-11-29,28528750.0,3272344.0,1616618.0,22366721.0,13085981.0
2016-11-30,36162258.0,4625946.0,2392890.0,34655435.0,7644360.0
2016-12-01,37086862.0,4665993.0,3017947.0,34542121.0,12624139.0


In [8]:
p.Volume.head(3) #an item can also be accessed as an attribute, using its name

Unnamed: 0_level_0,AAPL,AMZN,GOOG,MSFT,YHOO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-25,11475922.0,1837068.0,587421.0,8409616.0,2586908.0
2016-11-28,27193983.0,4438828.0,2188151.0,20732619.0,6836364.0
2016-11-29,28528750.0,3272344.0,1616618.0,22366721.0,13085981.0


## Extracting with the .loc[], .iloc[], and .ix[] Indexers

In [9]:
#In a DataFrame these indexers take two arguments: a row location and a column location
#The behavior is different for a Panel

In [14]:
#The first parameter of .loc is a label from the items axis
#The second parameter is a label from the major axis; if the label is not found, an exception is thrown
#The third parameter is a label from the minor axis
#The parameters are optional
p.loc["Close","2016-12-12","GOOG"]

789.26999999999998

In [22]:
p.iloc[3,11,2] #.iloc takes integer positions along the items, major and minor axes

789.26999999999998

In [23]:
p.ix["Close",11,"GOOG"] #mix and match of labels (.loc style) and positions (.iloc style); .ix is deprecated (see the warning below), so prefer .loc or .iloc

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


789.26999999999998

## Convert a Panel to a MultiIndex Dataframe (and Vice Versa)

In [32]:
#Flatten the panel into a DataFrame: the major and minor axes are combined into
#a two-level row index, and each item becomes a column
df = p.to_frame()
#Preview the frame already built above instead of converting the panel a second time
df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume
Date,minor,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-11-25,AAPL,111.47,111.87,110.95,111.79,11475922.0
2016-11-25,AMZN,786.5,786.75,777.9,780.37,1837068.0
2016-11-25,GOOG,764.26,765.0,760.52,761.68,587421.0
2016-11-25,MSFT,60.3,60.53,60.13,60.53,8409616.0
2016-11-25,YHOO,41.08,41.08,40.71,40.87,2586908.0
2016-11-28,AAPL,111.43,112.46,111.39,111.57,27193983.0
2016-11-28,AMZN,776.99,777.0,764.24,766.77,4438828.0
2016-11-28,GOOG,760.0,779.53,759.8,768.24,2188151.0
2016-11-28,MSFT,60.34,61.02,60.21,60.61,20732619.0
2016-11-28,YHOO,40.85,41.7,40.74,41.45,6836364.0


In [33]:
#Doing the reverse: rebuild a Panel from the MultiIndex DataFrame
#NOTE: this rebinds `p`, so cells run after this one work on the round-tripped panel;
#pandas also emits a "Panel is deprecated" warning here (shown below)
p = df.to_panel()

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  


## The .major_xs() Method

In [35]:
p.major_axis #returns the row labels of the panel, which in this case are dates (a DatetimeIndex)

DatetimeIndex(['2016-11-25', '2016-11-28', '2016-11-29', '2016-11-30',
               '2016-12-01', '2016-12-02', '2016-12-05', '2016-12-06',
               '2016-12-07', '2016-12-08',
               ...
               '2017-11-09', '2017-11-10', '2017-11-13', '2017-11-14',
               '2017-11-15', '2017-11-16', '2017-11-17', '2017-11-20',
               '2017-11-21', '2017-11-22'],
              dtype='datetime64[ns]', name='Date', length=251, freq=None)

In [39]:
p.major_xs("2016-11-25") #returns a DataFrame: the cross-section at one key on the major axis (tickers as rows, items as columns)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
minor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AAPL,111.47,111.87,110.95,111.79,11475922.0
AMZN,786.5,786.75,777.9,780.37,1837068.0
GOOG,764.26,765.0,760.52,761.68,587421.0
MSFT,60.3,60.53,60.13,60.53,8409616.0
YHOO,41.08,41.08,40.71,40.87,2586908.0


In [41]:
p.minor_xs("GOOG").head(3) #returns a DataFrame: the cross-section at one key on the minor axis (dates as rows, items as columns)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-25,764.26,765.0,760.52,761.68,587421.0
2016-11-28,760.0,779.53,759.8,768.24,2188151.0
2016-11-29,771.53,778.5,768.24,770.84,1616618.0


## Transpose a Panel with the .transpose() Method

In [42]:
#Swap the items axis with the minor axis
p.transpose(2,1,0)  #new axis order: the old minor axis (2) comes first, the major axis (1) stays in the middle, the old items axis (0) goes last

<class 'pandas.core.panel.Panel'>
Dimensions: 5 (items) x 251 (major_axis) x 5 (minor_axis)
Items axis: AAPL to YHOO
Major_axis axis: 2016-11-25 00:00:00 to 2017-11-22 00:00:00
Minor_axis axis: Open to Volume

In [43]:
#Keep the transposed panel: its items are now the tickers and its minor axis holds the price fields
p2 = p.transpose(2,1,0)

In [45]:
#Each item of the transposed panel is one company's DataFrame (dates as rows, price fields as columns)
p2["AAPL"].head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-25,111.47,111.87,110.95,111.79,11475922.0
2016-11-28,111.43,112.46,111.39,111.57,27193983.0
2016-11-29,110.78,112.03,110.07,111.46,28528750.0
2016-11-30,111.6,112.2,110.27,110.52,36162258.0
2016-12-01,110.36,110.94,109.03,109.49,37086862.0


In [50]:
#Cross-section of the transposed panel at one date: price fields as rows, tickers as columns
p2.major_xs("2016-12-14").head(2)

minor,AAPL,AMZN,GOOG,MSFT,YHOO
Open,115.04,778.25,797.4,63.0,41.44
High,116.2,780.86,804.0,63.45,41.53


In [52]:
#Cross-section of the transposed panel at one price field: dates as rows, tickers as columns
p2.minor_xs("Volume").head(2)

minor,AAPL,AMZN,GOOG,MSFT,YHOO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-25,11475922.0,1837068.0,587421.0,8409616.0,2586908.0
2016-11-28,27193983.0,4438828.0,2188151.0,20732619.0,6836364.0


## The .swapaxes() Method

In [53]:
p.swapaxes("items","minor") #similar to transpose: exchanges the items and minor axes, giving the same axis layout as p.transpose(2,1,0)

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  """Entry point for launching an IPython kernel.


<class 'pandas.core.panel.Panel'>
Dimensions: 5 (items) x 251 (major_axis) x 5 (minor_axis)
Items axis: AAPL to YHOO
Major_axis axis: 2016-11-25 00:00:00 to 2017-11-22 00:00:00
Minor_axis axis: Open to Volume