<a href="https://colab.research.google.com/github/shailendra-repo/Python/blob/main/Learning_Pandas_Reshaping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Reshaping, Reorganizing and Aggregation

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns',10)
pd.set_option('display.max_rows',10)
pd.set_option('precision',3)


In [7]:
msft = pd.read_csv('msft.csv', index_col=['Date'], parse_dates=True)
aapl= pd.read_csv('aapl.csv', index_col=['Date'], parse_dates=True)

In [8]:
msft.head()

             Open   High    Low  Close    Volume  Adj Close
Date                                                       
2012-01-03  26.55  26.96  26.39  26.77  64731500     24.422
2012-01-04  26.82  27.47  26.78  27.40  80516100     24.997
2012-01-05  27.38  27.73  27.29  27.68  56081400     25.252
2012-01-06  27.53  28.19  27.53  28.11  99455500     25.644
2012-01-09  28.05  28.10  27.72  27.74  59706800     25.307

In [9]:
aapl.head()

              Open    High     Low   Close    Volume  Adj Close
Date                                                           
2012-01-03  409.40  412.50  409.00  411.23  75555200     55.414
2012-01-04  410.00  414.68  409.28  413.44  65005500     55.711
2012-01-05  414.95  418.55  412.67  418.03  67817400     56.330
2012-01-06  419.77  422.75  419.22  422.40  79573200     56.919
2012-01-09  425.50  427.75  421.35  421.73  98506100     56.829

In [10]:
msft[:5]

             Open   High    Low  Close    Volume  Adj Close
Date                                                       
2012-01-03  26.55  26.96  26.39  26.77  64731500     24.422
2012-01-04  26.82  27.47  26.78  27.40  80516100     24.997
2012-01-05  27.38  27.73  27.29  27.68  56081400     25.252
2012-01-06  27.53  28.19  27.53  28.11  99455500     25.644
2012-01-09  28.05  28.10  27.72  27.74  59706800     25.307

In [11]:
aapl[:5]

              Open    High     Low   Close    Volume  Adj Close
Date                                                           
2012-01-03  409.40  412.50  409.00  411.23  75555200     55.414
2012-01-04  410.00  414.68  409.28  413.44  65005500     55.711
2012-01-05  414.95  418.55  412.67  418.03  67817400     56.330
2012-01-06  419.77  422.75  419.22  422.40  79573200     56.919
2012-01-09  425.50  427.75  421.35  421.73  98506100     56.829

# Reorganizing and reshaping data

### Concatenating date in Multiple DataFrame objects

In [14]:
msftA1 = msft['2012-01'][['Adj Close']]
msftA2 = msft['2012-01'][['Adj Close']]
msftA1[:5]

            Adj Close
Date                 
2012-01-03     24.422
2012-01-04     24.997
2012-01-05     25.252
2012-01-06     25.644
2012-01-09     25.307

In [15]:
msftA2[:5]

            Adj Close
Date                 
2012-01-03     24.422
2012-01-04     24.997
2012-01-05     25.252
2012-01-06     25.644
2012-01-09     25.307

In [17]:
pd.concat([msftA1[:5],msftA2[:5]])

            Adj Close
Date                 
2012-01-03     24.422
2012-01-04     24.997
2012-01-05     25.252
2012-01-06     25.644
2012-01-09     25.307
2012-01-03     24.422
2012-01-04     24.997
2012-01-05     25.252
2012-01-06     25.644
2012-01-09     25.307

In [18]:
aaplA01 = aapl['2012-01'][['Adj Close']]

In [24]:
withDups = pd.concat([msftA1[:3], aaplA01[:3]])

In [25]:
withDups

            Adj Close
Date                 
2012-01-03     24.422
2012-01-04     24.997
2012-01-05     25.252
2012-01-03     55.414
2012-01-04     55.711
2012-01-05     56.330

In [27]:
withDups['2012-01-03']

            Adj Close
Date                 
2012-01-03     24.422
2012-01-03     55.414

In [28]:
closes =  pd.concat([msftA1[:3],aaplA01[:3]],keys=['MSFT','AAPL'])
closes

                 Adj Close
     Date                 
MSFT 2012-01-03     24.422
     2012-01-04     24.997
     2012-01-05     25.252
AAPL 2012-01-03     55.414
     2012-01-04     55.711
     2012-01-05     56.330

In [29]:
closes.index

MultiIndex([('MSFT', '2012-01-03'),
            ('MSFT', '2012-01-04'),
            ('MSFT', '2012-01-05'),
            ('AAPL', '2012-01-03'),
            ('AAPL', '2012-01-04'),
            ('AAPL', '2012-01-05')],
           names=[None, 'Date'])

In [34]:
closes.loc['MSFT']

            Adj Close
Date                 
2012-01-03     24.422
2012-01-04     24.997
2012-01-05     25.252

In [36]:
msftAV = msft[['Adj Close','Volume']]
aaplAV = msft[['Adj Close','Volume']]
pd.concat([msftAV, aaplAV])

            Adj Close    Volume
Date                           
2012-01-03     24.422  64731500
2012-01-04     24.997  80516100
2012-01-05     25.252  56081400
2012-01-06     25.644  99455500
2012-01-09     25.307  59706800
...               ...       ...
2012-12-21     25.750  98776500
2012-12-24     25.385  20842400
2012-12-26     25.197  31631100
2012-12-27     25.291  39394000
2012-12-28     24.906  28239900

[498 rows x 2 columns]

In [37]:
aaplA = aapl[['Adj Close']]
pd.concat([msftAV, aaplA])

            Adj Close     Volume
Date                            
2012-01-03     24.422  6.473e+07
2012-01-04     24.997  8.052e+07
2012-01-05     25.252  5.608e+07
2012-01-06     25.644  9.946e+07
2012-01-09     25.307  5.971e+07
...               ...        ...
2012-12-21     70.602        NaN
2012-12-24     70.716        NaN
2012-12-26     69.741        NaN
2012-12-27     70.021        NaN
2012-12-28     69.278        NaN

[498 rows x 2 columns]

In [39]:
pd.concat([msftAV,aaplA], join='inner')

            Adj Close
Date                 
2012-01-03     24.422
2012-01-04     24.997
2012-01-05     25.252
2012-01-06     25.644
2012-01-09     25.307
...               ...
2012-12-21     70.602
2012-12-24     70.716
2012-12-26     69.741
2012-12-27     70.021
2012-12-28     69.278

[498 rows x 1 columns]

In [42]:
msftAV.size

498

In [43]:
aaplA.size

249

In [46]:
mstfA = msft[['Adj Close']]
closes = pd.concat([mstfA, aaplA], axis=1)
closes.head()

            Adj Close  Adj Close
Date                            
2012-01-03     24.422     55.414
2012-01-04     24.997     55.711
2012-01-05     25.252     56.330
2012-01-06     25.644     56.919
2012-01-09     25.307     56.829

In [48]:
pd.concat([msftAV[:5],aaplAV[:3]], axis=1,keys=['MSFT','AAPL'])

                MSFT                AAPL           
           Adj Close    Volume Adj Close     Volume
Date                                               
2012-01-03    24.422  64731500    24.422  6.473e+07
2012-01-04    24.997  80516100    24.997  8.052e+07
2012-01-05    25.252  56081400    25.252  5.608e+07
2012-01-06    25.644  99455500       NaN        NaN
2012-01-09    25.307  59706800       NaN        NaN

In [49]:
pd.concat([msftAV[:5],aaplAV[:3]], join='inner',keys=['MSFT','AAPL'])

                 Adj Close    Volume
     Date                           
MSFT 2012-01-03     24.422  64731500
     2012-01-04     24.997  80516100
     2012-01-05     25.252  56081400
     2012-01-06     25.644  99455500
     2012-01-09     25.307  59706800
AAPL 2012-01-03     24.422  64731500
     2012-01-04     24.997  80516100
     2012-01-05     25.252  56081400

In [51]:
pd.concat([mstfA[:3],aaplA[:3]], ignore_index=True)

   Adj Close
0     24.422
1     24.997
2     25.252
3     55.414
4     55.711
5     56.330

## Merging DataFrame objects

In [52]:
msftAR = mstfA.reset_index()
msftVR = msft[['Volume']].reset_index()
msftAR[:3]

        Date  Adj Close
0 2012-01-03     24.422
1 2012-01-04     24.997
2 2012-01-05     25.252

In [53]:
msftVR[:3]

        Date    Volume
0 2012-01-03  64731500
1 2012-01-04  80516100
2 2012-01-05  56081400

In [55]:
msftCVR = pd.merge(msftAR,msftVR)

In [56]:
msftCVR

          Date  Adj Close    Volume
0   2012-01-03     24.422  64731500
1   2012-01-04     24.997  80516100
2   2012-01-05     25.252  56081400
3   2012-01-06     25.644  99455500
4   2012-01-09     25.307  59706800
..         ...        ...       ...
244 2012-12-21     25.750  98776500
245 2012-12-24     25.385  20842400
246 2012-12-26     25.197  31631100
247 2012-12-27     25.291  39394000
248 2012-12-28     24.906  28239900

[249 rows x 3 columns]

In [60]:
msftAR0_5 = msftAR[0:5]

In [61]:
msftAR0_5

        Date  Adj Close
0 2012-01-03     24.422
1 2012-01-04     24.997
2 2012-01-05     25.252
3 2012-01-06     25.644
4 2012-01-09     25.307

In [62]:
msftAR2_4 = msftAR[2:4]
msftAR2_4

        Date  Adj Close
2 2012-01-05     25.252
3 2012-01-06     25.644

In [63]:
msftVR2_4 = msftVR[2:4]

In [64]:
msftVR2_4

        Date    Volume
2 2012-01-05  56081400
3 2012-01-06  99455500

In [65]:
pd.merge(msftAR0_5,msftVR2_4)

        Date  Adj Close    Volume
0 2012-01-05     25.252  56081400
1 2012-01-06     25.644  99455500

In [66]:
pd.merge(msftAR0_5,msftVR2_4, how='outer')

        Date  Adj Close     Volume
0 2012-01-03     24.422        NaN
1 2012-01-04     24.997        NaN
2 2012-01-05     25.252  5.608e+07
3 2012-01-06     25.644  9.946e+07
4 2012-01-09     25.307        NaN

## Pivoting

In [67]:
msft.head()

             Open   High    Low  Close    Volume  Adj Close
Date                                                       
2012-01-03  26.55  26.96  26.39  26.77  64731500     24.422
2012-01-04  26.82  27.47  26.78  27.40  80516100     24.997
2012-01-05  27.38  27.73  27.29  27.68  56081400     25.252
2012-01-06  27.53  28.19  27.53  28.11  99455500     25.644
2012-01-09  28.05  28.10  27.72  27.74  59706800     25.307

In [68]:
aapl.head()

              Open    High     Low   Close    Volume  Adj Close
Date                                                           
2012-01-03  409.40  412.50  409.00  411.23  75555200     55.414
2012-01-04  410.00  414.68  409.28  413.44  65005500     55.711
2012-01-05  414.95  418.55  412.67  418.03  67817400     56.330
2012-01-06  419.77  422.75  419.22  422.40  79573200     56.919
2012-01-09  425.50  427.75  421.35  421.73  98506100     56.829

In [69]:
msft.insert(0,'Symbol','MSFT')
aapl.insert(0,'Symbol','AAPL')

In [70]:
msft.head()

           Symbol   Open   High    Low  Close    Volume  Adj Close
Date                                                              
2012-01-03   MSFT  26.55  26.96  26.39  26.77  64731500     24.422
2012-01-04   MSFT  26.82  27.47  26.78  27.40  80516100     24.997
2012-01-05   MSFT  27.38  27.73  27.29  27.68  56081400     25.252
2012-01-06   MSFT  27.53  28.19  27.53  28.11  99455500     25.644
2012-01-09   MSFT  28.05  28.10  27.72  27.74  59706800     25.307

In [73]:
combined = pd.concat([msft,aapl]).sort_index()

In [74]:
combined

           Symbol    Open    High     Low   Close     Volume  Adj Close
Date                                                                   
2012-01-03   MSFT   26.55   26.96   26.39   26.77   64731500     24.422
2012-01-03   AAPL  409.40  412.50  409.00  411.23   75555200     55.414
2012-01-04   MSFT   26.82   27.47   26.78   27.40   80516100     24.997
2012-01-04   AAPL  410.00  414.68  409.28  413.44   65005500     55.711
2012-01-05   MSFT   27.38   27.73   27.29   27.68   56081400     25.252
...           ...     ...     ...     ...     ...        ...        ...
2012-12-26   MSFT   27.03   27.20   26.70   26.86   31631100     25.197
2012-12-27   AAPL  513.54  516.25  504.66  515.06  113780100     70.021
2012-12-27   MSFT   26.89   27.09   26.57   26.96   39394000     25.291
2012-12-28   MSFT   26.71   26.90   26.55   26.55   28239900     24.906
2012-12-28   AAPL  510.29  514.48  508.12  509.59   88569600     69.278

[498 rows x 7 columns]

In [75]:
s4p = combined.reset_index()

In [76]:
s4p

          Date Symbol    Open    High     Low   Close     Volume  Adj Close
0   2012-01-03   MSFT   26.55   26.96   26.39   26.77   64731500     24.422
1   2012-01-03   AAPL  409.40  412.50  409.00  411.23   75555200     55.414
2   2012-01-04   MSFT   26.82   27.47   26.78   27.40   80516100     24.997
3   2012-01-04   AAPL  410.00  414.68  409.28  413.44   65005500     55.711
4   2012-01-05   MSFT   27.38   27.73   27.29   27.68   56081400     25.252
..         ...    ...     ...     ...     ...     ...        ...        ...
493 2012-12-26   MSFT   27.03   27.20   26.70   26.86   31631100     25.197
494 2012-12-27   AAPL  513.54  516.25  504.66  515.06  113780100     70.021
495 2012-12-27   MSFT   26.89   27.09   26.57   26.96   39394000     25.291
496 2012-12-28   MSFT   26.71   26.90   26.55   26.55   28239900     24.906
497 2012-12-28   AAPL  510.29  514.48  508.12  509.59   88569600     69.278

[498 rows x 8 columns]

In [77]:
closes = s4p.pivot(index='Date', columns='Symbol', values='Adj Close')

In [78]:
closes

Symbol        AAPL    MSFT
Date                      
2012-01-03  55.414  24.422
2012-01-04  55.711  24.997
2012-01-05  56.330  25.252
2012-01-06  56.919  25.644
2012-01-09  56.829  25.307
...            ...     ...
2012-12-21  70.602  25.750
2012-12-24  70.716  25.385
2012-12-26  69.741  25.197
2012-12-27  70.021  25.291
2012-12-28  69.278  24.906

[249 rows x 2 columns]

## Stacking and Unstacking

In [79]:
closes

Symbol        AAPL    MSFT
Date                      
2012-01-03  55.414  24.422
2012-01-04  55.711  24.997
2012-01-05  56.330  25.252
2012-01-06  56.919  25.644
2012-01-09  56.829  25.307
...            ...     ...
2012-12-21  70.602  25.750
2012-12-24  70.716  25.385
2012-12-26  69.741  25.197
2012-12-27  70.021  25.291
2012-12-28  69.278  24.906

[249 rows x 2 columns]

In [80]:
closes.index

DatetimeIndex(['2012-01-03', '2012-01-04', '2012-01-05', '2012-01-06',
               '2012-01-09', '2012-01-10', '2012-01-11', '2012-01-12',
               '2012-01-13', '2012-01-17',
               ...
               '2012-12-14', '2012-12-17', '2012-12-18', '2012-12-19',
               '2012-12-20', '2012-12-21', '2012-12-24', '2012-12-26',
               '2012-12-27', '2012-12-28'],
              dtype='datetime64[ns]', name='Date', length=249, freq=None)

In [81]:
stackedClosed = closes.stack()

In [82]:
stackedClosed

Date        Symbol
2012-01-03  AAPL      55.414
            MSFT      24.422
2012-01-04  AAPL      55.711
            MSFT      24.997
2012-01-05  AAPL      56.330
                       ...  
2012-12-26  MSFT      25.197
2012-12-27  AAPL      70.021
            MSFT      25.291
2012-12-28  AAPL      69.278
            MSFT      24.906
Length: 498, dtype: float64

In [83]:
stackedClosed.index

MultiIndex([('2012-01-03', 'AAPL'),
            ('2012-01-03', 'MSFT'),
            ('2012-01-04', 'AAPL'),
            ('2012-01-04', 'MSFT'),
            ('2012-01-05', 'AAPL'),
            ('2012-01-05', 'MSFT'),
            ('2012-01-06', 'AAPL'),
            ('2012-01-06', 'MSFT'),
            ('2012-01-09', 'AAPL'),
            ('2012-01-09', 'MSFT'),
            ...
            ('2012-12-21', 'AAPL'),
            ('2012-12-21', 'MSFT'),
            ('2012-12-24', 'AAPL'),
            ('2012-12-24', 'MSFT'),
            ('2012-12-26', 'AAPL'),
            ('2012-12-26', 'MSFT'),
            ('2012-12-27', 'AAPL'),
            ('2012-12-27', 'MSFT'),
            ('2012-12-28', 'AAPL'),
            ('2012-12-28', 'MSFT')],
           names=['Date', 'Symbol'], length=498)

In [85]:
stackedClosed['2012-01-03','AAPL']

55.41362

In [86]:
stackedClosed['2012-01-03']

Symbol
AAPL    55.414
MSFT    24.422
dtype: float64

In [87]:
stackedClosed[:,'MSFT']

Date
2012-01-03    24.422
2012-01-04    24.997
2012-01-05    25.252
2012-01-06    25.644
2012-01-09    25.307
               ...  
2012-12-21    25.750
2012-12-24    25.385
2012-12-26    25.197
2012-12-27    25.291
2012-12-28    24.906
Length: 249, dtype: float64

In [88]:
unstackedCloses= stackedClosed.unstack()
unstackedCloses

Symbol        AAPL    MSFT
Date                      
2012-01-03  55.414  24.422
2012-01-04  55.711  24.997
2012-01-05  56.330  25.252
2012-01-06  56.919  25.644
2012-01-09  56.829  25.307
...            ...     ...
2012-12-21  70.602  25.750
2012-12-24  70.716  25.385
2012-12-26  69.741  25.197
2012-12-27  70.021  25.291
2012-12-28  69.278  24.906

[249 rows x 2 columns]

## Melting

In [89]:
s4p

          Date Symbol    Open    High     Low   Close     Volume  Adj Close
0   2012-01-03   MSFT   26.55   26.96   26.39   26.77   64731500     24.422
1   2012-01-03   AAPL  409.40  412.50  409.00  411.23   75555200     55.414
2   2012-01-04   MSFT   26.82   27.47   26.78   27.40   80516100     24.997
3   2012-01-04   AAPL  410.00  414.68  409.28  413.44   65005500     55.711
4   2012-01-05   MSFT   27.38   27.73   27.29   27.68   56081400     25.252
..         ...    ...     ...     ...     ...     ...        ...        ...
493 2012-12-26   MSFT   27.03   27.20   26.70   26.86   31631100     25.197
494 2012-12-27   AAPL  513.54  516.25  504.66  515.06  113780100     70.021
495 2012-12-27   MSFT   26.89   27.09   26.57   26.96   39394000     25.291
496 2012-12-28   MSFT   26.71   26.90   26.55   26.55   28239900     24.906
497 2012-12-28   AAPL  510.29  514.48  508.12  509.59   88569600     69.278

[498 rows x 8 columns]

In [90]:
melted = pd.melt(s4p, id_vars=['Date','Symbol'])
melted

           Date Symbol   variable    value
0    2012-01-03   MSFT       Open   26.550
1    2012-01-03   AAPL       Open  409.400
2    2012-01-04   MSFT       Open   26.820
3    2012-01-04   AAPL       Open  410.000
4    2012-01-05   MSFT       Open   27.380
...         ...    ...        ...      ...
2983 2012-12-26   MSFT  Adj Close   25.197
2984 2012-12-27   AAPL  Adj Close   70.021
2985 2012-12-27   MSFT  Adj Close   25.291
2986 2012-12-28   MSFT  Adj Close   24.906
2987 2012-12-28   AAPL  Adj Close   69.278

[2988 rows x 4 columns]

In [92]:
melted[(melted.Date == '2012-01-03') & (melted.Symbol == 'MSFT')]

           Date Symbol   variable      value
0    2012-01-03   MSFT       Open  2.655e+01
498  2012-01-03   MSFT       High  2.696e+01
996  2012-01-03   MSFT        Low  2.639e+01
1494 2012-01-03   MSFT      Close  2.677e+01
1992 2012-01-03   MSFT     Volume  6.473e+07
2490 2012-01-03   MSFT  Adj Close  2.442e+01

# Grouping and aggregation

## Splitting

In [93]:
combined

           Symbol    Open    High     Low   Close     Volume  Adj Close
Date                                                                   
2012-01-03   MSFT   26.55   26.96   26.39   26.77   64731500     24.422
2012-01-03   AAPL  409.40  412.50  409.00  411.23   75555200     55.414
2012-01-04   MSFT   26.82   27.47   26.78   27.40   80516100     24.997
2012-01-04   AAPL  410.00  414.68  409.28  413.44   65005500     55.711
2012-01-05   MSFT   27.38   27.73   27.29   27.68   56081400     25.252
...           ...     ...     ...     ...     ...        ...        ...
2012-12-26   MSFT   27.03   27.20   26.70   26.86   31631100     25.197
2012-12-27   AAPL  513.54  516.25  504.66  515.06  113780100     70.021
2012-12-27   MSFT   26.89   27.09   26.57   26.96   39394000     25.291
2012-12-28   MSFT   26.71   26.90   26.55   26.55   28239900     24.906
2012-12-28   AAPL  510.29  514.48  508.12  509.59   88569600     69.278

[498 rows x 7 columns]

In [94]:
s4g = combined[['Symbol','Adj Close']].reset_index()
s4g

          Date Symbol  Adj Close
0   2012-01-03   MSFT     24.422
1   2012-01-03   AAPL     55.414
2   2012-01-04   MSFT     24.997
3   2012-01-04   AAPL     55.711
4   2012-01-05   MSFT     25.252
..         ...    ...        ...
493 2012-12-26   MSFT     25.197
494 2012-12-27   AAPL     70.021
495 2012-12-27   MSFT     25.291
496 2012-12-28   MSFT     24.906
497 2012-12-28   AAPL     69.278

[498 rows x 3 columns]

In [98]:
s4g.insert(1,'Year',pd.DatetimeIndex(s4g['Date']).year)
s4g.insert(2,'Month',pd.DatetimeIndex(s4g['Date']).month)

In [99]:
s4g

          Date  Year  Month Symbol  Adj Close
0   2012-01-03  2012      1   MSFT     24.422
1   2012-01-03  2012      1   AAPL     55.414
2   2012-01-04  2012      1   MSFT     24.997
3   2012-01-04  2012      1   AAPL     55.711
4   2012-01-05  2012      1   MSFT     25.252
..         ...   ...    ...    ...        ...
493 2012-12-26  2012     12   MSFT     25.197
494 2012-12-27  2012     12   AAPL     70.021
495 2012-12-27  2012     12   MSFT     25.291
496 2012-12-28  2012     12   MSFT     24.906
497 2012-12-28  2012     12   AAPL     69.278

[498 rows x 5 columns]

In [100]:
s4g.groupby('Symbol')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fcddde5a350>

In [101]:
grouped = s4g.groupby('Symbol')
type(grouped.groups)

pandas.io.formats.printing.PrettyDict

In [102]:
grouped.groups

{'AAPL': [1, 3, 5, 7, 9, 11, 13, 14, 16, 18, 20, 23, 25, 27, 29, 30, 33, 34, 37, 38, 41, 43, 44, 46, 49, 51, 52, 54, 57, 59, 61, 62, 64, 67, 68, 71, 72, 75, 77, 79, 81, 82, 84, 86, 89, 90, 93, 95, 96, 99, 100, 103, 105, 107, 109, 111, 112, 114, 117, 119, 121, 122, 124, 126, 128, 130, 132, 134, 137, 139, 140, 142, 145, 147, 149, 151, 153, 154, 156, 158, 160, 163, 164, 166, 168, 170, 173, 175, 176, 178, 180, 183, 184, 186, 189, 190, 193, 194, 196, 198, ...], 'MSFT': [0, 2, 4, 6, 8, 10, 12, 15, 17, 19, 21, 22, 24, 26, 28, 31, 32, 35, 36, 39, 40, 42, 45, 47, 48, 50, 53, 55, 56, 58, 60, 63, 65, 66, 69, 70, 73, 74, 76, 78, 80, 83, 85, 87, 88, 91, 92, 94, 97, 98, 101, 102, 104, 106, 108, 110, 113, 115, 116, 118, 120, 123, 125, 127, 129, 131, 133, 135, 136, 138, 141, 143, 144, 146, 148, 150, 152, 155, 157, 159, 161, 162, 165, 167, 169, 171, 172, 174, 177, 179, 181, 182, 185, 187, 188, 191, 192, 195, 197, 199, ...]}

In [103]:
len(grouped), grouped.ngroups

(2, 2)

In [104]:
def print_groups(groupobject):
  for name, group in groupobject:
    print (name)
    print(group.head())

In [105]:
print_groups(grouped)

AAPL
        Date  Year  Month Symbol  Adj Close
1 2012-01-03  2012      1   AAPL     55.414
3 2012-01-04  2012      1   AAPL     55.711
5 2012-01-05  2012      1   AAPL     56.330
7 2012-01-06  2012      1   AAPL     56.919
9 2012-01-09  2012      1   AAPL     56.829
MSFT
        Date  Year  Month Symbol  Adj Close
0 2012-01-03  2012      1   MSFT     24.422
2 2012-01-04  2012      1   MSFT     24.997
4 2012-01-05  2012      1   MSFT     25.252
6 2012-01-06  2012      1   MSFT     25.644
8 2012-01-09  2012      1   MSFT     25.307


In [106]:
grouped.size()

Symbol
AAPL    249
MSFT    249
dtype: int64

In [107]:
grouped.get_group('MSFT')

          Date  Year  Month Symbol  Adj Close
0   2012-01-03  2012      1   MSFT     24.422
2   2012-01-04  2012      1   MSFT     24.997
4   2012-01-05  2012      1   MSFT     25.252
6   2012-01-06  2012      1   MSFT     25.644
8   2012-01-09  2012      1   MSFT     25.307
..         ...   ...    ...    ...        ...
489 2012-12-21  2012     12   MSFT     25.750
491 2012-12-24  2012     12   MSFT     25.385
493 2012-12-26  2012     12   MSFT     25.197
495 2012-12-27  2012     12   MSFT     25.291
496 2012-12-28  2012     12   MSFT     24.906

[249 rows x 5 columns]

In [108]:
mcg = s4g.groupby(['Symbol','Year','Month'])
print_groups(mcg)

('AAPL', 2012, 1)
        Date  Year  Month Symbol  Adj Close
1 2012-01-03  2012      1   AAPL     55.414
3 2012-01-04  2012      1   AAPL     55.711
5 2012-01-05  2012      1   AAPL     56.330
7 2012-01-06  2012      1   AAPL     56.919
9 2012-01-09  2012      1   AAPL     56.829
('AAPL', 2012, 2)
         Date  Year  Month Symbol  Adj Close
41 2012-02-01  2012      2   AAPL     61.472
43 2012-02-02  2012      2   AAPL     61.328
44 2012-02-03  2012      2   AAPL     61.942
46 2012-02-06  2012      2   AAPL     62.520
49 2012-02-07  2012      2   AAPL     63.175
('AAPL', 2012, 3)
         Date  Year  Month Symbol  Adj Close
81 2012-03-01  2012      3   AAPL     73.368
82 2012-03-02  2012      3   AAPL     73.464
84 2012-03-05  2012      3   AAPL     71.844
86 2012-03-06  2012      3   AAPL     71.453
89 2012-03-07  2012      3   AAPL     71.511
('AAPL', 2012, 4)
          Date  Year  Month Symbol  Adj Close
124 2012-04-02  2012      4   AAPL     83.361
126 2012-04-03  2012      4   AA

In [109]:
mi = s4g.set_index(['Symbol','Year','Month'])
mi

                        Date  Adj Close
Symbol Year Month                      
MSFT   2012 1     2012-01-03     24.422
AAPL   2012 1     2012-01-03     55.414
MSFT   2012 1     2012-01-04     24.997
AAPL   2012 1     2012-01-04     55.711
MSFT   2012 1     2012-01-05     25.252
...                      ...        ...
            12    2012-12-26     25.197
AAPL   2012 12    2012-12-27     70.021
MSFT   2012 12    2012-12-27     25.291
            12    2012-12-28     24.906
AAPL   2012 12    2012-12-28     69.278

[498 rows x 2 columns]

In [110]:
mig_l1= mi.groupby(level=0)
print_groups(mig_l1)

AAPL
                        Date  Adj Close
Symbol Year Month                      
AAPL   2012 1     2012-01-03     55.414
            1     2012-01-04     55.711
            1     2012-01-05     56.330
            1     2012-01-06     56.919
            1     2012-01-09     56.829
MSFT
                        Date  Adj Close
Symbol Year Month                      
MSFT   2012 1     2012-01-03     24.422
            1     2012-01-04     24.997
            1     2012-01-05     25.252
            1     2012-01-06     25.644
            1     2012-01-09     25.307


In [111]:
mig_l12 = mi.groupby(level=['Symbol','Year','Month'])
print_groups(mig_l12)

('AAPL', 2012, 1)
                        Date  Adj Close
Symbol Year Month                      
AAPL   2012 1     2012-01-03     55.414
            1     2012-01-04     55.711
            1     2012-01-05     56.330
            1     2012-01-06     56.919
            1     2012-01-09     56.829
('AAPL', 2012, 2)
                        Date  Adj Close
Symbol Year Month                      
AAPL   2012 2     2012-02-01     61.472
            2     2012-02-02     61.328
            2     2012-02-03     61.942
            2     2012-02-06     62.520
            2     2012-02-07     63.175
('AAPL', 2012, 3)
                        Date  Adj Close
Symbol Year Month                      
AAPL   2012 3     2012-03-01     73.368
            3     2012-03-02     73.464
            3     2012-03-05     71.844
            3     2012-03-06     71.453
            3     2012-03-07     71.511
('AAPL', 2012, 4)
                        Date  Adj Close
Symbol Year Month                      
AAPL   2

## Aggregation

In [112]:
mig_l12.agg(np.mean)

                   Adj Close
Symbol Year Month           
AAPL   2012 1         57.751
            2         67.048
            3         77.820
            4         81.659
            5         76.090
...                      ...
MSFT   2012 8         28.174
            9         28.641
            10        27.044
            11        26.005
            12        25.309

[24 rows x 1 columns]

In [113]:
s4g.groupby(['Symbol','Year','Month'], as_index=False).agg(np.mean)[:5]

  Symbol  Year  Month  Adj Close
0   AAPL  2012      1     57.751
1   AAPL  2012      2     67.048
2   AAPL  2012      3     77.820
3   AAPL  2012      4     81.659
4   AAPL  2012      5     76.090

In [114]:
mig_l12.agg([np.mean, np.std, np.sum])

                  Adj Close                 
                       mean    std       sum
Symbol Year Month                           
AAPL   2012 1        57.751  1.798  1155.026
            2        67.048  3.570  1340.963
            3        77.820  4.162  1712.033
            4        81.659  3.063  1633.189
            5        76.090  1.991  1673.985
...                     ...    ...       ...
MSFT   2012 8        28.174  0.473   647.996
            9        28.641  0.431   544.176
            10       27.044  0.667   567.930
            11       26.005  1.001   546.103
            12       25.309  0.364   480.871

[24 rows x 3 columns]