In [2]:
import numpy as np
import pandas as pd

import datetime
from datetime import datetime, date

# Notebook-wide pandas display settings: plain-text reprs, and a compact
# view limited to 7 columns, 10 rows and 60 characters of width.
pd.options.display.notebook_repr_html = False
pd.options.display.max_columns = 7
pd.options.display.max_rows = 10
pd.options.display.width = 60

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Build a Series from a plain list; no index is given, so pandas supplies
# the default 0..n-1 integer labels.
s = pd.Series(data=[1, 2, 3, 4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [4]:
# Look up the value stored under the label 1.
s.loc[1]

2

In [5]:
# Passing a list of labels returns a Series holding just those entries.
s.loc[[1, 3]]

1    2
3    4
dtype: int64

In [6]:
# Rebuild s with explicit string labels; the dict keys become the index,
# in insertion order.
s = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 4})

In [7]:
# Show s as the cell result so its current index labels are visible.
s

a    1
b    2
c    3
d    4
dtype: int64

In [8]:
# Select several entries by their string labels.
s.loc[['a', 'd']]

a    1
d    4
dtype: int64

In [9]:
# Select by integer position (second and third entries). s is labelled with
# strings, so plain s[[1, 2]] only worked through pandas' positional
# fallback, which is deprecated; .iloc states the positional intent
# explicitly and returns the same rows.
s.iloc[[1, 2]]

b    2
c    3
dtype: int64

In [10]:
# Inspect the Index object that holds the labels of s.
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [11]:
# Calendar-day range with both endpoints included; used below as the shared
# index for the temperature Series.
dates = pd.date_range(start='2018-10-1', end='2018-10-6')
dates

DatetimeIndex(['2018-10-01', '2018-10-02', '2018-10-03',
               '2018-10-04', '2018-10-05', '2018-10-06'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# First temperature Series: one reading per entry of `dates`.
temps1 = pd.Series(data=[80, 82, 85, 90, 83, 87], index=dates)

In [13]:
# Show temps1 as the cell result.
temps1

2018-10-01    80
2018-10-02    82
2018-10-03    85
2018-10-04    90
2018-10-05    83
2018-10-06    87
Freq: D, dtype: int64

In [15]:
# Look up a single reading by date; the string is matched against the
# datetime index.
temps1.loc['2018-10-4']

90

In [16]:
# Second temperature Series, indexed by the same `dates` as temps1.
temps2 = pd.Series(data=[70, 75, 69, 83, 79, 77], index=dates)
temps2

2018-10-01    70
2018-10-02    75
2018-10-03    69
2018-10-04    83
2018-10-05    79
2018-10-06    77
Freq: D, dtype: int64

In [17]:
# Element-wise difference; pandas aligns the two Series on their index
# labels before subtracting.
temp_diffs = temps1.sub(temps2)
temp_diffs

2018-10-01    10
2018-10-02     7
2018-10-03    16
2018-10-04     7
2018-10-05     4
2018-10-06    10
Freq: D, dtype: int64

In [18]:
# Third value by integer position. temp_diffs is indexed by dates, so a bare
# integer key in temp_diffs[2] relied on pandas' deprecated positional
# fallback; .iloc makes the positional lookup explicit.
temp_diffs.iloc[2]

16

In [19]:
# Mean of all values in temp_diffs.
temp_diffs.mean()

9.0

In [20]:
# Combine the two Series into one DataFrame: each dict key becomes a column
# name and the shared date index becomes the row index.
temps_df = pd.DataFrame({'Missoula': temps1, 'Philadelphia': temps2})
temps_df

            Missoula  Philadelphia
2018-10-01        80            70
2018-10-02        82            75
2018-10-03        85            69
2018-10-04        90            83
2018-10-05        83            79
2018-10-06        87            77

In [22]:
# Bracket access with a single column name returns that column as a Series.
temps_df['Missoula']

2018-10-01    80
2018-10-02    82
2018-10-03    85
2018-10-04    90
2018-10-05    83
2018-10-06    87
Freq: D, Name: Missoula, dtype: int64

In [24]:
# Select several columns at once; the result is a DataFrame whose columns
# follow the order given in the list.
temps_df.loc[:, ['Philadelphia', 'Missoula']]

            Philadelphia  Missoula
2018-10-01            70        80
2018-10-02            75        82
2018-10-03            69        85
2018-10-04            83        90
2018-10-05            79        83
2018-10-06            77        87

In [25]:
# Attribute-style access to the 'Missoula' column.
temps_df.Missoula

2018-10-01    80
2018-10-02    82
2018-10-03    85
2018-10-04    90
2018-10-05    83
2018-10-06    87
Freq: D, Name: Missoula, dtype: int64

In [26]:
# Subtract one column from the other, row by row.
temps_df.Missoula.sub(temps_df.Philadelphia)

2018-10-01    10
2018-10-02     7
2018-10-03    16
2018-10-04     7
2018-10-05     4
2018-10-06    10
Freq: D, dtype: int64

In [27]:
# Store temp_diffs in temps_df under the column name 'Difference'.
# This modifies temps_df in place; later cells read temps_df.Difference.
temps_df['Difference'] = temp_diffs
temps_df

            Missoula  Philadelphia  Difference
2018-10-01        80            70          10
2018-10-02        82            75           7
2018-10-03        85            69          16
2018-10-04        90            83           7
2018-10-05        83            79           4
2018-10-06        87            77          10

In [28]:
# Inspect the Index object that holds the column names.
temps_df.columns

Index(['Missoula', 'Philadelphia', 'Difference'], dtype='object')

In [29]:
# Positional slice of the Difference column: positions 1 to 3 (the stop
# position 4 is excluded).
temps_df.Difference.iloc[1:4]

2018-10-02     7
2018-10-03    16
2018-10-04     7
Freq: D, Name: Difference, dtype: int64

In [30]:
# Select the second row by integer position; the row comes back as a Series.
temps_df.iloc[1]

Missoula        82
Philadelphia    75
Difference       7
Name: 2018-10-02 00:00:00, dtype: int64

In [31]:
# The index of a row Series: per the output below, the DataFrame's column names.
temps_df.iloc[1].index

Index(['Missoula', 'Philadelphia', 'Difference'], dtype='object')

In [32]:
# Select a row by its index label, given here as a date string.
temps_df.loc['2018-10-3']

Missoula        85
Philadelphia    69
Difference      16
Name: 2018-10-03 00:00:00, dtype: int64

In [33]:
# Pick rows at positions 1, 3 and 5, then take their Difference column.
temps_df.iloc[[1, 3, 5]]['Difference']

2018-10-02     7
2018-10-04     7
2018-10-06    10
Freq: 2D, Name: Difference, dtype: int64

In [34]:
# Boolean Series: True where the Missoula value is strictly greater than 82.
temps_df.Missoula.gt(82)

2018-10-01    False
2018-10-02    False
2018-10-03     True
2018-10-04     True
2018-10-05     True
2018-10-06     True
Freq: D, Name: Missoula, dtype: bool

In [35]:
# Keep only the rows where the boolean mask is True.
temps_df.loc[temps_df.Missoula > 82]

            Missoula  Philadelphia  Difference
2018-10-03        85            69          16
2018-10-04        90            83           7
2018-10-05        83            79           4
2018-10-06        87            77          10

In [36]:
!head /home/sha/data/GOOG.csv

Date,Open,High,Low,Close,Adj Close,Volume
2006-12-01,242.082611,243.283112,238.356583,479.483612,239.502289,11304900
2006-12-04,240.598175,242.804901,238.779999,483.522522,241.519730,9836400
2006-12-05,242.789963,243.806152,241.539658,485.666626,242.590714,8236700
2006-12-06,242.570786,245.280624,241.355347,487.371948,243.442520,8933900
2006-12-07,244.199677,244.981750,240.309265,481.318573,240.418854,9363500
2006-12-08,240.070160,243.387726,239.103775,482.784546,241.151108,7979500
2006-12-11,241.554596,243.537170,240.996689,482.605042,241.061447,6551200
2006-12-12,241.021591,242.271912,239.243256,480.460938,239.990463,8393200
2006-12-13,241.440018,241.843506,237.619339,477.678558,238.600662,9359100


In [38]:
# Load the GOOG price history from CSV.
# 'Date' is parsed to datetimes but deliberately kept as a regular column
# (default integer RangeIndex). The recorded output of this cell and the
# following cells (df.index, df.Date[0]) all assume that layout; passing
# index_col='Date' would move the column into the index and make df.Date
# fail on a fresh run.
# NOTE(review): the absolute path is machine-specific; adjust data_dir for
# your environment.
data_dir = '/home/sha/data/'
file_name = 'GOOG.csv'
df = pd.read_csv(data_dir + file_name, parse_dates=['Date'])
df

           Date        Open        High         Low  \
0    2006-12-01  242.082611  243.283112  238.356583   
1    2006-12-04  240.598175  242.804901  238.779999   
2    2006-12-05  242.789963  243.806152  241.539658   
3    2006-12-06  242.570786  245.280624  241.355347   
4    2006-12-07  244.199677  244.981750  240.309265   
...         ...         ...         ...         ...   
2512 2016-11-23  767.729980  768.283020  755.250000   
2513 2016-11-25  764.260010  765.000000  760.520020   
2514 2016-11-28  760.000000  779.530029  759.799988   
2515 2016-11-29  771.530029  778.500000  768.239990   
2516 2016-11-30  770.070007  772.989990  754.830017   

           Close   Adj Close    Volume  
0     479.483612  239.502289  11304900  
1     483.522522  241.519730   9836400  
2     485.666626  242.590714   8236700  
3     487.371948  243.442520   8933900  
4     481.318573  240.418854   9363500  
...          ...         ...       ...  
2512  760.989990  760.989990   1478400  
2513  761.6

In [39]:
# Inspect the row index of the loaded frame.
df.index

RangeIndex(start=0, stop=2517, step=1)

In [40]:
# Check the Python type of the first entry in the 'Date' column.
type(df['Date'][0])

pandas._libs.tslibs.timestamps.Timestamp