## Pandas and Matplotlib

Data source: [Bitcoin Historical Data on Kaggle](https://www.kaggle.com/mczielinski/bitcoin-historical-data)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [41]:
# Read the one-minute price CSV (relative path) into a DataFrame.
csv_path = '../../data/bitcoin/bitflyerJPY_1-min_data_2018-06-01_to_2018-06-27.csv'
df = pd.read_csv(csv_path)

# Preview the first rows to check that the columns parsed as expected.
df.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,2018-06-01 00:00:00,816163,816460,815700,816460,9.137702,7456346.0,815997.92105
1,2018-06-01 00:01:00,816460,816999,816438,816563,4.359824,3560639.0,816693.3285
2,2018-06-01 00:02:00,816910,817079,816741,817074,1.701717,1390245.0,816965.75657
3,2018-06-01 00:03:00,817066,817541,816883,817462,3.546464,2898951.0,817419.97322
4,2018-06-01 00:04:00,817903,818000,817348,818000,9.823,8033058.0,817780.58868


In [42]:
# Show a longer preview: the first ten rows of the frame.
df.head(n=10)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,2018-06-01 00:00:00,816163,816460,815700,816460,9.137702,7456346.0,815997.92105
1,2018-06-01 00:01:00,816460,816999,816438,816563,4.359824,3560639.0,816693.3285
2,2018-06-01 00:02:00,816910,817079,816741,817074,1.701717,1390245.0,816965.75657
3,2018-06-01 00:03:00,817066,817541,816883,817462,3.546464,2898951.0,817419.97322
4,2018-06-01 00:04:00,817903,818000,817348,818000,9.823,8033058.0,817780.58868
5,2018-06-01 00:05:00,818000,818462,817836,818434,4.893215,4003442.0,818161.85934
6,2018-06-01 00:06:00,818544,818544,818434,818500,1.332351,1090443.0,818435.60924
7,2018-06-01 00:07:00,818437,818900,818372,818432,7.093076,5805792.0,818515.5117
8,2018-06-01 00:08:00,818232,820790,817438,819665,23.945281,19631050.0,819829.65286
9,2018-06-01 00:09:00,819669,819669,817863,819296,6.429505,5264810.0,818851.52641


In [43]:
# Print the frame summary: index range, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37441 entries, 0 to 37440
Data columns (total 8 columns):
Timestamp            37441 non-null object
Open                 37441 non-null int64
High                 37441 non-null int64
Low                  37441 non-null int64
Close                37441 non-null int64
Volume_(BTC)         37441 non-null float64
Volume_(Currency)    37441 non-null float64
Weighted_Price       37441 non-null float64
dtypes: float64(3), int64(4), object(1)
memory usage: 2.3+ MB


In [44]:
# Timestamp was read as a generic object column; parse it into
# datetime64 values so it can be compared and filtered as dates.
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Re-check the dtypes to confirm the conversion took effect.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37441 entries, 0 to 37440
Data columns (total 8 columns):
Timestamp            37441 non-null datetime64[ns]
Open                 37441 non-null int64
High                 37441 non-null int64
Low                  37441 non-null int64
Close                37441 non-null int64
Volume_(BTC)         37441 non-null float64
Volume_(Currency)    37441 non-null float64
Weighted_Price       37441 non-null float64
dtypes: datetime64[ns](1), float64(3), int64(4)
memory usage: 2.3 MB


In [46]:
# Summarize the Timestamp column to see the span of dates available
# before choosing a subset to work with.
df['Timestamp'].describe()

count                   37441
unique                  37441
top       2018-06-26 15:40:00
freq                        1
first     2018-06-01 00:00:00
last      2018-06-27 00:00:00
Name: Timestamp, dtype: object

In [47]:
# Pick June 13 using loc with a boolean mask.
# The day is expressed as the half-open interval
# [2018-06-13 00:00:00, 2018-06-14 00:00:00): an inclusive lower bound at
# midnight does not depend on the time resolution of the data, whereas an
# exclusive "> 2018-06-12 23:59:59" bound would also admit any sub-second
# timestamp from the last second of June 12.
is_june_13 = (df.Timestamp >= '2018-06-13 00:00:00') & (df.Timestamp < '2018-06-14 00:00:00')
df.loc[is_june_13]

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
17280,2018-06-13 00:00:00,729990,729990,729183,729183,3.827011,2.792179e+06,729597.74254
17281,2018-06-13 00:01:00,729121,729200,728576,729088,9.632921,7.021360e+06,728892.03768
17282,2018-06-13 00:02:00,729249,729500,729078,729463,13.908899,1.014184e+07,729161.64858
17283,2018-06-13 00:03:00,729355,730500,729355,729369,19.036052,1.389209e+07,729777.80865
17284,2018-06-13 00:04:00,729426,729675,729200,729232,5.133613,3.744046e+06,729319.95599
17285,2018-06-13 00:05:00,729541,729727,729300,729300,3.093560,2.256407e+06,729388.23394
17286,2018-06-13 00:06:00,729233,729460,728495,728958,9.745344,7.104786e+06,729044.16674
17287,2018-06-13 00:07:00,728904,728945,727233,728729,16.498750,1.201528e+07,728254.12141
17288,2018-06-13 00:08:00,728729,729981,728471,729981,12.929728,9.428490e+06,729210.21449
17289,2018-06-13 00:09:00,729721,730394,729293,730321,20.800977,1.518265e+07,729900.96093


In [48]:
# Pick June 13 and two columns using loc: a boolean row mask plus a list of
# column labels.
# The day is expressed as the half-open interval
# [2018-06-13 00:00:00, 2018-06-14 00:00:00) so the selection does not depend
# on the time resolution of the data (an exclusive "> 2018-06-12 23:59:59"
# bound would also admit sub-second timestamps from the end of June 12).
june_13_rows = (df.Timestamp >= '2018-06-13 00:00:00') & (df.Timestamp < '2018-06-14 00:00:00')
df.loc[june_13_rows, ['Open', 'Close']]

Unnamed: 0,Open,Close
17280,729990,729183
17281,729121,729088
17282,729249,729463
17283,729355,729369
17284,729426,729232
17285,729541,729300
17286,729233,728958
17287,728904,728729
17288,728729,729981
17289,729721,730321
