In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import glob
import os

## Example

In [2]:
# Example: load a single headerless tick file, name its first three columns and
# parse the first one as seconds since the Unix epoch.
# NOTE(review): the leading '.' in the file name looks like it could be a typo for './' — confirm.
data = (
    pd.read_csv('.1coinUSD.csv', header=None)
    .rename(columns={0: 'date', 1: 'price', 2: 'volume'})
    .assign(date=lambda ticks: pd.to_datetime(ticks['date'], unit='s'))
)

# Main

In [2]:
# Directory holding the gzip-compressed tick files; edit this single constant to
# point the notebook at another location.
DATA_DIR = 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick'

# glob.glob returns matches in arbitrary order, so sort them to make the
# processing order (and the progress log below) reproducible between runs.
gz_files = sorted(glob.glob(os.path.join(DATA_DIR, '*.gz')))
gz_files

['C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\1coinUSD.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\allcoinUSD.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkAUD.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkCAD.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkCHF.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkCNY.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkEUR.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkGBP.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkHKD.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkJPY.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkNZD.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkSGD.csv.gz',
 'C:/Users/Rocku/Dropbox/Data_analysis/mtgox/Bitcoin tick\\anxhkUSD.csv.gz

In [3]:
columns = ['Exchange', 'Dates', 'Currency', 'Datapoints', 'Mean_price', 'Mean_volume', 'Total_volume',
           'Min_volume', 'Max_volume', 'First_date', 'Last_date', 'TicksPerDaily', 'VolumePerDaily'
          ]


def _load_ticks(gz_file):
    """Read one gzip-compressed, headerless CSV and return it with named columns.

    The first three columns are renamed to 'date', 'price' and 'volume', and
    'date' is parsed as seconds since the Unix epoch.
    """
    ticks = pd.read_csv(gz_file, compression='gzip', sep=',', header=None)
    ticks = ticks.rename(columns={0: 'date', 1: 'price', 2: 'volume'})
    ticks['date'] = pd.to_datetime(ticks['date'], unit='s')
    return ticks


def _summarize_ticks(ticks, exchange):
    """Build one summary row (a dict keyed by the names in ``columns``).

    Parameters
    ----------
    ticks : DataFrame with 'date', 'price' and 'volume' columns, in file order.
    exchange : name derived from the file name; its last three characters are
        stored as 'Currency'.

    Raises
    ------
    ValueError
        If ``ticks`` has no rows (there is no first/last date to report).
    """
    if ticks.empty:
        raise ValueError('file contains no ticks')

    stats = ticks[['price', 'volume']].describe()
    # Positional access: first and last row as they appear in the file.
    first_date = ticks['date'].iloc[0]
    last_date = ticks['date'].iloc[-1]
    span = last_date - first_date
    # Whole days covered, truncated toward zero. int() works whether the
    # division returns a Python float or a NumPy scalar.
    whole_days = int(span / pd.Timedelta(days=1))

    datapoints = stats.loc['count', 'price']
    total_volume = ticks['volume'].sum()

    return {
        'Exchange': exchange,
        'Currency': exchange[-3:],
        'Datapoints': datapoints,
        'Mean_price': stats.loc['mean', 'price'],
        'Mean_volume': stats.loc['mean', 'volume'],
        'Total_volume': total_volume,
        'Min_volume': stats.loc['min', 'volume'],
        'Max_volume': stats.loc['max', 'volume'],
        'First_date': str(first_date),
        'Last_date': str(last_date),
        'Dates': span,
        # A span shorter than one day has no defined per-day rate; store NaN
        # instead of dividing by zero so later group means are not distorted.
        'TicksPerDaily': datapoints / whole_days if whole_days else np.nan,
        'VolumePerDaily': total_volume / whole_days if whole_days else np.nan,
    }


records = []        # one summary dict per successfully processed file
success = 0
failed = 0
failed_name = []    # paths of the files that could not be summarized
failed_error = {}   # path -> repr of the exception, so failures can be diagnosed

print('------------start -------------')
for i, gz_file in enumerate(gz_files):
    if i % 10 == 1:
        print('total:', success + failed, 'success:', success, 'failed:', failed)

    try:
        # basename handles the platform's path separators (the original
        # split on '\\' only worked for Windows-style paths).
        exchange = os.path.basename(gz_file).split('.')[0]
        # The loaded frame is not bound to a notebook-level name, so it can be
        # released as soon as its summary has been computed.
        records.append(_summarize_ticks(_load_ticks(gz_file), exchange))
        success += 1
    except Exception as exc:
        # Best-effort batch: keep going, but remember which file failed and why.
        # Catching Exception (not a bare except) lets Ctrl-C interrupt the run.
        failed += 1
        failed_name.append(gz_file)
        failed_error[gz_file] = repr(exc)

# Build the frame once; growing a DataFrame row by row is quadratic and
# DataFrame.append no longer exists in pandas 2.x.
df_described = pd.DataFrame(records, columns=columns)

print('total:', success + failed, 'success:', success, 'failed:', failed)
print('------------end -------------')

------------start -------------
total: 1 success: 1 failed: 0
total: 11 success: 11 failed: 0
total: 21 success: 21 failed: 0
total: 31 success: 29 failed: 2
total: 41 success: 37 failed: 4
total: 51 success: 47 failed: 4
total: 61 success: 57 failed: 4
total: 71 success: 67 failed: 4
total: 81 success: 77 failed: 4
total: 91 success: 86 failed: 5
total: 101 success: 93 failed: 8
total: 111 success: 103 failed: 8
total: 121 success: 113 failed: 8
total: 131 success: 121 failed: 10
total: 141 success: 131 failed: 10
total: 151 success: 140 failed: 11
total: 161 success: 150 failed: 11
total: 171 success: 160 failed: 11
total: 181 success: 170 failed: 11
total: 191 success: 180 failed: 11
total: 201 success: 190 failed: 11
total: 211 success: 199 failed: 12
total: 221 success: 208 failed: 13
total: 231 success: 218 failed: 13
total: 241 success: 228 failed: 13
total: 251 success: 231 failed: 20
total: 261 success: 233 failed: 28
total: 271 success: 240 failed: 31
------------end --------

In [None]:
# Order the summary rows from the largest to the smallest total volume.
df_described = df_described.sort_values('Total_volume', ascending=False)

In [None]:
# Write the per-exchange summary to CSV without the index column.
# `header` takes a bool or a list of column aliases; the previous string value
# only worked because a non-empty string is truthy. True writes the column names.
df_described.to_csv('summary_stat_bitcoin_exchanges.csv', header=True, index=False, encoding='utf-8')

In [None]:
# Group the per-exchange summary rows on their 'Currency' column.
group_by_currency = df_described.groupby(by='Currency')

In [None]:
# Non-null counts per currency for every column, with the currencies that have
# the most 'Exchange' entries listed first.
group_by_currency.count().sort_values(by='Exchange', ascending=False)

In [None]:
# Aggregate only the numeric statistic columns. Applying 'mean'/'std' to the
# string columns (Exchange, First_date, Last_date) raises a TypeError in
# pandas >= 2.0, where non-numeric columns are no longer silently dropped.
# 'Dates' (a timedelta) is left out as well.
stat_columns = ['Datapoints', 'Mean_price', 'Mean_volume', 'Total_volume',
                'Min_volume', 'Max_volume', 'TicksPerDaily', 'VolumePerDaily']
data = group_by_currency[stat_columns].aggregate(['count', 'mean', 'std'])

In [None]:
# Save the per-currency aggregate table (index included) to a CSV file.
data.to_csv(path_or_buf='data.csv')