In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
# Import all csv files with exchange data
# Add a column with currency to each one
# Concat all them into one DataFrame named exchange

import glob
import re

files = glob.glob('data/exchange market/*.csv')

curr_names = []
curr_list = []
for file in files:
    name = re.findall('\\\(.+).csv', file)
    name_string = ''.join(name)
    curr_names.append(name_string)
    df = pd.read_csv(file, index_col=None, header=0)
    df['Currency'] = name_string
    curr_list.append(df)

exchange = pd.concat(curr_list, axis=0, ignore_index=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [3]:
exchange.tail()

Unnamed: 0,% var.,Apertura,Currency,Fecha,Máximo,Mínimo,Vol.,Último
11274,"-0,25%",14619,EUR_USD,29.09.2009,14646,14526,,14585
11275,"-0,46%",14699,EUR_USD,28.09.2009,14722,14560,,14621
11276,"0,18%",14664,EUR_USD,25.09.2009,14726,14614,,14688
11277,"-0,48%",14710,EUR_USD,24.09.2009,14806,14628,,14661
11278,"-0,40%",14790,EUR_USD,23.09.2009,14844,14725,,14731


In [4]:
exchange.columns = ['Var_%','Open', 'Currency', 'Date', 'Max', 'Min', 'Vol', 'Last']

In [5]:
exchange.head()

Unnamed: 0,Var_%,Open,Currency,Date,Max,Min,Vol,Last
0,"-0,58%","7.476,8",Bitcoin,24.10.2019,"7.501,5","7.366,3","895,39K","7.433,3"
1,"-6,90%","8.031,0",Bitcoin,23.10.2019,"8.055,0","7.329,2","892,08K","7.477,0"
2,"-2,15%","8.207,4",Bitcoin,22.10.2019,"8.289,1","8.017,7","570,39K","8.031,4"
3,"-0,18%","8.224,1",Bitcoin,21.10.2019,"8.315,3","8.155,7","489,40K","8.208,2"
4,"3,34%","7.957,4",Bitcoin,20.10.2019,"8.288,5","7.889,6","513,91K","8.223,4"


In [6]:
exchange['Date'] = pd.to_datetime(exchange['Date'], format='%d.%m.%Y')
exchange.head()

Unnamed: 0,Var_%,Open,Currency,Date,Max,Min,Vol,Last
0,"-0,58%","7.476,8",Bitcoin,2019-10-24,"7.501,5","7.366,3","895,39K","7.433,3"
1,"-6,90%","8.031,0",Bitcoin,2019-10-23,"8.055,0","7.329,2","892,08K","7.477,0"
2,"-2,15%","8.207,4",Bitcoin,2019-10-22,"8.289,1","8.017,7","570,39K","8.031,4"
3,"-0,18%","8.224,1",Bitcoin,2019-10-21,"8.315,3","8.155,7","489,40K","8.208,2"
4,"3,34%","7.957,4",Bitcoin,2019-10-20,"8.288,5","7.889,6","513,91K","8.223,4"


In [7]:
def number_formatting(x):
    a = x.replace('.','')
    return a.replace(',','.')

for col in ['Last', 'Open', 'Max', 'Min']:
    exchange[col] = pd.to_numeric(exchange[col].apply(number_formatting))
       
exchange.head()

Unnamed: 0,Var_%,Open,Currency,Date,Max,Min,Vol,Last
0,"-0,58%",7476.8,Bitcoin,2019-10-24,7501.5,7366.3,"895,39K",7433.3
1,"-6,90%",8031.0,Bitcoin,2019-10-23,8055.0,7329.2,"892,08K",7477.0
2,"-2,15%",8207.4,Bitcoin,2019-10-22,8289.1,8017.7,"570,39K",8031.4
3,"-0,18%",8224.1,Bitcoin,2019-10-21,8315.3,8155.7,"489,40K",8208.2
4,"3,34%",7957.4,Bitcoin,2019-10-20,8288.5,7889.6,"513,91K",8223.4


In [8]:
exchange.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11279 entries, 0 to 11278
Data columns (total 8 columns):
Var_%       11279 non-null object
Open        11279 non-null float64
Currency    11279 non-null object
Date        11279 non-null datetime64[ns]
Max         11279 non-null float64
Min         11279 non-null float64
Vol         3386 non-null object
Last        11279 non-null float64
dtypes: datetime64[ns](1), float64(4), object(3)
memory usage: 705.0+ KB


In [9]:
def var_reformatting(x):
    a = x.replace(',','.')
    return a.replace('%','')

exchange['Var_%'] =pd.to_numeric(exchange['Var_%'].apply(var_reformatting))

In [10]:
exchange.head()

Unnamed: 0,Var_%,Open,Currency,Date,Max,Min,Vol,Last
0,-0.58,7476.8,Bitcoin,2019-10-24,7501.5,7366.3,"895,39K",7433.3
1,-6.9,8031.0,Bitcoin,2019-10-23,8055.0,7329.2,"892,08K",7477.0
2,-2.15,8207.4,Bitcoin,2019-10-22,8289.1,8017.7,"570,39K",8031.4
3,-0.18,8224.1,Bitcoin,2019-10-21,8315.3,8155.7,"489,40K",8208.2
4,3.34,7957.4,Bitcoin,2019-10-20,8288.5,7889.6,"513,91K",8223.4


In [11]:
exchange['Currency'].unique()

array(['Bitcoin', 'EUR_CNY', 'EUR_GBP', 'EUR_USD'], dtype=object)

In [12]:
bitcoin_df = exchange.loc[exchange['Currency']=='Bitcoin']
EUR_CNY_df = exchange.loc[exchange['Currency']=='EUR_CNY']
EUR_GBP_df = exchange.loc[exchange['Currency']=='EUR_GBP']
EUR_USD_df = exchange.loc[exchange['Currency']=='EUR_USD']


In [13]:
def volume_change(x):
    if 'K' in x:
        a = x.replace('K','')
        return a.replace(',','.')
    if 'M' in x:
        a = x.replace('M','')
        return a.replace(',','000.')
    

In [14]:
bitcoin_df['Vol'] = pd.to_numeric(bitcoin_df['Vol'].apply(volume_change))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [18]:
EUR_CNY_df['Vol'] = pd.to_numeric(bitcoin_df['Vol'].apply(volume_change))

TypeError: argument of type 'float' is not iterable

In [None]:
EUR_GBP_df