# Storytelling Data Visualization on Exchange Rates

dataset
- Euro daily exchange rates (1999-2021)
- data source is the European Central Bank

project goal
- explanatory data viz
- use Gestalt principles and FiveThirtyEight style

In [1]:
import numpy as np
import pandas as pd
import datetime as dt

import matplotlib.pyplot as plt
import matplotlib.style as style
# Render figures inline in the notebook output.
%matplotlib inline
# Apply the 'fivethirtyeight' matplotlib style sheet to every figure drawn below
# (the project goal is a FiveThirtyEight-style explanatory chart).
style.use('fivethirtyeight')

## 1. The Euro Daily Exchange Rates Dataset

In [2]:
# Load the European Central Bank euro daily exchange-rate history into a DataFrame.
exchange_rates = pd.read_csv(filepath_or_buffer='euro-daily-hist_1999_2020.csv')

In [3]:
# Peek at the first five rows (the file starts with the most recent dates).
exchange_rates.head()

Unnamed: 0,Period\Unit:,[Australian dollar ],[Bulgarian lev ],[Brazilian real ],[Canadian dollar ],[Swiss franc ],[Chinese yuan renminbi ],[Cypriot pound ],[Czech koruna ],[Danish krone ],...,[Romanian leu ],[Russian rouble ],[Swedish krona ],[Singapore dollar ],[Slovenian tolar ],[Slovak koruna ],[Thai baht ],[Turkish lira ],[US dollar ],[South African rand ]
0,2021-01-08,1.5758,1.9558,6.5748,1.5543,1.0827,7.9184,,26.163,7.4369,...,4.8708,90.8,10.051,1.6228,,,36.848,9.0146,1.225,18.7212
1,2021-01-07,1.5836,1.9558,6.5172,1.5601,1.0833,7.9392,,26.147,7.4392,...,4.8712,91.2,10.0575,1.6253,,,36.859,8.9987,1.2276,18.7919
2,2021-01-06,1.5824,1.9558,6.5119,1.564,1.0821,7.9653,,26.145,7.4393,...,4.872,90.8175,10.0653,1.6246,,,36.921,9.0554,1.2338,18.5123
3,2021-01-05,1.5927,1.9558,6.5517,1.5651,1.0803,7.9315,,26.227,7.4387,...,4.8721,91.6715,10.057,1.618,,,36.776,9.0694,1.2271,18.4194
4,2021-01-04,1.5928,1.9558,6.3241,1.5621,1.0811,7.9484,,26.141,7.4379,...,4.8713,90.342,10.0895,1.6198,,,36.728,9.0579,1.2296,17.9214


In [4]:
# Peek at the last five rows (the oldest dates in the file).
exchange_rates.tail()

Unnamed: 0,Period\Unit:,[Australian dollar ],[Bulgarian lev ],[Brazilian real ],[Canadian dollar ],[Swiss franc ],[Chinese yuan renminbi ],[Cypriot pound ],[Czech koruna ],[Danish krone ],...,[Romanian leu ],[Russian rouble ],[Swedish krona ],[Singapore dollar ],[Slovenian tolar ],[Slovak koruna ],[Thai baht ],[Turkish lira ],[US dollar ],[South African rand ]
5694,1999-01-08,1.8406,,,1.7643,1.6138,,0.58187,34.938,7.4433,...,1.3143,27.2075,9.165,1.9537,188.84,42.56,42.559,0.3718,1.1659,6.7855
5695,1999-01-07,1.8474,,,1.7602,1.6165,,0.58187,34.886,7.4431,...,1.3092,26.9876,9.18,1.9436,188.8,42.765,42.1678,0.3701,1.1632,6.8283
5696,1999-01-06,1.882,,,1.7711,1.6116,,0.582,34.85,7.4452,...,1.3168,27.4315,9.305,1.9699,188.7,42.778,42.6949,0.3722,1.1743,6.7307
5697,1999-01-05,1.8944,,,1.7965,1.6123,,0.5823,34.917,7.4495,...,1.3168,26.5876,9.4025,1.9655,188.775,42.848,42.5048,0.3728,1.179,6.7975
5698,1999-01-04,1.91,,,1.8004,1.6168,,0.58231,35.107,7.4501,...,1.3111,25.2875,9.4696,1.9554,189.045,42.991,42.6799,0.3723,1.1789,6.9358


In [5]:
# Summary statistics. Only three columns show up in the result, which hints that
# the remaining currency columns were not parsed as numbers.
exchange_rates.describe()

Unnamed: 0,[Iceland krona ],[Romanian leu ],[Turkish lira ]
count,3292.0,5637.0,5637.0
mean,100.764678,3.867979,2.689991
std,32.461566,0.86242,1.867694
min,68.07,1.2912,0.3701
25%,81.075,3.5162,1.68
50%,87.86,4.2142,2.103
75%,122.525,4.4768,3.0593
max,305.0,4.8768,10.1489


In [6]:
# Per-column dtype and non-null count, to see which columns need type conversion
# and which currencies have large gaps.
exchange_rates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5699 entries, 0 to 5698
Data columns (total 41 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Period\Unit:              5699 non-null   object 
 1   [Australian dollar ]      5699 non-null   object 
 2   [Bulgarian lev ]          5297 non-null   object 
 3   [Brazilian real ]         5431 non-null   object 
 4   [Canadian dollar ]        5699 non-null   object 
 5   [Swiss franc ]            5699 non-null   object 
 6   [Chinese yuan renminbi ]  5431 non-null   object 
 7   [Cypriot pound ]          2346 non-null   object 
 8   [Czech koruna ]           5699 non-null   object 
 9   [Danish krone ]           5699 non-null   object 
 10  [Estonian kroon ]         3130 non-null   object 
 11  [UK pound sterling ]      5699 non-null   object 
 12  [Greek drachma ]          520 non-null    object 
 13  [Hong Kong dollar ]       5699 non-null   object 
 14  [Croatia

#### Observations
- col names
    - remove [ ] 
    - remove trailing spaces before ]
    - replace space with _
- ~~Cypriot pound and Slovenian tolar, Slovak koruna, Bulgarian lev, Brazilian real, Chinese yuan renminbi --> what percent is NaN?~~ *visualizing USD-to-EUR*
- from .describe(): 3 cols numeric? 
    - need to convert to float and look at .describe() again
- .info() confirms that 
    - 3 cols float
    - 38 cols object
- `Period\Unit:` not datetime, but uniform format? 2021-01-08
- 40 cols = currencies
- ~ 5,700 entries
- ~~high % data null:~~ *visualizing USD-to-EUR*
    - ~~12  [Greek drachma ]...520 non-null    object~~
    - ~~35  [Slovenian tolar ]...2085 non-null   object~~ 
    - ~~24  [Maltese lira ]........2346 non-null   object~~   
    - ~~07  [Cypriot pound ]...2346 non-null   object~~ 
    - ~~36  [Slovak koruna ]...2608 non-null   object~~ 
- ~~The Maltese and Cypriot currencies have the same number of entries, but Malta and Cyprus are two separate countries: "The political relations are close due to similarities between the 2 countries (historical, economical and regional)."  [Wikipedia: Cyprus–Malta relations](https://en.wikipedia.org/wiki/Cyprus%E2%80%93Malta_relations)~~





    



## 2. Data Cleaning

In [7]:
# Quick reminder of the raw column names before cleaning.
exchange_rates.head(2)

Unnamed: 0,Period\Unit:,[Australian dollar ],[Bulgarian lev ],[Brazilian real ],[Canadian dollar ],[Swiss franc ],[Chinese yuan renminbi ],[Cypriot pound ],[Czech koruna ],[Danish krone ],...,[Romanian leu ],[Russian rouble ],[Swedish krona ],[Singapore dollar ],[Slovenian tolar ],[Slovak koruna ],[Thai baht ],[Turkish lira ],[US dollar ],[South African rand ]
0,2021-01-08,1.5758,1.9558,6.5748,1.5543,1.0827,7.9184,,26.163,7.4369,...,4.8708,90.8,10.051,1.6228,,,36.848,9.0146,1.225,18.7212
1,2021-01-07,1.5836,1.9558,6.5172,1.5601,1.0833,7.9392,,26.147,7.4392,...,4.8712,91.2,10.0575,1.6253,,,36.859,8.9987,1.2276,18.7919


In [14]:
# Tidy the raw frame in a single chained pass (no in-place mutation, so the
# cell is easy to read top-down and each step returns a new frame):
#   1. rename the two columns used later to short names (us_dollar, date)
#   2. parse the date strings into datetimes
#   3. sort rows chronologically (oldest first)
#   4. rebuild a fresh 0..n-1 index, dropping the original one
exchange_rates = (
    exchange_rates
    .rename(columns={
        '[US dollar ]': 'us_dollar',
        # The backslash is doubled because a bare "\U" would start a unicode escape.
        'Period\\Unit:': 'date',
    })
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
    .reset_index(drop=True)
)

In [23]:
# Build the euro-to-dollar series:
#   1. isolate the date and us_dollar columns
#   2. drop rows whose rate is the '-' placeholder (62 rows when first inspected
#      with value_counts())
#   3. convert us_dollar from string to float
usd_columns = exchange_rates[['date', 'us_dollar']]

# Match '-' as a literal character; na=False treats a missing rate as "no dash"
# so the boolean mask never contains NaN and can be safely negated.
is_placeholder = usd_columns['us_dollar'].str.contains('-', regex=False, na=False)

# .copy() makes euro_to_dollar an independent frame, so the column assignment
# below does not raise SettingWithCopyWarning or depend on view/copy semantics.
euro_to_dollar = usd_columns.loc[~is_placeholder].copy()

euro_to_dollar['us_dollar'] = euro_to_dollar['us_dollar'].astype(float)

# Final sanity check: every remaining value should now be a float.
euro_to_dollar['us_dollar'].value_counts()


us_dollar
1.2276    9
1.1215    8
1.1305    7
1.0867    6
1.3373    6
         ..
1.1892    1
1.1869    1
1.1752    1
1.1770    1
1.1750    1
Name: count, Length: 3527, dtype: int64