# Regex

In [1]:
import pandas as pd 
import requests 
import io 
import zipfile 
from io import BytesIO
import os

# Remote archive holding one day of Forexite intraday FX quotes.
myurl="https://www.forexite.com/free_forex_quotes/2011/11/011111.zip"

# Local directory for the raw Forexite files. exist_ok=True makes the call a no-op
# when the directory is already there, so no separate existence check is needed.
dirForexite="data/raw/FX/intraday/forexite"
os.makedirs(dirForexite, exist_ok=True)

# Local path of the archive, derived from dirForexite so the two cannot drift apart.
myfile=dirForexite+'/011111.zip'


In [2]:
# Load the quotes straight from the remote archive addressed by myurl.
FX = pd.read_csv(myurl)

# Keep a local Parquet copy of the raw frame
# (pandas needs a parquet engine installed for this, e.g. the fastparquet package).
parquet_path = "data/raw/FX/intraday/forexite/011111.parquet"
FX.to_parquet(parquet_path)

# Bare last expression: the notebook renders the frame.
FX

Unnamed: 0,<TICKER>,<DTYYYYMMDD>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>
0,EURUSD,20111101,100,1.3841,1.3842,1.3841,1.3841
1,EURUSD,20111101,200,1.3841,1.3841,1.3840,1.3840
2,EURUSD,20111101,300,1.3841,1.3841,1.3835,1.3836
3,EURUSD,20111101,400,1.3836,1.3836,1.3836,1.3836
4,EURUSD,20111101,500,1.3837,1.3837,1.3833,1.3835
...,...,...,...,...,...,...,...
73184,USXUSD,20111101,235600,77.3600,77.4200,77.3600,77.3900
73185,USXUSD,20111101,235700,77.3900,77.3900,77.3800,77.3800
73186,USXUSD,20111101,235800,77.3800,77.3900,77.3800,77.3900
73187,USXUSD,20111101,235900,77.4000,77.4200,77.4000,77.4200


In [3]:
import re
# Strip the angle brackets from every column name, e.g. "<TICKER>" -> "TICKER".
angle_brackets = re.compile("(<|>)")
FX.columns = [angle_brackets.sub("", column_name) for column_name in FX.columns]

In [4]:
# the pandas way: put a "/" between the two 3-character halves of a 6-character
# ticker (EURUSD -> EUR/USD).
# The result is assigned back to the column rather than using
# FX["TICKER"].replace(..., inplace=True): that chained in-place call works on an
# intermediate object and does not update FX when pandas Copy-on-Write is active.
# Raw strings keep \w and \1 from being interpreted as string escapes.
FX["TICKER"]=FX["TICKER"].replace(r'^(\w{3})(\w{3})$',r'\1/\2',regex=True)

In [5]:
# Render TIME as a zero-padded, six-character string (100 -> "000100").
time_as_int = FX["TIME"].to_numpy().astype(int)
FX["TIME"] = ["%06d" % hhmmss for hhmmss in time_as_int]


In [6]:
# looks good: visual check of the frame after the column-name, TICKER and TIME clean-up
# (bare last expression, so the notebook renders it)
FX

Unnamed: 0,TICKER,DTYYYYMMDD,TIME,OPEN,HIGH,LOW,CLOSE
0,EUR/USD,20111101,000100,1.3841,1.3842,1.3841,1.3841
1,EUR/USD,20111101,000200,1.3841,1.3841,1.3840,1.3840
2,EUR/USD,20111101,000300,1.3841,1.3841,1.3835,1.3836
3,EUR/USD,20111101,000400,1.3836,1.3836,1.3836,1.3836
4,EUR/USD,20111101,000500,1.3837,1.3837,1.3833,1.3835
...,...,...,...,...,...,...,...
73184,USX/USD,20111101,235600,77.3600,77.4200,77.3600,77.3900
73185,USX/USD,20111101,235700,77.3900,77.3900,77.3800,77.3800
73186,USX/USD,20111101,235800,77.3800,77.3900,77.3800,77.3900
73187,USX/USD,20111101,235900,77.4000,77.4200,77.4000,77.4200


In [7]:
# Combine the DTYYYYMMDD and TIME columns into timezone-aware (UTC) timestamps
# and use them as the index of FX.
# TIME is a zero-padded "HHMMSS" string without separators, so the time part of the
# format must be "%H%M%S"; "%H:%M:%S" does not match strings such as
# "20111101 000100" and makes to_datetime raise a ValueError.
FX.index=pd.to_datetime(FX["DTYYYYMMDD"].map(str)+" "+FX["TIME"],
                        format="%Y%m%d %H%M%S",
                        utc=True)

# Categorical plots


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters  # avoids a warning about timezones
register_matplotlib_converters()

sns.set()   # set default style

# Raw CLOSE price over time, one color (hue) per TICKER.
# It is quite hard to see anything because of the large diversity of y scales.
sns.lineplot(x=FX.index, y="CLOSE",  hue="TICKER",data=FX)
plt.xticks(rotation=45)                                        # avoids overlapping x labels
plt.ylabel(r'$p_t$')                                           # the $...$ delimiters make matplotlib typeset the label as math
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  # legend placed outside the axes, to the right
plt.savefig("FX_notscaled.pdf", bbox_inches="tight")           # tight bounding box so the outside legend is not cropped

In [None]:
def normalize_price(x):
    """Divide x by its first value.

    The first value is taken positionally with .iloc because transform hands
    over a pandas object rather than a plain array.
    """
    return x/x.iloc[0]


# Rescale CLOSE within each TICKER group so that every group starts at 1.
close_by_ticker = FX[ ["TICKER","CLOSE"] ].groupby("TICKER")
FX["normalized_close"] = close_by_ticker.transform(normalize_price)

In [None]:
# Normalized close over time, one color (hue) per TICKER.
sns.lineplot(x=FX.index, y="normalized_close",hue="TICKER",data=FX)
plt.xticks(rotation=45)                                        # avoids overlapping x labels
plt.ylabel(r'$\frac{p_t}{p_0}$')                               # without the $...$ delimiters the LaTeX source is drawn as plain text
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)  # legend placed outside the axes, to the right