# Financial Times Scraping

### Inicializações

In [10]:
import pandas as pd
from datetime import datetime, timedelta
import requests


### Scraping Exchange Rates from www.exchangerates.org.uk

In [11]:
# Download the USD/EUR history page and load its first table into `df`
# with two columns: 'Date' and 'USD2EUR' (both still raw scraped values).
url = 'https://www.exchangerates.org.uk/USD-EUR-exchange-rate-history.html'
# A browser-like User-Agent is sent with the request
# (presumably the site rejects the default client identification - confirm).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

# requests waits indefinitely by default; a timeout keeps a stalled
# connection from freezing the notebook.
response = requests.get(url, headers=headers, timeout=30)

# Anything other than HTTP 200 is treated as a failed download.
if response.status_code != 200:
    raise ValueError(f"Failed to retrieve the webpage. Status code: {response.status_code}")

# Rows 0, 1 and 182 of the table are skipped
# (presumably header/footer rows of the page layout - confirm if the site changes).
# Only the first two columns of the first table are kept.
df = pd.read_html(response.content, skiprows=[0,1,182])[0][[0,1]].rename(columns={0:'Date',1:'USD2EUR'})


In [12]:
# Merge the freshly scraped rates in `df` into the CSV history file,
# adding only the days that are newer than the latest day already stored.
exchange_rates_csv = 'exchange_rates.csv'

# Normalise the scraped table: plain calendar dates as index, rate as float.
df['Date'] = df['Date'].apply(lambda x: pd.to_datetime(x).date())
# The number is taken from characters 8-14 of the cell text
# (assumes cells look like '1 USD = 0.9055 EUR' - TODO confirm).
df['USD2EUR'] = df['USD2EUR'].apply(lambda x: float(x[8:15]))
df.set_index('Date',inplace=True)

# Load the stored history with the same index type so dates compare cleanly.
df_historic_usd2eur = pd.read_csv(exchange_rates_csv,sep=';',parse_dates=True)
df_historic_usd2eur['Date'] = df_historic_usd2eur['Date'].apply(lambda x: pd.to_datetime(x).date())
df_historic_usd2eur.set_index('Date',inplace=True)

# Select the new rows by comparing dates instead of slicing up to the latest
# stored date and dropping the last row: the comparison still works when that
# date is missing from the scraped table and never discards a new row.
latest_saved = max(df_historic_usd2eur.index)
new_rates = df[df.index > latest_saved]

# New rows go first, ahead of the existing history, then the file is rewritten.
df_historic_usd2eur = pd.concat([new_rates, df_historic_usd2eur])
df_historic_usd2eur.to_csv(exchange_rates_csv,sep=';')

### Update Portfolio From Transactions

In [13]:
# Build the current portfolio from the transactions file: keep the rows
# without a redemption date, total the quantity per fund, save and display it.
transacoes_csv = 'transacoes.csv'
portfolio_csv = 'portofolio.csv'

# CSV parsing settings (also used when reading other CSV files later on).
encoding = 'latin_1'
thousands = ','
decimal = '.'


def to_date(d):
    """Parse a 'dd-mm-YYYY' string into a datetime."""
    return datetime.strptime(d, '%d-%m-%Y')


converters = {'Data de subscrição': to_date}

transactions = pd.read_csv(
    transacoes_csv,
    sep=',',
    encoding=encoding,
    thousands=thousands,
    decimal=decimal,
    converters=converters,
)

# Rows with an empty 'Data de resgate' are the positions still held.
still_held = transactions[transactions['Data de resgate'].isna()]
quantity_per_fund = still_held.groupby(['Code', 'Nome', 'Moeda'])['Quantidade'].sum()
df_portofolio = quantity_per_fund.reset_index()

df_portofolio.to_csv(portfolio_csv, sep=';', index=False)
df_portofolio.sort_values(by='Nome')

Unnamed: 0,Code,Nome,Moeda,Quantidade
24,LU1883314244,Amundi Europ Value A C,EUR,24.056
1,IE00B23S7K36,BNY Mellon Brazil Equity,EUR,3835.96
22,LU1495982271,Black Strat Asia Pacif Abs E2,EUR,111.73
5,LU0108459040,Candriam Biotech C Cap USD,USD,12.65
7,LU0140363697,F Technology N,EUR,315.377
2,IE00BD4GTQ32,FTGF ClearBridge Infra Val,EUR,303.229
13,LU0286668966,Fidelity Asian High Yield EUR,EUR,713.7
10,LU0197230542,Fidelity India Focus A,EUR,76.31
15,LU0333810850,GS India Equity,EUR,114.129
11,LU0213962813,HSBC Turkey Equity Eur,EUR,181.129


### Update with Scraped Quotes from FT

In [14]:
# Refresh the quote history of every fund in the portfolio.
# Sources, merged in this order: the existing Excel workbook, an optional
# Morningstar CSV export, and the FT historical-prices page.
# Result: `novo_dict_df`, mapping fund code -> DataFrame indexed by 'Date'
# (newest first).


def _merge_quotes(history, fresh):
    """Return `history` extended with the rows of `fresh` dated outside it.

    `history` may be None when nothing is known about the fund yet; in that
    case `fresh` alone is returned. The result is sorted newest first.
    """
    if history is None:
        return fresh.sort_index(ascending=False)
    new_dates = fresh.index.difference(history.index)
    return pd.concat([history, fresh.loc[new_dates]]).sort_index(ascending=False)


def to_date(d):
    """Parse a 'YYYY-mm-dd' string into a datetime."""
    return datetime.strptime(d, '%Y-%m-%d')


# URL template: filled with (fund code, currency) for each portfolio row.
url = 'https://markets.ft.com/data/funds/tearsheet/historical?s={}:{}'
xls = 'historico_cotacoes.xlsx'
converters = {'date': to_date}

novo_dict_df = dict()

# One sheet per fund; sheet names are stripped of surrounding whitespace.
dict_df = {key.strip(): value for key, value in pd.read_excel(xls, sheet_name=None).items()}

# Keep only the sheets of funds that are still in the portfolio.
portfolio_codes = set(df_portofolio['Code'])
for symbol, sheet in dict_df.items():
    if symbol not in portfolio_codes:
        continue
    sheet['Date'] = sheet['Date'].apply(lambda x: x.date())
    novo_dict_df[symbol] = sheet.set_index('Date', drop=True).sort_index(ascending=False)

for symbol, _, currency, _ in df_portofolio.itertuples(index=False):

    # Try to get data from a CSV downloaded from Morningstar.
    path = '.\\cotacoes_morningstar\\{}.csv'.format(symbol)
    try:
        df = pd.read_csv(path, sep=';', encoding=encoding, thousands=thousands, decimal=decimal, converters=converters)[['date', 'price']]
        df['date'] = df['date'].apply(lambda x: x.date())
        df = df.set_index('date', drop=True).sort_index(ascending=False).rename(columns={'price': 'Close'})
        df.index.rename('Date', inplace=True)
        novo_dict_df[symbol] = _merge_quotes(novo_dict_df.get(symbol), df)
    except Exception:
        # Best effort: a missing or unreadable export only produces a warning.
        # `Exception` (not a bare except) so Ctrl-C still interrupts the loop.
        print("Warning: No "+path)

    # Try to get data from FT.
    quote_url = url.format(symbol, currency)
    try:
        df = pd.read_html(quote_url)[0]
        # The last 17 characters of each Date cell are dropped before parsing
        # (presumably a duplicated short-form date - confirm against the page).
        df['Date'] = df['Date'].apply(lambda x: x[:-17]).apply(lambda x: pd.to_datetime(x).date())
        df = df.set_index('Date', drop=True)[['Close']]
        # `.get` lets a fund with no stored history start from the FT data
        # instead of failing on a missing key and losing the download.
        novo_dict_df[symbol] = _merge_quotes(novo_dict_df.get(symbol), df)
    except Exception:
        # Best effort: a failed download or unexpected table only warns.
        print("Warning: Invalid "+quote_url)
        



In [15]:
# Save every fund's quote history to the workbook, one sheet per fund code,
# then print the time of the save.

# Stop before the writer is opened when there is nothing to save: opening it
# replaces the existing workbook, and a workbook with no sheets cannot be
# written, so the stored history would be lost.
if not novo_dict_df:
    raise ValueError("No quote history to write; leaving " + xls + " untouched")

with pd.ExcelWriter(xls, engine="openpyxl") as writer:
    for symbol, df in novo_dict_df.items():
        df.to_excel(writer, sheet_name=symbol)
print(datetime.now())

2023-12-27 13:20:01.669221


In [16]:
# Display the portfolio ordered by fund name (the stored frame is not modified).
df_portofolio.sort_values('Nome')

Unnamed: 0,Code,Nome,Moeda,Quantidade
24,LU1883314244,Amundi Europ Value A C,EUR,24.056
1,IE00B23S7K36,BNY Mellon Brazil Equity,EUR,3835.96
22,LU1495982271,Black Strat Asia Pacif Abs E2,EUR,111.73
5,LU0108459040,Candriam Biotech C Cap USD,USD,12.65
7,LU0140363697,F Technology N,EUR,315.377
2,IE00BD4GTQ32,FTGF ClearBridge Infra Val,EUR,303.229
13,LU0286668966,Fidelity Asian High Yield EUR,EUR,713.7
10,LU0197230542,Fidelity India Focus A,EUR,76.31
15,LU0333810850,GS India Equity,EUR,114.129
11,LU0213962813,HSBC Turkey Equity Eur,EUR,181.129
