# Fetching daily forex data for multiple currency pairs over the last ten years through the Alpha Vantage API

In [11]:
import os
import warnings
from datetime import datetime, timedelta

import pandas as pd
import plotly.express as px
import requests

# Alpha Vantage API key. Read it from the ALPHAVANTAGE_API_KEY environment
# variable so the real key is never saved inside the notebook; if the variable
# is not set, replace the placeholder below with your own key.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'YOUR_API_KEY')

# Function to fetch forex data
def fetch_forex_data(from_currency, to_currency, years=10):
    """Fetch the daily FX time series for one currency pair from Alpha Vantage.

    Parameters
    ----------
    from_currency : str
        Currency code sent as ``from_symbol`` (e.g. ``'USD'``).
    to_currency : str
        Currency code sent as ``to_symbol`` (e.g. ``'INR'``).
    years : int, default 10
        Number of calendar years of history to keep, counted back from today.

    Returns
    -------
    pandas.DataFrame
        One row per date (ascending ``DatetimeIndex``) holding the columns
        returned by the API plus a ``'Currency Pair'`` column. The frame is
        empty when the response carries no ``'Time Series FX (Daily)'`` entry;
        in that case a warning describing the response is issued.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    requests.Timeout
        If the server does not answer within 30 seconds.
    """
    url = 'https://www.alphavantage.co/query'
    params = {
        'function': 'FX_DAILY',
        'from_symbol': from_currency,
        'to_symbol': to_currency,
        'apikey': api_key,
        'outputsize': 'full'
    }
    # A timeout keeps one stalled request from hanging the whole notebook.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Extracting the time series data
    time_series = data.get('Time Series FX (Daily)', {})
    if not time_series:
        # A payload without the time-series key would otherwise turn into a
        # silently empty frame and the pair would vanish from the combined
        # data. Surface whatever explanation the API sent instead.
        # NOTE(review): 'Note' / 'Information' / 'Error Message' are the keys
        # Alpha Vantage is understood to use for rate-limit and error
        # messages - confirm against the API documentation.
        reason = (
            data.get('Note')
            or data.get('Information')
            or data.get('Error Message')
            or f'response keys: {list(data)}'
        )
        warnings.warn(
            f'No data returned for {from_currency}/{to_currency}: {reason}',
            stacklevel=2,
        )

    # Converting to DataFrame
    df = pd.DataFrame.from_dict(time_series, orient='index')
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    # Adding a column for the currency pair
    df['Currency Pair'] = f'{from_currency}/{to_currency}'

    # Filtering data for the last `years` years (10 by default). The cutoff is
    # taken at midnight so the boundary date itself is included.
    cutoff = pd.Timestamp.now().normalize() - pd.DateOffset(years=years)
    df = df[df.index >= cutoff]

    return df

# List of common currency pairs to fetch
currency_pairs = [
    ('USD', 'INR'), ('EUR', 'USD'), ('GBP', 'USD'), ('USD', 'JPY'), ('AUD', 'USD'),
    ('USD', 'CAD'), ('USD', 'CHF'), ('NZD', 'USD'), ('EUR', 'GBP'), ('EUR', 'JPY'),
    ('GBP', 'JPY'), ('AUD', 'JPY'), ('EUR', 'AUD'), ('GBP', 'AUD'), ('USD', 'SGD')
]

# Fetch every pair first and combine them with a single concat afterwards;
# concatenating inside the loop re-copies the accumulated frame each iteration.
pair_frames = [
    fetch_forex_data(from_currency, to_currency)
    for from_currency, to_currency in currency_pairs
]

# Report pairs that came back empty so gaps in the combined data are visible
# instead of silently missing.
missing_pairs = [
    f'{from_currency}/{to_currency}'
    for (from_currency, to_currency), frame in zip(currency_pairs, pair_frames)
    if frame.empty
]
if missing_pairs:
    print(f"No data returned for: {', '.join(missing_pairs)}")

# Combine, move the date index into a 'Date' column and give the API's
# numbered price columns readable names.
all_data = (
    pd.concat(pair_frames)
    .reset_index()
    .rename(columns={
        'index': 'Date',
        "1. open": "Open_price",
        "2. high": "Day_high",
        "3. low": "Day_low",
        "4. close": "Closing_price",
    })
)

print(all_data)


            Date Open_price   Day_high    Day_low Closing_price Currency Pair
0     2014-11-07   61.39000   61.62000   61.34000      61.40000       USD/INR
1     2014-11-10   61.50000   61.63500   61.34000      61.49500       USD/INR
2     2014-11-11   61.53000   61.55500   61.50500      61.50800       USD/INR
3     2014-11-12   61.50800   61.56000   61.35000      61.39100       USD/INR
4     2014-11-13   61.36800   61.62300   61.35000      61.56500       USD/INR
...          ...        ...        ...        ...           ...           ...
28652 2024-09-15  184.92000  185.86600  183.80200     184.92000       GBP/JPY
28653 2024-09-16  185.98500  187.13800  185.31900     185.98500       GBP/JPY
28654 2024-09-17  186.98900  187.97200  185.84599     186.98900       GBP/JPY
28655 2024-09-18  187.78500  188.97200  187.39500     188.81300       GBP/JPY
28656 2024-09-19  188.32400  190.38900  188.16200     189.64100       GBP/JPY

[28657 rows x 6 columns]


In [12]:
# Preview the first rows of the combined frame.
all_data.head()

Unnamed: 0,Date,Open_price,Day_high,Day_low,Closing_price,Currency Pair
0,2014-11-07,61.39,61.62,61.34,61.4,USD/INR
1,2014-11-10,61.5,61.635,61.34,61.495,USD/INR
2,2014-11-11,61.53,61.555,61.505,61.508,USD/INR
3,2014-11-12,61.508,61.56,61.35,61.391,USD/INR
4,2014-11-13,61.368,61.623,61.35,61.565,USD/INR


In [13]:
# Summarise the columns: non-null counts and dtypes.
all_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28657 entries, 0 to 28656
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           28657 non-null  datetime64[ns]
 1   Open_price     28657 non-null  object        
 2   Day_high       28657 non-null  object        
 3   Day_low        28657 non-null  object        
 4   Closing_price  28657 non-null  object        
 5   Currency Pair  28657 non-null  object        
dtypes: datetime64[ns](1), object(5)
memory usage: 1.3+ MB


#### The `info()` output above shows that the four price columns have `object` dtype, so we need to cast them to `float` before doing any numeric analysis

In [15]:
# Cast each price column from object to float, one column at a time.
for price_column in ("Open_price", "Day_high", "Day_low", "Closing_price"):
    all_data[price_column] = all_data[price_column].astype(float)

In [16]:
# Show every column's dtype to check the result of the float cast.
all_data.dtypes

Date             datetime64[ns]
Open_price              float64
Day_high                float64
Day_low                 float64
Closing_price           float64
Currency Pair            object
dtype: object

In [17]:
# Summary statistics for the date and price columns.
# NOTE: these figures pool every currency pair together, so they mix exchange
# rates on very different scales; read them as a sanity check only.
all_data.describe()

Unnamed: 0,Date,Open_price,Day_high,Day_low,Closing_price
count,28657,28657.0,28657.0,28657.0,28657.0
mean,2019-09-22 15:11:34.573751552,44.480479,44.63776,44.316553,44.48128
min,2014-09-18 00:00:00,0.55724,0.56157,0.54786,0.55724
25%,2017-03-22 00:00:00,0.87813,0.88129,0.8755,0.87813
50%,2019-09-24 00:00:00,1.26634,1.27067,1.2621,1.26637
75%,2022-03-23 00:00:00,108.932,109.283,108.637,108.931
max,2024-09-19 00:00:00,207.703,208.102,206.30499,207.703
std,,61.199195,61.426837,60.957576,61.199789


## Outlier Detection

In [18]:
# Draw one figure per numeric column with a separate box for each currency
# pair. The pairs trade on very different scales, so pooling them into a single
# box would hide outliers that only stand out within their own pair.
for column in all_data.select_dtypes("number"):
    fig = px.box(
        all_data,
        x="Currency Pair",
        y=column,
        title=f"Outlier Detection for column : {column}",
    )
    fig.show()

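#### The box plots above show no outliers in the data. Because the currency pairs trade on very different scales (for example USD/INR versus EUR/USD), outliers should be judged per currency pair.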