# Web-scraping the data through aplha vantage api for multiple forex currencies for the last ten years 

In [14]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# Replace with your Alpha Vantage API key
api_key = 'YOUR_API_KEY'

# Function to fetch forex data
def fetch_forex_data(from_currency, to_currency):
    url = f'https://www.alphavantage.co/query'
    params = {
        'function': 'FX_DAILY',
        'from_symbol': from_currency,
        'to_symbol': to_currency,
        'apikey': api_key,
        'outputsize': 'full'
    }
    response = requests.get(url, params=params)
    data = response.json()
    
    # Extracting the time series data
    time_series = data.get('Time Series FX (Daily)', {})
    
    # Converting to DataFrame
    df = pd.DataFrame.from_dict(time_series, orient='index')
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()
    
    # Adding a column for the currency pair
    df['Currency Pair'] = f'{from_currency}/{to_currency}'
    
    # Filtering data for the last 10 years
    ten_years_ago = datetime.now() - timedelta(days=365*20)
    df = df[df.index >= ten_years_ago]
    
    return df

# List of common currency pairs to fetch which are allowed to trade in india 
currency_pairs = [
    ('USD', 'INR'), ('EUR', 'USD'), ('GBP', 'USD'), ('USD', 'JPY'), ('EUR', 'INR'),
    ('JPY', 'INR'), ('GBP', 'INR')
]

# Fetching data for all currency pairs and combining into a single DataFrame
all_data = pd.DataFrame()
for from_currency, to_currency in currency_pairs:
    forex_data = fetch_forex_data(from_currency, to_currency)
    all_data = pd.concat([all_data, forex_data])

# Resetting index for the combined DataFrame
all_data.reset_index(inplace=True)
all_data.rename(columns={'index': 'Date',"1. open":"Open_price","2. high":"Day_high","3. low":"Day_low","4. close":"Closing_price"}, inplace=True)

print(all_data)


            Date Open_price   Day_high    Day_low Closing_price Currency Pair
0     2014-11-07   61.39000   61.62000   61.34000      61.40000       USD/INR
1     2014-11-10   61.50000   61.63500   61.34000      61.49500       USD/INR
2     2014-11-11   61.53000   61.55500   61.50500      61.50800       USD/INR
3     2014-11-12   61.50800   61.56000   61.35000      61.39100       USD/INR
4     2014-11-13   61.36800   61.62300   61.35000      61.56500       USD/INR
...          ...        ...        ...        ...           ...           ...
18190 2024-09-18  110.46154  111.34329  110.14741     110.46154       GBP/INR
18191 2024-09-19  111.03561  111.43085  110.81026     111.03561       GBP/INR
18192 2024-09-22  111.20049  111.59995  110.70608     111.20049       GBP/INR
18193 2024-09-23  111.54687  112.07687  111.43484     111.55328       GBP/INR
18194 2024-09-25  112.14210  112.31860  111.88610     111.93190       GBP/INR

[18195 rows x 6 columns]


In [15]:
all_data.head()

Unnamed: 0,Date,Open_price,Day_high,Day_low,Closing_price,Currency Pair
0,2014-11-07,61.39,61.62,61.34,61.4,USD/INR
1,2014-11-10,61.5,61.635,61.34,61.495,USD/INR
2,2014-11-11,61.53,61.555,61.505,61.508,USD/INR
3,2014-11-12,61.508,61.56,61.35,61.391,USD/INR
4,2014-11-13,61.368,61.623,61.35,61.565,USD/INR


In [16]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18195 entries, 0 to 18194
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           18195 non-null  datetime64[ns]
 1   Open_price     18195 non-null  object        
 2   Day_high       18195 non-null  object        
 3   Day_low        18195 non-null  object        
 4   Closing_price  18195 non-null  object        
 5   Currency Pair  18195 non-null  object        
dtypes: datetime64[ns](1), object(5)
memory usage: 853.0+ KB


#### According to the above stats we come to know that the columns are object dtype hence we need to typecast them into float values 

In [17]:
all_data["Open_price"] = all_data["Open_price"].astype(float)
all_data["Day_high"] = all_data["Day_high"].astype(float)
all_data["Day_low"] = all_data["Day_low"].astype(float)
all_data["Closing_price"] = all_data["Closing_price"].astype(float)

In [18]:
all_data.dtypes

Date             datetime64[ns]
Open_price              float64
Day_high                float64
Day_low                 float64
Closing_price           float64
Currency Pair            object
dtype: object

In [19]:
all_data.describe()

Unnamed: 0,Date,Open_price,Day_high,Day_low,Closing_price
count,18195,18195.0,18195.0,18195.0,18195.0
mean,2019-10-01 23:17:01.533388288,53.038297,53.234252,52.85596,53.051661
min,2014-09-24 00:00:00,0.50738,0.51169,0.50687,0.50755
25%,2017-04-04 00:00:00,1.1602,1.164145,1.15666,1.16011
50%,2019-10-02 00:00:00,70.596,70.926,70.39,70.6168
75%,2022-03-30 00:00:00,91.0785,91.529,90.6731,91.15318
max,2024-09-25 00:00:00,161.621,161.942,161.3,161.621
std,,47.523577,47.698529,47.352483,47.53249


## Outlier Detection

In [20]:
import plotly.express as px 

for i in all_data.select_dtypes("number"):
    fig = px.box(all_data[i],title=f"Outlier Detection for column : {i}")
    fig.show()

#### This shows that there are no outliers in the data 

# Time-Series Analysis 

In [21]:
all_data["Currency Pair"].unique()

array(['USD/INR', 'EUR/USD', 'GBP/USD', 'USD/JPY', 'EUR/INR', 'JPY/INR',
       'GBP/INR'], dtype=object)

In [30]:
import plotly.express as px

def plot_df(df, x, y, title="", xlabel='Date', ylabel='Open_price'):
    fig = px.line(df, x=x, y=y, title=title, labels={x: xlabel, y: ylabel}, 
                  line_shape='linear', markers=True)
    fig.update_layout(title=title, xaxis_title=xlabel, yaxis_title=ylabel)
    fig.show() 

In [32]:
for i in all_data["Currency Pair"].unique():
    a = all_data[all_data["Currency Pair"]==i][["Date","Open_price","Currency Pair"]]
    #print(a)
    plot_df(a, x='Date', y="Open_price", title=f'Open Price for last 10 years for the currency pair : {i}')



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

