# Dollar Prices Scraping

## Import Libraries

In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

## Make The Request From Yahoo Finance and Get The Table HTML

In [4]:
url = "https://finance.yahoo.com/quote/DX-Y.NYB/history/?period1=31813200&period2=1745262681"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
r = requests.get(url, headers=headers)

c = r.text
soup=BeautifulSoup(c, "lxml")

table = soup.find("table" , {"class":"table yf-1jecxey noDl hideOnPrint"})

## Get The Headers of The Table

In [5]:
headers = table.find_all("th" , {"class":"yf-1jecxey"})

titles = []

for i in headers:
    titles.append(i.text.split("  ")[0])
    
df = pd.DataFrame(columns = titles)
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume


## Get The Content of The Table

In [6]:
rows_con = table.find_all("tr" , {"class":"yf-1jecxey"})
for i in rows_con[1:]:
    data = i.find_all("td" , {"class":"yf-1jecxey"})
    row = [tr.text for tr in data]
    new_row_df = pd.DataFrame([row], columns=df.columns)  # Ensure columns match
    df = pd.concat([df, new_row_df], ignore_index=True)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,"Apr 21, 2025",99.08,99.21,97.92,98.29,98.29,-
1,"Apr 17, 2025",99.23,99.75,99.22,99.38,99.38,-
2,"Apr 16, 2025",100.06,100.1,99.17,99.38,99.38,-
3,"Apr 15, 2025",99.81,100.28,99.48,100.22,100.22,-
4,"Apr 14, 2025",100.02,100.16,99.21,99.64,99.64,-


## Data Preprocessing

In [7]:
df.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,"Apr 21, 2025",99.08,99.21,97.92,98.29,98.29,-
1,"Apr 17, 2025",99.23,99.75,99.22,99.38,99.38,-
2,"Apr 16, 2025",100.06,100.1,99.17,99.38,99.38,-
3,"Apr 15, 2025",99.81,100.28,99.48,100.22,100.22,-
4,"Apr 14, 2025",100.02,100.16,99.21,99.64,99.64,-
5,"Apr 11, 2025",100.78,100.78,99.01,99.78,99.78,-
6,"Apr 10, 2025",102.99,103.03,100.7,100.87,100.87,-
7,"Apr 9, 2025",102.75,103.33,101.84,102.9,102.9,-
8,"Apr 8, 2025",103.43,103.44,102.75,102.96,102.96,-
9,"Apr 7, 2025",103.07,103.54,102.18,103.26,103.26,-


In [8]:
df.shape

(13790, 7)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13790 entries, 0 to 13789
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       13790 non-null  object
 1   Open       13790 non-null  object
 2   High       13790 non-null  object
 3   Low        13790 non-null  object
 4   Close      13790 non-null  object
 5   Adj Close  13790 non-null  object
 6   Volume     13790 non-null  object
dtypes: object(7)
memory usage: 754.3+ KB


## Date Format

In [10]:
df['Date'] = pd.to_datetime(df['Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13790 entries, 0 to 13789
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       13790 non-null  datetime64[ns]
 1   Open       13790 non-null  object        
 2   High       13790 non-null  object        
 3   Low        13790 non-null  object        
 4   Close      13790 non-null  object        
 5   Adj Close  13790 non-null  object        
 6   Volume     13790 non-null  object        
dtypes: datetime64[ns](1), object(6)
memory usage: 754.3+ KB


## Numeric Data Format

In [11]:
numbers = df.select_dtypes(include = ["object"]).columns
numbers

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [12]:
for col in numbers:
    df[col] = df[col].str.replace("," , "")
    df[col] = pd.to_numeric(df[col], errors='coerce')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13790 entries, 0 to 13789
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       13790 non-null  datetime64[ns]
 1   Open       13790 non-null  float64       
 2   High       13790 non-null  float64       
 3   Low        13790 non-null  float64       
 4   Close      13790 non-null  float64       
 5   Adj Close  13790 non-null  float64       
 6   Volume     2 non-null      float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 754.3 KB


In [13]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2025-04-21,99.08,99.21,97.92,98.29,98.29,
1,2025-04-17,99.23,99.75,99.22,99.38,99.38,
2,2025-04-16,100.06,100.1,99.17,99.38,99.38,
3,2025-04-15,99.81,100.28,99.48,100.22,100.22,
4,2025-04-14,100.02,100.16,99.21,99.64,99.64,


In [14]:
df = df.drop(columns = ['Open', 'High', 'Low', 'Close', 'Volume'])
df = df.set_index('Date')
df = df.asfreq('b', method = 'ffill')

In [15]:
df.head()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
1971-01-04,120.53
1971-01-05,120.52
1971-01-06,120.49
1971-01-07,120.55
1971-01-08,120.53


In [16]:
df.tail()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2025-04-15,100.22
2025-04-16,99.38
2025-04-17,99.38
2025-04-18,98.29
2025-04-21,98.29


# Save The Data In CSV File

In [17]:
df.to_csv("us_dollar_close.csv")