# Inflation_Calculator: `Web Scraping`

## Table of Contents
[A. Data Gathering](#gather)\
[B. Data Cleaning](#clean)

In [1]:
# Mounting the Google Drive
# The CSV files written at the end of each section are saved under this mount point.

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<a id = 'gather'></a>
## A. Data Gathering

In [None]:
# Importing Libraries

from bs4 import BeautifulSoup
import requests
import pandas as pd

In [None]:
# Getting URL

url = 'https://www.macrotrends.net/countries/IND/india/inflation-rate-cpi'

# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of letting an
# error page be parsed further down as if it were the data page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.text

In [None]:
# URL Content

# Parse the downloaded HTML with the lxml parser; `soup` is queried by the cells below.
soup = BeautifulSoup(html_content, "lxml")

# Show only the beginning of the document: printing the whole prettified page
# floods the notebook output without adding information.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<!--[if lt IE 7]>      <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html class="no-js">
 <!--<![endif]-->
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <link href="https://www.macrotrends.net/countries/IND/india/inflation-rate-cpi" rel="canonical"/>
  <title>
   India Inflation Rate 1960-2020 | MacroTrends
  </title>
  <meta content="Inflation as measured by the consumer price index reflects the annual percentage change in the cost to the average consumer of acquiring a basket of goods and services that may be fixed or changed at specified intervals, such as yearly. The Laspeyres formula is generally used." name="description"/>
  <meta content="" name="robots"/>
  <link href="/assets/images/icons/FAVICON/macro-trends_favicon.ico" rel="shortcut icon"

In [None]:
# URL Title

# Text of the page's <title> element
page_title_text = soup.title.text
print(page_title_text)

India Inflation Rate 1960-2020 | MacroTrends


In [None]:
# Extracting Title & Header from the URL

# Locate the <div> carrying the col-xs-6 class and collect the <thead>
# elements of the table inside it.
inflation_table = soup.find("div", attrs={"class": "col-xs-6"})
inflation_table_data = inflation_table.table.find_all('thead')

# First <thead> holds the table title, second one the column captions;
# leading whitespace is trimmed from both.
tl = inflation_table_data[0].text.lstrip()
hr = inflation_table_data[1].text.lstrip()

# title first, header second
print(tl, hr, sep='\n')

India Inflation Rate - Historical Data


Year
Inflation Rate (%)
Annual Change




In [None]:
# Extracting Column Values from URL

inflation_table = soup.find("div", attrs={"class": "col-xs-6"})
inflation_table_data = inflation_table.tbody.find_all("tr")

# One string per <tr>, with leading whitespace removed
col_values = [table_row.text.lstrip() for table_row in inflation_table_data]

print(col_values)

['2019\n7.66%\n2.80%\n', '2018\n4.86%\n2.37%\n', '2017\n2.49%\n-2.45%\n', '2016\n4.94%\n-0.93%\n', '2015\n5.87%\n-0.48%\n', '2014\n6.35%\n-4.55%\n', '2013\n10.91%\n1.60%\n', '2012\n9.31%\n0.45%\n', '2011\n8.86%\n-3.13%\n', '2010\n11.99%\n1.11%\n', '2009\n10.88%\n2.53%\n', '2008\n8.35%\n1.98%\n', '2007\n6.37%\n0.58%\n', '2006\n5.80%\n1.55%\n', '2005\n4.25%\n0.48%\n', '2004\n3.77%\n-0.04%\n', '2003\n3.81%\n-0.49%\n', '2002\n4.30%\n0.52%\n', '2001\n3.78%\n-0.23%\n', '2000\n4.01%\n-0.66%\n', '1999\n4.67%\n-8.56%\n', '1998\n13.23%\n6.07%\n', '1997\n7.16%\n-1.81%\n', '1996\n8.98%\n-1.25%\n', '1995\n10.22%\n-0.02%\n', '1994\n10.25%\n3.92%\n', '1993\n6.33%\n-5.46%\n', '1992\n11.79%\n-2.08%\n', '1991\n13.87%\n4.90%\n', '1990\n8.97%\n1.90%\n', '1989\n7.07%\n-2.31%\n', '1988\n9.38%\n0.58%\n', '1987\n8.80%\n0.07%\n', '1986\n8.73%\n3.17%\n', '1985\n5.56%\n-2.76%\n', '1984\n8.32%\n-3.55%\n', '1983\n11.87%\n3.98%\n', '1982\n7.89%\n-5.22%\n', '1981\n13.11%\n1.77%\n', '1980\n11.35%\n5.07%\n', '1979\n6.

<a id = 'clean'></a>
## B. Data Cleaning

In [None]:
# Organizing Title Content

# Split the scraped title text on line breaks and keep the non-blank lines.
# Filtering blanks (instead of popping one trailing element) means the result
# does not depend on how many newlines follow the title in the page markup.
title = [line.strip() for line in tl.splitlines() if line.strip()]
title

['India Inflation Rate - Historical Data']

In [None]:
# Organizing the Header Content

# Split the scraped header text on line breaks and keep the non-blank lines,
# giving one entry per column caption. Filtering blanks (instead of popping
# one trailing element) keeps the list correct whatever number of newlines
# surrounds the captions in the page markup.
header = [line.strip() for line in hr.splitlines() if line.strip()]
header

['Year', 'Inflation Rate (%)', 'Annual Change']

In [None]:
# Organizing the Values

# Each entry of col_values is the text of one table row, with the cells
# separated by '\n' (for example '2019\n7.66%\n2.80%\n').
# Every row is split on its own, so a row whose last cell is not followed by
# '\n' keeps that cell instead of leaking it into the next row.
column_values = []
for row_text in col_values:
  # Remove only the single newline that terminates the row; empty cells in
  # the middle of a row stay in place so the columns remain aligned.
  if row_text.endswith('\n'):
    row_text = row_text[:-1]

  cells = row_text.split('\n') if row_text else []
  column_values.append([cell.strip() for cell in cells])

column_values

[['2019', '7.66%', '2.80%'],
 ['2018', '4.86%', '2.37%'],
 ['2017', '2.49%', '-2.45%'],
 ['2016', '4.94%', '-0.93%'],
 ['2015', '5.87%', '-0.48%'],
 ['2014', '6.35%', '-4.55%'],
 ['2013', '10.91%', '1.60%'],
 ['2012', '9.31%', '0.45%'],
 ['2011', '8.86%', '-3.13%'],
 ['2010', '11.99%', '1.11%'],
 ['2009', '10.88%', '2.53%'],
 ['2008', '8.35%', '1.98%'],
 ['2007', '6.37%', '0.58%'],
 ['2006', '5.80%', '1.55%'],
 ['2005', '4.25%', '0.48%'],
 ['2004', '3.77%', '-0.04%'],
 ['2003', '3.81%', '-0.49%'],
 ['2002', '4.30%', '0.52%'],
 ['2001', '3.78%', '-0.23%'],
 ['2000', '4.01%', '-0.66%'],
 ['1999', '4.67%', '-8.56%'],
 ['1998', '13.23%', '6.07%'],
 ['1997', '7.16%', '-1.81%'],
 ['1996', '8.98%', '-1.25%'],
 ['1995', '10.22%', '-0.02%'],
 ['1994', '10.25%', '3.92%'],
 ['1993', '6.33%', '-5.46%'],
 ['1992', '11.79%', '-2.08%'],
 ['1991', '13.87%', '4.90%'],
 ['1990', '8.97%', '1.90%'],
 ['1989', '7.07%', '-2.31%'],
 ['1988', '9.38%', '0.58%'],
 ['1987', '8.80%', '0.07%'],
 ['1986', '8.73%', 

In [None]:
# Building the DataFrame: one row per scraped table row, columns named after the header

df = pd.DataFrame(data=column_values, columns=header)

In [None]:
# DataFrame
# Displayed before any cleaning: the values still carry their '%' suffix.

df

Unnamed: 0,Year,Inflation Rate (%),Annual Change
0,2019,7.66%,2.80%
1,2018,4.86%,2.37%
2,2017,2.49%,-2.45%
3,2016,4.94%,-0.93%
4,2015,5.87%,-0.48%
5,2014,6.35%,-4.55%
6,2013,10.91%,1.60%
7,2012,9.31%,0.45%
8,2011,8.86%,-3.13%
9,2010,11.99%,1.11%


In [None]:
# Checking DataTypes
# All columns are still `object` at this point; they are converted further below.

df.dtypes

Year                  object
Inflation Rate (%)    object
Annual Change         object
dtype: object

In [None]:
# Dropping the Annual Change column
# Rebinding df (rather than inplace=True) together with errors='ignore' lets
# this cell be re-run without a KeyError once the column is already gone.

df = df.drop(columns='Annual Change', errors='ignore')

In [None]:
# Removing Special Characters
# The cleaned Series is assigned back to the column. Calling
# replace(..., inplace=True) on df[...] is chained assignment: it modifies a
# temporary selection and does not update df under pandas Copy-on-Write.

df['Inflation Rate (%)'] = df['Inflation Rate (%)'].replace(to_replace='%', value='', regex=True)

In [None]:
# Converting the DataTypes:
# Year becomes an integer and the inflation rate a float, both in one astype call.

df = df.astype({'Year': 'int64', 'Inflation Rate (%)': 'float'})

In [None]:
# Confirmation
# Both remaining columns should now be numeric (int64 / float64).

df.dtypes

Year                    int64
Inflation Rate (%)    float64
dtype: object

In [None]:
# Saving as CSV
# The file is named after the scraped table title and written to the mounted
# Drive. The folder is spelled 'MyDrive', matching the other save cell in this
# notebook.
# NOTE(review): `title` must still be the list built in the Data Cleaning
# section; the currency section later rebinds `title` to a plain string.
df.to_csv('/content/drive/MyDrive/Colab Notebooks/' + title[0] + '.csv', index=False)

# Currency Conversion `Web Scraping`

## Table of Contents
[C. Data Gathering](#gather_cr)\
[D. Data Cleaning](#clean_cr)

In [39]:
# Importing Libraries

from bs4 import BeautifulSoup
import requests
import pandas as pd

<a id = 'gather_cr'></a>
## C. Data Gathering

In [40]:
# Getting URL

url = 'https://en.wikipedia.org/wiki/Exchange_rate_history_of_the_Indian_rupee'

# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of letting an
# error page be parsed further down as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.text

In [41]:
# URL Content

# Parse the downloaded HTML with the lxml parser; `soup` is queried by the cells below.
soup = BeautifulSoup(html_content, "lxml")

# Show only the beginning of the document: printing the whole prettified page
# floods the notebook output without adding information.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Exchange rate history of the Indian rupee - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"a81cfd89-ec70-4690-8509-135049a0ca54","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Exchange_rate_history_of_the_Indian_rupee","wgTitle":"Exchange rate history of the Indian rupee","wgCurRevisionId":987771429,"wgRevisionId":987771429,"wgArticleId":43723456,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Rupee","Coins of India","Denominations (currency)","Currency lists

In [92]:
# URL Title

# Keep the text of the page's <title> element; it is reused as the CSV file name.
title_tag = soup.title
title = title_tag.text
print(title)

Exchange rate history of the Indian rupee - Wikipedia


In [43]:
# Extracting Header from the URL

# Collect the <th> cells of the table found via the mw-parser-output <div>.
head = soup.find("div", attrs={"class": "mw-parser-output"})
head_data = head.table.find_all('th')

# strip() every caption so the trailing '\n' of the last header cell is
# removed without hard-coding its position (index 10) in the list.
header = [th.text.strip() for th in head_data]

header

['Year',
 'SDR (average)',
 'SDR (end year)',
 'USD (average)',
 'USD (end year)',
 'GBP (average)',
 'GBP (end year)',
 'DM/EUR (average)',
 'DM/EUR (end year)',
 'JPY (average)',
 'JPY (end year)']

In [44]:
# Extracting Column values from URL

# Gather every <td> of the table found via the mw-parser-output <div>.
curr_table = soup.find("div", attrs={"class": "mw-parser-output"})
curr_table_data = curr_table.table.find_all('td')

# Flat list of raw cell texts, in document order
curr = [cell.text for cell in curr_table_data]

curr

['1974-75',
 '9.6233',
 '9.723',
 '7.9408',
 '7.794',
 '18.8',
 '18.776',
 '3.1917',
 '3.324',
 '3',
 '2.7\n',
 '1975-76',
 '10.3642',
 '10.375',
 '8.6825',
 '8.973',
 '18.3933',
 '17.19',
 '3.4458',
 '3.535',
 '3',
 '3\n',
 '1976-77',
 '10.35',
 '10.206',
 '8.9775',
 '8.804',
 '15.5733',
 '15.144',
 '3.6308',
 '3.686',
 '3',
 '3.2\n',
 '1977-78',
 '10.1605',
 '10.43',
 '8.5858',
 '8.434',
 '15.4292',
 '15.656',
 '3.8358',
 '4.169',
 '3.33',
 '3.8\n',
 '1978-79',
 '10.4315',
 '10.488',
 '8.2267',
 '8.15',
 '15.9658',
 '16.861',
 '4.22',
 '4.364',
 '4',
 '3.9\n',
 '1979-80',
 '10.4935',
 '10.251',
 '8.0975',
 '8.193',
 '17.655',
 '17.753',
 '4.4717',
 '4.219',
 '3.58',
 '3.3\n',
 '1980-81',
 '10.1777',
 '10.0620',
 '7.9092',
 '8.1900',
 '18.5042',
 '18.3800',
 '4.1875',
 '3.9000',
 '3.7500',
 '3.9000\n',
 '1981-82',
 '10.3354',
 '10.4030',
 '8.9683',
 '9.3460',
 '17.1096',
 '16.6520',
 '3.8607',
 '3.8710',
 '3.9400',
 '3.8000\n',
 '1982-83',
 '10.5628',
 '10.7540',
 '9.6660',
 '9.9700',

<a id = 'clean_cr'></a>
## D. Data Cleaning

In [48]:
# Organizing Gathered Data

# `curr` is a flat list of cell texts. The last cell of every table row is the
# one whose text ends with '\n', so that marker is used to close a row.
currency_table_data = []
current_row = []

for cell in curr:
  # strip() removes the row-terminating '\n' along with any stray padding.
  current_row.append(cell.strip())

  # endswith() is safe on an empty cell, where indexing cell[-1] would raise
  # IndexError.
  if cell.endswith('\n'):
    currency_table_data.append(current_row)
    current_row = []

currency_table_data

[['1974-75',
  '9.6233',
  '9.723',
  '7.9408',
  '7.794',
  '18.8',
  '18.776',
  '3.1917',
  '3.324',
  '3',
  '2.7'],
 ['1975-76',
  '10.3642',
  '10.375',
  '8.6825',
  '8.973',
  '18.3933',
  '17.19',
  '3.4458',
  '3.535',
  '3',
  '3'],
 ['1976-77',
  '10.35',
  '10.206',
  '8.9775',
  '8.804',
  '15.5733',
  '15.144',
  '3.6308',
  '3.686',
  '3',
  '3.2'],
 ['1977-78',
  '10.1605',
  '10.43',
  '8.5858',
  '8.434',
  '15.4292',
  '15.656',
  '3.8358',
  '4.169',
  '3.33',
  '3.8'],
 ['1978-79',
  '10.4315',
  '10.488',
  '8.2267',
  '8.15',
  '15.9658',
  '16.861',
  '4.22',
  '4.364',
  '4',
  '3.9'],
 ['1979-80',
  '10.4935',
  '10.251',
  '8.0975',
  '8.193',
  '17.655',
  '17.753',
  '4.4717',
  '4.219',
  '3.58',
  '3.3'],
 ['1980-81',
  '10.1777',
  '10.0620',
  '7.9092',
  '8.1900',
  '18.5042',
  '18.3800',
  '4.1875',
  '3.9000',
  '3.7500',
  '3.9000'],
 ['1981-82',
  '10.3354',
  '10.4030',
  '8.9683',
  '9.3460',
  '17.1096',
  '16.6520',
  '3.8607',
  '3.8710',
  

In [81]:
# Building the DataFrame: one row per table row, columns named after the scraped header

df_cr = pd.DataFrame(data=currency_table_data, columns=header)

In [82]:
# Dataframe
# First rows of the raw table, before any column is dropped or converted.
df_cr.head()

Unnamed: 0,Year,SDR (average),SDR (end year),USD (average),USD (end year),GBP (average),GBP (end year),DM/EUR (average),DM/EUR (end year),JPY (average),JPY (end year)
0,1974-75,9.6233,9.723,7.9408,7.794,18.8,18.776,3.1917,3.324,3.0,2.7
1,1975-76,10.3642,10.375,8.6825,8.973,18.3933,17.19,3.4458,3.535,3.0,3.0
2,1976-77,10.35,10.206,8.9775,8.804,15.5733,15.144,3.6308,3.686,3.0,3.2
3,1977-78,10.1605,10.43,8.5858,8.434,15.4292,15.656,3.8358,4.169,3.33,3.8
4,1978-79,10.4315,10.488,8.2267,8.15,15.9658,16.861,4.22,4.364,4.0,3.9


In [83]:
# Dataframe Columns
# Lists the column labels; only 'Year' and 'USD (end year)' are kept further below.
df_cr.columns

Index(['Year', 'SDR (average)', 'SDR (end year)', 'USD (average)',
       'USD (end year)', 'GBP (average)', 'GBP (end year)', 'DM/EUR (average)',
       'DM/EUR (end year)', 'JPY (average)', 'JPY (end year)'],
      dtype='object')

In [84]:
# About Dataframe
# Row count, non-null counts and dtypes; all columns are still `object` at this point.
df_cr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Year               45 non-null     object
 1   SDR (average)      45 non-null     object
 2   SDR (end year)     45 non-null     object
 3   USD (average)      45 non-null     object
 4   USD (end year)     45 non-null     object
 5   GBP (average)      45 non-null     object
 6   GBP (end year)     45 non-null     object
 7   DM/EUR (average)   45 non-null     object
 8   DM/EUR (end year)  45 non-null     object
 9   JPY (average)      45 non-null     object
 10  JPY (end year)     45 non-null     object
dtypes: object(11)
memory usage: 4.0+ KB


In [85]:
# Keeping only the year and the end-of-year USD rate
#
# Columns are chosen by name, not by position, so the cell does not depend on
# the column order of the scraped table. The whole clean-up is one chain that
# rebinds df_cr, and each step is a no-op on data that is already cleaned, so
# the cell can be re-run safely.
df_cr = (
    df_cr
    # Renaming Column (nothing happens if it is already called 'USD')
    .rename(columns={'USD (end year)': 'USD'})
    # Dropping every other column
    .loc[:, ['Year', 'USD']]
    # Changing 'Year' Data: labels look like '1974-75'; keep the leading
    # four-digit year
    .assign(Year=lambda frame: frame['Year'].astype(str).str[:4])
    # Dtype Conversion
    .astype({'Year': 'int64', 'USD': 'float64'})
)

In [88]:
# Confirmation
# 'Year' should now be int64 and 'USD' float64.

df_cr.dtypes

Year      int64
USD     float64
dtype: object

In [87]:
# Final Dataframe
df_cr

Unnamed: 0,Year,USD
0,1974,7.794
1,1975,8.973
2,1976,8.804
3,1977,8.434
4,1978,8.15
5,1979,8.193
6,1980,8.19
7,1981,9.346
8,1982,9.97
9,1983,10.707


In [94]:
# Saving as CSV
# The file is named after the page title and written to the mounted Drive, without the index column.
csv_path = '/content/drive/MyDrive/Colab Notebooks/' + title + '.csv'
df_cr.to_csv(csv_path, index=False)