# Let's Fetch **Fuel Prices in India** (Petrol, Diesel, LPG and CNG)

In [None]:
import time
from datetime import datetime
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Fuel types to scrape; each value is interpolated into the page URL below.
fuels = ['petrol', 'diesel', 'lpg', 'cng']


def source_page(fuel):
    """Return the goodreturns.in price-page URL for the given fuel name.

    Args:
        fuel: Fuel name as it appears in the URL (e.g. ``'petrol'``). It is
            inserted verbatim, with no case normalisation or validation.

    Returns:
        The URL string ``https://www.goodreturns.in/<fuel>-price.html``.
    """
    return f"https://www.goodreturns.in/{fuel}-price.html"

In [None]:
# Command-line switches for a 'headless' Chrome session, applied in order.
# NOTE(review): the user-agent ends in "Safari/537.37" while its AppleWebKit
# token is 537.36 - possibly a typo; confirm before changing.
chrome_flags = (
    "--blink-settings=imagesEnabled=false",
    "--headless=new",
    "--disable-gpu",
    "--no-sandbox",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.5481.77 Safari/537.37",
)

options = Options()
for flag in chrome_flags:
    options.add_argument(flag)

# Single browser instance shared by the scraping cell below.
driver = webdriver.Chrome(options=options)

In [None]:
def city_price(result):
    """Split one scraped table line into a ``(city, price)`` pair.

    The line is expected to look like ``"<city> ₹<price> <other columns>"``.
    Everything before the first rupee sign is the city; the first
    whitespace-separated token after it is the price.

    Args:
        result: One line of text from the price table.

    Returns:
        A ``(city, price)`` tuple of strings. ``price`` has thousands
        separators removed so it can be passed straight to ``float``; it is
        an empty string when nothing follows the rupee sign.

    Raises:
        IndexError: If ``result`` contains no ``₹`` symbol.
    """
    parts = result.split('₹')
    # Whitespace-agnostic split: tolerates "₹94.72" as well as "₹ 94.72",
    # where splitting on a single space would yield an empty first token.
    tokens = parts[1].split()
    # Drop thousands separators ("1,201.00" -> "1201.00") so float() accepts it.
    price = tokens[0].replace(',', '') if tokens else ''
    return parts[0].strip(), price

# Maps fuel name -> {city: price string}, filled by the loop below.
fuel_prices = {}

# Upper bound, in seconds, on how long to wait for each page's price table.
TABLE_WAIT_SECONDS = 10

for fuel in fuels:
    driver.get(source_page(fuel))
    # Wait explicitly for the table element instead of looking it up
    # immediately (or sleeping a fixed time): the lookup is retried until the
    # element is present, and TimeoutException is raised if it never appears.
    table = WebDriverWait(driver, TABLE_WAIT_SECONDS).until(
        EC.presence_of_element_located((By.CLASS_NAME, "gd-fuel-table-block"))
    )
    # The first two lines of the block's text are skipped - presumably a
    # title/header row; confirm against the live page.
    rows = table.text.split("\n")[2:]

    # Each remaining line is parsed into (city, price); a repeated city keeps
    # its last price, as with plain dict assignment.
    fuel_prices[fuel] = dict(city_price(row) for row in rows)

# NOTE(review): `driver` stays open after this cell; call driver.quit() once
# scraping is finished to release the browser process.


In [None]:
# Fuel to report on; must be one of the keys scraped into `fuel_prices`.
fuel = 'diesel'

# Normalise once so the dictionary lookup and the source URL always agree,
# even when `fuel` is typed in mixed case.
fuel_key = fuel.lower()

data = fuel_prices[fuel_key]
# Build a City/Price table, convert prices to floats and rank the cities from
# most to least expensive with a fresh 0..n-1 index.
df = (
    pd.DataFrame(data.items(), columns=["City", "Price"])
    .astype({"Price": float})
    .sort_values(by="Price", ascending=False)
    .reset_index(drop=True)
)

print(f"{fuel.upper()} PRICES AS ON {datetime.now().strftime('%d %B %Y')}")
print(f"Source: {source_page(fuel_key)}")
print("-" * 35)
display(df)
