In [18]:
import json
import re
from bs4 import BeautifulSoup
import requests
from datetime import datetime, timedelta

# Function to get data from URL
def get_data(url):
    r = requests.get(url)
    return r.text

# Extract train data from the HTML
def extract_train_data(html_data):
    soup = BeautifulSoup(html_data, 'html.parser')
    train_info = soup.find('section', style=lambda value: value and 'text-align:center' in value)
    if train_info:
        train_number = train_info.find('b').text
        train_name = soup.find('h1', class_='banner-title').text.split('|')[0].strip()
        journey_time = re.search(r'Total journey duration is\s+(\d+:\d+)', train_info.text)
        journey_time = journey_time.group(1) if journey_time else None
        
        return {
            'TrainNo': train_number,
            'TrainName': train_name,
            'JourneyTime': journey_time
        }
    return None

# Extract main station data
def get_main_station_data(html_data, journey_date):
    soup = BeautifulSoup(html_data, 'html.parser')
    main_stations = []
    stations = soup.find_all('div', class_='well well-sm')
    
    for station in stations:
        station_name_elem = station.find('span', class_='rs__station-name')
        station_name = station_name_elem.text.strip() if station_name_elem else 'Unknown'
        
        date_elem = station.find_all('div', class_='col-xs-3')[1]
        date = date_elem.find_all('span')[1].text.strip() if date_elem else 'Unknown'
        
        times = station.find_all('div', class_='col-xs-2')
        arrival_time = times[0].text.strip() if len(times) > 0 else 'No Information'
        departure_time = times[1].text.strip() if len(times) > 1 else 'No Information'
        
        delay_elem = station.find('div', class_='rs__station-delay')
        delay = delay_elem.text.strip() if delay_elem else 'No Information'
        
        station_data = {
            'stationName': station_name,
            'date': f"{date}-2024",  # Assuming the year is 2024
            'arrivalTime': arrival_time,
            'departureTime': departure_time,
            'delay': delay
        }
        main_stations.append(station_data)
    
    return main_stations

# Get current station
def get_current_station(html_data):
    soup = BeautifulSoup(html_data, 'html.parser')
    current_station_div = soup.find('div', class_='circle blink')
    if current_station_div:
        station_name = current_station_div.find_next('span', class_='rs__station-name').text.strip()
        return station_name
    return 'No Information'

# Extract available dates
def extract_available_dates(html_data):
    soup = BeautifulSoup(html_data, 'html.parser')
    date_options = soup.find_all('option', attrs={'data-day': True})
    available_dates = []
    for option in date_options:
        day_text = option.get('data-day')
        date_value = option.get('value')
        available_dates.append(f"{day_text} {date_value}")
    return available_dates if available_dates else []

# URL and data extraction
url = "https://www.confirmtkt.com/train-running-status/14722"
html_data = get_data(url)
train_data = extract_train_data(html_data)

# Journey date (example, this should be dynamically fetched or input)
journey_date = "28-Jun-2024"

# Main stations data
main_stations = get_main_station_data(html_data, journey_date)

# Current station
current_station = get_current_station(html_data)

# Available dates
available_dates = extract_available_dates(html_data)

# Extract last updated time (status_as_of)
soup = BeautifulSoup(html_data, 'html.parser')
status_as_of = soup.find('div', class_='train-update__time')
status_as_of = status_as_of.text.strip() if status_as_of else 'No Information'

# Calculate previous and upcoming stations
current_index = next((i for i, station in enumerate(main_stations) if station['stationName'] == current_station), None)
previous_stations = main_stations[:current_index] if current_index is not None else []
upcoming_stations = main_stations[current_index + 1:] if current_index is not None else main_stations

# Calculate spent time
spent_time = None
if current_index is not None:
    current_station_data = main_stations[current_index]
    arrival_time = current_station_data['arrivalTime']
    departure_time = current_station_data['departureTime']
    if arrival_time != 'No Information' and departure_time != 'No Information':
        arrival_time = datetime.strptime(arrival_time, '%H:%M')
        departure_time = datetime.strptime(departure_time, '%H:%M')
        spent_time = str(departure_time - arrival_time)
    elif arrival_time != 'No Information':
        spent_time = 'Arrived'
    elif departure_time != 'No Information':
        spent_time = 'Not yet arrived'

# Prepare the final JSON response
response = {
    "status_as_of": status_as_of,
    "journey_time": train_data['JourneyTime'] if train_data else None,
    "total_distance": None,  # This information is not available in the provided HTML
    "available_dates": available_dates,
    "current_station": current_station,
    "previous_stations": previous_stations,
    "upcoming_stations": upcoming_stations,
    "spent_time": spent_time,
    "main_stations": main_stations
}

# Print the final JSON response
print(json.dumps(response, indent=4))

{
    "status_as_of": "Last Updated:\u00a028 Jun 2024 23:01, (Disclaimer: This train running information is not affiliated with or endorsed by Indian Railways or IRCTC.)",
    "journey_time": "14:35",
    "total_distance": null,
    "available_dates": [
        "3 days ago 25-Jun-2024",
        "2 days ago 26-Jun-2024",
        "yesterday 27-Jun-2024",
        "today 28-Jun-2024",
        "tommorrow 29-Jun-2024"
    ],
    "current_station": "Hanumangarh Junction",
    "previous_stations": [
        {
            "stationName": "Abohar",
            "date": "28-Jun-2024",
            "arrivalTime": "",
            "departureTime": "19:40",
            "delay": "Right Time"
        },
        {
            "stationName": "Pakki",
            "date": "28-Jun-2024",
            "arrivalTime": "19:53",
            "departureTime": "19:55",
            "delay": "Delay by  2 min"
        },
        {
            "stationName": "Malout",
            "date": "28-Jun-2024",
            "arrival

In [19]:
import requests
from bs4 import BeautifulSoup
import json
import re

def get_data(url):
    r = requests.get(url)
    return r.text

url = "https://www.confirmtkt.com/train-running-status/14722"
html_data = get_data(url)
soup = BeautifulSoup(html_data, 'html.parser')

# Extracting required data
def extract_train_info(soup):
    # Extract JSON data embedded in the script tag
    script_tag = soup.find('script', text=re.compile('var data ='))
    json_text = re.search(r'var data = ({.*?});', script_tag.string, re.DOTALL).group(1)
    data = json.loads(json_text)
    
    # Train start day and running days
    start_day = data['Source']
    running_days = [day for day, runs in data['DaysOfRun'].items() if runs]
    
    # Total distance
    total_distance = data['Schedule'][-1]['Distance']
    
    # Current station information
    current_station_code = data.get('currentStnCode', 'N/A')
    current_station_name = data.get('currentStnName', 'N/A')
    
    # Upcoming stations
    upcoming_stations = []
    current_station_found = False
    for station in data['Schedule']:
        if station['StationCode'] == current_station_code:
            current_station_found = True
        if current_station_found:
            upcoming_stations.append({
                'station_name': station['StationName'],
                'station_code': station['StationCode'],
                'distance_from_current': station['Distance']
            })
    
    # Previous stations with halt information
    previous_stations = []
    for station in data['Schedule']:
        if station['StationCode'] == current_station_code:
            break
        previous_stations.append({
            'station_name': station['StationName'],
            'station_code': station['StationCode'],
            'platform_number': station.get('ExpectedPlatformNo', 'N/A'),
            'distance_from_start': station['Distance']
        })
    
    return {
        'start_day': start_day,
        'running_days': running_days,
        'total_distance': total_distance,
        'current_station_code': current_station_code,
        'current_station_name': current_station_name,
        'upcoming_stations': upcoming_stations,
        'previous_stations': previous_stations
    }

train_info = extract_train_info(soup)

# Displaying the extracted data
print("Train Start Day:", train_info['start_day'])
print("Running Days:", train_info['running_days'])
print("Total Distance:", train_info['total_distance'])
print("Current Station Code:", train_info['current_station_code'])
print("Current Station Name:", train_info['current_station_name'])
print("\nUpcoming Stations:")
for station in train_info['upcoming_stations']:
    print(station)
print("\nPrevious Stations with Halt Info:")
for station in train_info['previous_stations']:
    print(station)


Train Start Day: Abohar
Running Days: ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
Total Distance: 673.4
Current Station Code: N/A
Current Station Name: N/A

Upcoming Stations:

Previous Stations with Halt Info:
{'station_name': 'Abohar Jn', 'station_code': 'ABS', 'platform_number': None, 'distance_from_start': '0.0'}
{'station_name': 'Pakki', 'station_code': 'PKK', 'platform_number': None, 'distance_from_start': '14.9'}
{'station_name': 'Malout', 'station_code': 'MOT', 'platform_number': '1', 'distance_from_start': '29.5'}
{'station_name': 'Giddarbaha', 'station_code': 'GDB', 'platform_number': None, 'distance_from_start': '45.6'}
{'station_name': 'Bulluana', 'station_code': 'BHX', 'platform_number': '1', 'distance_from_start': '57.3'}
{'station_name': 'Bathinda Jn', 'station_code': 'BTI', 'platform_number': '2', 'distance_from_start': '73.2'}
{'station_name': 'Mandi Dabwali', 'station_code': 'MBY', 'platform_number': None, 'distance_from_start': '109.0'}
{'station_name': 'Sangar

  script_tag = soup.find('script', text=re.compile('var data ='))


In [20]:
import requests
from bs4 import BeautifulSoup
import json
import re

def get_data(url):
    r = requests.get(url)
    return r.text

url = "https://www.confirmtkt.com/train-running-status/14722"
html_data = get_data(url)
soup = BeautifulSoup(html_data, 'html.parser')

# Extract last updated time (status_as_of)
status_as_of = soup.find('div', class_='train-update__time').text.strip()

# Extract journey time
journey_time_text = soup.find(string=lambda t: 'Total journey duration is' in t)
journey_time = None
if journey_time_text:
    journey_time = re.search(r'(\d+:\d+)', journey_time_text).group(1)

# Extract station details
stations = []
station_rows = soup.find_all('div', class_='row rs__station-row flexy')
for i, row in enumerate(station_rows):
    station_name = row.find('span', class_='rs__station-name ellipsis').text.strip()
    arrival_time = row.find_all('div', class_='col-xs-2')[0].text.strip()
    departure_time = row.find_all('div', class_='col-xs-2')[1].text.strip()
    delay = row.find('div', class_='rs__station-delay').text.strip()
    date = row.find_all('div', class_='col-xs-3')[1].find_all('span')[1].text.strip()
    stations.append({
        'station_name': station_name,
        'arrival_time': arrival_time,
        'departure_time': departure_time,
        'delay': delay,
        'date': date,
        'stoppage_number': i + 1
    })

# Extract source and destination
source = stations[0]['station_name'] if stations else None
destination = stations[-1]['station_name'] if stations else None

# Extract current station code (current_location_info)
current_location_info_tag = soup.find('div', class_='train-update__status')
current_location_info = None
if current_location_info_tag:
    current_location_info_text = current_location_info_tag.text.strip()
    current_location_info_match = re.search(r'(\w+ Junction)', current_location_info_text)
    if current_location_info_match:
        current_location_info = current_location_info_match.group(1)

# Extract previous and upcoming stations
current_index = next((i for i, station in enumerate(stations) if station['station_name'] == current_location_info), None)
previous_stations = stations[:current_index] if current_index is not None else []
upcoming_stations = stations[current_index + 1:] if current_index is not None else []

# Calculate spent time based on the arrival and departure times of the current station
spent_time = None
if current_index is not None:
    spent_time = stations[current_index]['departure_time']

# Prepare the final JSON response
response = {
    "status_as_of": status_as_of,
    "journey_time": journey_time,
    "source": source,
    "destination": destination,
    "current_location_info": current_location_info,
    "previous_stations": previous_stations,
    "upcoming_stations": upcoming_stations,
    "spent_time": spent_time,
    "stations": stations
}

# Print the JSON response
print(json.dumps(response, indent=4))

{
    "status_as_of": "Last Updated:\u00a028 Jun 2024 23:09, (Disclaimer: This train running information is not affiliated with or endorsed by Indian Railways or IRCTC.)",
    "journey_time": "14:35",
    "source": "Abohar",
    "destination": "Jodhpur Junction",
    "current_location_info": null,
    "previous_stations": [],
    "upcoming_stations": [],
    "spent_time": null,
    "stations": [
        {
            "station_name": "Abohar",
            "arrival_time": "",
            "departure_time": "19:40",
            "delay": "Right Time",
            "date": "28-Jun",
            "stoppage_number": 1
        },
        {
            "station_name": "Pakki",
            "arrival_time": "19:53",
            "departure_time": "19:55",
            "delay": "Delay by  2 min",
            "date": "28-Jun",
            "stoppage_number": 2
        },
        {
            "station_name": "Malout",
            "arrival_time": "20:05",
            "departure_time": "20:07",
            