# Install dependencies

In [41]:
# pip install pytz

Note: you may need to restart the kernel to use updated packages.


# imports

In [44]:
import pandas as pd
from datetime import datetime
import pytz
import requests
import json
from concurrent.futures import ThreadPoolExecutor
import os
import ast

# Scrape place codes

In [52]:
js_file_url = "https://travel.state.gov/etc/designs/travel/TSGglobal_libs/data/PostsVWT.js"

In [53]:
# Download the JavaScript file that embeds the list of post codes.
# A timeout keeps a stalled connection from hanging the kernel indefinitely, and
# raise_for_status() stops an HTTP error page from being parsed as the code list.
response = requests.get(js_file_url, timeout=30)
response.raise_for_status()
content = response.text

In [54]:
print(content)







(function($){
    window.travelStateGov = window.travelStateGov || {};
    travelStateGov.PostsVWT = {
        sourceData : [
            {"code":"P142","value":"N Djamena"},
            {"code":"P142","value":"N'Djamena"},

         {"code":"abidjan","value":"Abidjan"}
        ,
         {"code":"P2","value":"Abu Dhabi"}
        ,
         {"code":"P3","value":"Abuja"}
        ,
         {"code":"P4","value":"Accra"}
        ,
         {"code":"adana","value":"Adana"}
        ,
         {"code":"P5","value":"Addis Ababa"}
        ,
         {"code":"P6","value":"Algiers"}
        ,
         {"code":"P7","value":"Almaty"}
        ,
         {"code":"P8","value":"Amman"}
        ,
         {"code":"P9","value":"Amsterdam"}
        ,
         {"code":"P10","value":"Ankara"}
        ,
         {"code":"P11","value":"Antananarivo"}
        ,
         {"code":"P225","value":"Apia"}
        ,
         {"code":"P12","value":"Ashgabat"}
        ,
         {"code":"P13","value":"Asmara"}


In [55]:
# Take the text between the first '[' and the first ']' that follows it, re-wrap it in
# brackets, and evaluate it as a Python literal (the entries are plain string-only dicts).
codes = ast.literal_eval(
    "[{}]".format(content.split("[", 2)[1].split("]", 1)[0])
)

In [56]:
codes

[{'code': 'P142', 'value': 'N Djamena'},
 {'code': 'P142', 'value': "N'Djamena"},
 {'code': 'abidjan', 'value': 'Abidjan'},
 {'code': 'P2', 'value': 'Abu Dhabi'},
 {'code': 'P3', 'value': 'Abuja'},
 {'code': 'P4', 'value': 'Accra'},
 {'code': 'adana', 'value': 'Adana'},
 {'code': 'P5', 'value': 'Addis Ababa'},
 {'code': 'P6', 'value': 'Algiers'},
 {'code': 'P7', 'value': 'Almaty'},
 {'code': 'P8', 'value': 'Amman'},
 {'code': 'P9', 'value': 'Amsterdam'},
 {'code': 'P10', 'value': 'Ankara'},
 {'code': 'P11', 'value': 'Antananarivo'},
 {'code': 'P225', 'value': 'Apia'},
 {'code': 'P12', 'value': 'Ashgabat'},
 {'code': 'P13', 'value': 'Asmara'},
 {'code': 'astana', 'value': 'Astana'},
 {'code': 'P15', 'value': 'Asuncion'},
 {'code': 'athens', 'value': 'Athens'},
 {'code': 'P17', 'value': 'Auckland'},
 {'code': 'P226', 'value': 'Baghdad'},
 {'code': 'P19', 'value': 'Baku'},
 {'code': 'P20', 'value': 'Bamako'},
 {'code': 'P21', 'value': 'Bandar Seri Begawan'},
 {'code': 'P22', 'value': 'Ban

In [57]:
type(codes)

list

In [58]:
type(codes[0])

dict

In [59]:
## Or read the codes from a pre-saved JSON file
# with open('place_codes.json','r') as fp:
#     codes = json.load(fp=fp)['sourceData']
# codes

# Function to request visa wait times, given a place ID

In [60]:
def get_wait_times(cid : str,name: str = 'NA', timeout: float = 30) -> dict:
    """Fetch the visa appointment wait times for a single post.

    Parameters
    ----------
    cid : str
        Post code sent as the ``cid`` query parameter (e.g. ``'P48'``).
    name : str, optional
        Display name copied verbatim into the result; defaults to ``'NA'``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    dict
        ``location_id`` and ``location_name`` followed by one entry per visa
        category label. The values are the raw ``|``-separated fields of the
        response, paired with the labels in order. If the response carries fewer
        fields than labels, the trailing labels are absent from the dict.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    # Let requests build (and URL-encode) the query string instead of formatting it by hand.
    response = requests.get(
        "https://travel.state.gov/content/travel/resources/database/database.getVisaWaitTimes.html",
        params={"cid": cid, "aid": "VisaWaitTimesHomePage"},
        timeout=timeout,
    )
    # Without this, an error page would be split on '|' and reported as wait times.
    response.raise_for_status()

    # The body is a single line of '|'-separated values, one per label below.
    wait_fields = response.text.strip().split('|')
    labels = ["Interview Required Visitors (B1/B2)","Interview Required Students/Exchange Visitors (F, M, J)",
             "Interview Required Petition-Based Temporary Workers (H, L, O, P, Q)","Interview Waiver Visitors (B1/B2)",
             "Interview Waiver Students/Exchange Visitors (F, M, J)","Interview Waiver Petition-Based Temporary Workers (H, L, O, P, Q)"]
    return dict(zip(['location_id','location_name',*labels],[cid,name,*wait_fields]))

In [61]:
get_wait_times('P48')

{'location_id': 'P48',
 'location_name': 'NA',
 'Interview Required Visitors (B1/B2)': '980 Days',
 'Interview Required Students/Exchange Visitors (F, M, J)': '37 Days',
 'Interview Required Petition-Based Temporary Workers (H, L, O, P, Q)': '365 Days',
 'Interview Waiver Visitors (B1/B2)': '171 Days',
 'Interview Waiver Students/Exchange Visitors (F, M, J)': '4 Days',
 'Interview Waiver Petition-Based Temporary Workers (H, L, O, P, Q)': '298 Days'}

In [62]:
# for item in codes:
#     try:
#         get_wait_times(item['code'],item['value'])
#     except:
#         print(item)

In [63]:
# %%time
# results = [get_wait_times(item['code'],item['value']) for item in codes]

# Get visa times for all places

In [64]:
# os.cpu_count() returns None when the CPU count cannot be determined; fall back to 1
# so the multiplication cannot raise TypeError.
worker_count = (os.cpu_count() or 1) * 2
with ThreadPoolExecutor(max_workers=worker_count) as pool:
    # map() walks both argument lists in lockstep and yields results in the order of `codes`.
    res = list(pool.map(
        get_wait_times,
        [item['code'] for item in codes],
        [item['value'] for item in codes],
    ))

In [65]:
res

[{'location_id': 'P142',
  'location_name': 'N Djamena',
  'Interview Required Visitors (B1/B2)': '65 Days',
  'Interview Required Students/Exchange Visitors (F, M, J)': '65 Days',
  'Interview Required Petition-Based Temporary Workers (H, L, O, P, Q)': '65 Days',
  'Interview Waiver Visitors (B1/B2)': '1 Days',
  'Interview Waiver Students/Exchange Visitors (F, M, J)': '1 Days',
  'Interview Waiver Petition-Based Temporary Workers (H, L, O, P, Q)': '1 Days'},
 {'location_id': 'P142',
  'location_name': "N'Djamena",
  'Interview Required Visitors (B1/B2)': '65 Days',
  'Interview Required Students/Exchange Visitors (F, M, J)': '65 Days',
  'Interview Required Petition-Based Temporary Workers (H, L, O, P, Q)': '65 Days',
  'Interview Waiver Visitors (B1/B2)': '1 Days',
  'Interview Waiver Students/Exchange Visitors (F, M, J)': '1 Days',
  'Interview Waiver Petition-Based Temporary Workers (H, L, O, P, Q)': '1 Days'},
 {'location_id': 'abidjan',
  'location_name': 'Abidjan',
  'Interview

# Convert responses to a DataFrame

In [66]:
df = pd.DataFrame(res)

In [67]:
df

Unnamed: 0,location_id,location_name,Interview Required Visitors (B1/B2),"Interview Required Students/Exchange Visitors (F, M, J)","Interview Required Petition-Based Temporary Workers (H, L, O, P, Q)",Interview Waiver Visitors (B1/B2),"Interview Waiver Students/Exchange Visitors (F, M, J)","Interview Waiver Petition-Based Temporary Workers (H, L, O, P, Q)"
0,P142,N Djamena,65 Days,65 Days,65 Days,1 Days,1 Days,1 Days
1,P142,N'Djamena,65 Days,65 Days,65 Days,1 Days,1 Days,1 Days
2,abidjan,Abidjan,183 Days,77 Days,1 Days,1 Days,1 Days,1 Days
3,P2,Abu Dhabi,378 Days,295 Days,175 Days,1 Days,1 Days,1 Days
4,P3,Abuja,Emergency Appointments Only,Emergency Appointments Only,Emergency Appointments Only,Emergency Appointments Only,Emergency Appointments Only,Emergency Appointments Only
...,...,...,...,...,...,...,...,...
239,P217,Windhoek,58 Days,30 Days,30 Days,30 Days,30 Days,30 Days
240,P218,Yaounde,9 Days,9 Days,13 Days,Same Day,Same Day,Same Day
241,yekaterinburg,Yekaterinburg,Closed,Closed,Closed,Closed,Closed,Closed
242,P220,Yerevan,260 Days,120 Days,79 Days,1 Days,1 Days,1 Days


# Save data

## Getting current time

In [68]:
timezone = 'Asia/Kolkata'

In [69]:
current_time = datetime.now(pytz.timezone(timezone))
current_time

datetime.datetime(2022, 12, 2, 23, 46, 17, 831979, tzinfo=<DstTzInfo 'Asia/Kolkata' IST+5:30:00 STD>)

In [70]:
timestamp = current_time.strftime('%y-%m-%d_%H:%M:%S')
timestamp

'22-12-02_23:46:17'

## Save the file to csv and xlsx

In [71]:
# `timestamp` contains ':' characters, which are not allowed in Windows file names,
# so replace them before building the output paths.
output_stem = f"us_visa_app_times_{timestamp.replace(':', '-')}"
df.to_excel(f'{output_stem}.xlsx',index=False)
df.to_csv(f'{output_stem}.csv',index=False)