# In [None]:
import urllib.request, json
import csv

# Download the Louisiana SARS-CoV-2 dataset (GISAID-derived) as JSON.
# A browser-like User-Agent is sent with the request
# (NOTE(review): presumably the server rejects urllib's default agent -- confirm).
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'

url = "https://phylodynamics.pandemicprepardness.org/charon/getDataset?prefix=/SARS-CoV-2/Louisiana"
headers = {'User-Agent': user_agent}

request = urllib.request.Request(url, None, headers)  # the assembled request

# The context manager guarantees the connection is closed, even if read()
# raises. (A bare `response.close` without parentheses closes nothing.)
with urllib.request.urlopen(request) as response:
    data = response.read()
    # Read the declared charset while the response is still open;
    # fall back to UTF-8 when the server does not declare one.
    encoding = response.info().get_content_charset('utf-8')

JSON_object = json.loads(data.decode(encoding))

## Create dictionary for locational longitude and latitude:
## deme name -> mapping with 'latitude' and 'longitude' entries
geo_dict = JSON_object['meta']['geo_resolutions'][0]['demes']

## function to get longitude, latitude information for each case
def get_long_lat(geodict, region):
    """Look up the coordinates of *region*.

    Despite the function name, the result is ordered ``(latitude, longitude)``.

    Args:
        geodict: mapping of region name to a mapping holding ``'latitude'``
            and ``'longitude'`` entries.
        region: name of the region to resolve.

    Returns:
        ``(latitude, longitude)`` taken from *geodict* when the region is
        listed there, otherwise from a small set of hard-coded fallbacks,
        otherwise ``(0, 0)``.

    Raises:
        KeyError: if *region* is ``'Montana /'`` and *geodict* has no
            ``'Montana'`` entry, or a matched entry lacks a coordinate key.
    """
    # Hard-coded (latitude, longitude) pairs used only when the region is
    # not present in geodict.
    fixed_coords = {
        'Washington': (47.751076, -120.740135),
        'District of Columbia': (38.942142, -77.025955),
        'Guam': (13.264341, 144.672104),
        'Virgin Islands of the U.S.': (18.335765, -64.896335),
        'Los Angeles': (34.052235, -118.243683),
    }

    if region in geodict:
        entry = geodict[region]
    elif region == 'Montana /':
        # Name carrying a stray trailing slash: reuse the 'Montana' entry.
        entry = geodict['Montana']
    else:
        return fixed_coords.get(region, (0, 0))

    return entry['latitude'], entry['longitude']


# Store data from JSON format as a flat list of (parent, child) edges
# Module-level accumulator that get_edges() appends to.
edges = []


def _attr_value(node_attrs, key, default=' '):
    """Return ``node_attrs[key]['value']``, or *default* when *key* is absent."""
    if key in node_attrs:
        return node_attrs[key]['value']
    return default


## function to get information for each edge
def get_edges(treedict, geo_dict, parent=None):
    """Walk a tree node depth-first and append its edges to ``edges``.

    Each node is represented by a ';'-separated label built from, in order:
    name, Region, covv_lineage, num_date, latitude, longitude, covv_gender,
    covv_npatient_age, covv_orig_lab and covv_subm_lab. Missing optional
    attributes appear as a single space.

    Args:
        treedict: tree node with ``'name'``, ``'node_attrs'`` (which must
            contain ``'num_date'``) and optionally ``'children'``.
        geo_dict: region-name -> coordinates mapping handed to get_long_lat.
        parent: label of the parent node, or None for the root.

    Returns:
        None. Results are appended to the module-level ``edges`` list as
        ``(parent_label, child_label)`` tuples.

    Raises:
        KeyError: if ``'name'``, ``'node_attrs'`` or ``'num_date'`` is missing.
    """
    attrs = treedict['node_attrs']

    region = _attr_value(attrs, 'Region')

    # Coordinates default to blank so they are always bound, including when
    # the data itself carries a blank (' ') Region value.
    lat = ' '
    long = ' '
    if region == 'Wuhan':
        # Wuhan samples are placed at the coordinates of the 'Asia' entry.
        lat, long = get_long_lat(geo_dict, 'Asia')
    elif region != ' ':
        lat, long = get_long_lat(geo_dict, region)

    fields = (
        treedict['name'],
        region,
        _attr_value(attrs, 'covv_lineage'),
        attrs['num_date']['value'],
        lat,
        long,
        _attr_value(attrs, 'covv_gender'),
        _attr_value(attrs, 'covv_npatient_age'),
        _attr_value(attrs, 'covv_orig_lab'),
        _attr_value(attrs, 'covv_subm_lab'),
    )
    # str() on every field: attribute values such as the age may be numeric,
    # which would break plain string concatenation.
    name = ';'.join(str(field) for field in fields)

    ### getting direct parent node in recursive depth-first search (DFS) algorithm
    if parent is not None:
        edges.append((parent, name))

    for item in treedict.get('children', ()):
        if isinstance(item, dict):
            get_edges(item, geo_dict, parent=name)
        elif isinstance(item, list):
            # Entries of a nested list are children of THIS node, so they are
            # linked to `name` (linking them to `parent` would attach them to
            # the grandparent, or to None at the root).
            edges.extend((name, el) for el in item)
        else:
            edges.append((name, item))
         
## Walk the tree from its root so that `edges` collects every parent/child pair
get_edges(JSON_object['tree'], geo_dict, parent=None)

# Build a directed graph from the collected edges and write it out as a
# Graphviz .gv file (compatible with Gephi)
import graphviz

d = graphviz.Digraph()
for tail, head in edges:
    d.edge(tail, head)

outfile_name = 'test-output/phylogeny_tree_LA.gv'
# view=False: render the file without opening it in a viewer
d.render(outfile_name, view=False)