DISCLAIMER: This technique of sampling SVG elements for use in Tableau is based on the code of Tristan Guillevin. You can reach him on Twitter: @ladataviz

NOTE: I have done some preprocessing that might not be mentioned anywhere else in the documentation: I deleted the unnecessary rectangle elements from the SVG files whenever required. For example, the SVG output of R or Python contains an all-enclosing rectangle which interferes with the output of https://fskpf.github.io/, and the output of https://fskpf.github.io/ itself might contain all-enclosing rectangles or other unnecessary SVG element(s). You might want to identify and take care of those while inspecting the SVG elements.

Although the code below is written specifically for the graphs needed in my viz, you will be able to adapt the technique to any shape. Reach me at @noel_jbasil on Twitter for any help.

In [1]:
#Importing required libraries
import numpy as np
import pandas as pd
from xml.dom import minidom
import os
import svg.path as svg

In [2]:
#Code to create tableau backend for Bubble charts
#
#For every .svg file in the packed-bubble folder, this cell samples points along the outline path and the sketchy
#fill path of each shape and writes them to '<svg name>_tab_backend.csv' with the columns
#'shape', 'order', 'x coordinate' and 'y coordinate'.

os.chdir('D:/#Viz for social good/Academics without Borders/OutRough/packed bubble')

#Get all svg files in the folder (os.listdir() lists the current directory set by os.chdir above)
for filename in os.listdir():
    if filename.endswith(".svg"):
        doc = minidom.parse(filename)

        #Get all the g tags. Note: g tags contain the paths
        rects = doc.getElementsByTagName("g")

        #Save the paths of each group aka 'g tag'.
        #Paths for each shape are in the g tags that carry a 'clip-path' attribute
        paths = []
        for rect in rects:
            if rect.getAttribute("clip-path") != "":
                paths.append(rect.getElementsByTagName("path"))

        #'paths' is now a list of lists, because each g tag contains two paths - one for the outer boundary and
        #one for the sketchy fill lines of the shape.

        #Separate the two kinds of path. The d attribute contains the string that defines the path;
        #the first path of a group is used as the inner (fill) path and the second as the outer (boundary) path
        in_paths = [path[0].getAttribute('d') for path in paths]
        out_paths = [path[1].getAttribute('d') for path in paths]

        #Parse the d strings back into path objects so that points can be sampled from them
        out_paths_parsed = [svg.parse_path(d) for d in out_paths]
        in_paths_parsed = [svg.parse_path(d) for d in in_paths]

        #Before sampling, identify the shapes that have to be sampled at a higher rate (larger shapes) and the ones
        #that can be sampled at a lower rate (small shapes). The character length of the inner path's d string is
        #used as the indicator: 1 = sample at the higher rate, 0 = sample at the lower rate
        len_tag = [1 if len(p) > 14000 else 0 for p in in_paths]

        #Now sample points from each path separately. First up the outer paths.
        #Lists for X coordinate, Y coordinate, shape indicator (i.e, first bubble, second bubble etc) and
        #order for plotting in tableau
        out_x = []
        out_y = []
        out_shape = []
        out_order = []

        for i, parsed in enumerate(out_paths_parsed):
            #Step of 1/4500 (roughly 4500 samples) for tagged shapes, 1/1500 (roughly 1500 samples) otherwise
            n_samples = 4500 if len_tag[i] == 1 else 1500
            #Using arange because range() only accepts integers
            for o, j in enumerate(np.arange(0, 1, 1/n_samples), 1):
                pt = parsed.point(j)          #complex number: real part is x, imaginary part is y
                out_x.append(pt.real)
                out_y.append(pt.imag)
                out_shape.append(i)           #Update shape list
                out_order.append(o)           #Update order list (starts at 1)

        #Save the extracted information as a dataframe
        Outer_xy = pd.DataFrame({"shape": out_shape, "order": out_order, "x coordinate": out_x, "y coordinate": out_y})

        #Next up, the same sampling for the inner paths
        in_x = []
        in_y = []
        in_shape = []
        in_order = []

        for i, parsed in enumerate(in_paths_parsed):
            n_samples = 4500 if len_tag[i] == 1 else 1500
            for o, j in enumerate(np.arange(0, 1, 1/n_samples), 1):
                pt = parsed.point(j)
                in_x.append(pt.real)
                in_y.append(pt.imag)
                in_shape.append(i + 2000)     #Added 2000 just to distinguish it from the shapes of out path
                in_order.append(o)

        #Save this information as a dataframe as well
        Inner_xy = pd.DataFrame({"shape": in_shape, "order": in_order, "x coordinate": in_x, "y coordinate": in_y})

        #Combine both the dataframes and export as a csv.
        #pd.concat is used because DataFrame.append was removed in pandas 2.0
        Sketchy_style_backend = pd.concat([Outer_xy, Inner_xy])
        Sketchy_style_backend.to_csv(f'D:/#Viz for social good/Academics without Borders/OutData/{os.path.splitext(filename)[0]}_tab_backend.csv',index=False)

In [None]:
#Code to create tableau backend for horizontal bar chart
#
#Samples points along the outline path and the sketchy fill path of every bar in Pricing_rough.svg and writes
#them to a csv with the columns 'shape', 'order', 'x coordinate' and 'y coordinate'.

#Load and parse SVG file into Python
doc = minidom.parse("D:/#Viz for social good/Academics without Borders/OutRough/Stack horiz bar/Pricing_rough.svg")

#Get all the g tags. Note: g tags contain the paths
rects = doc.getElementsByTagName("g")

#Save the paths of each group aka 'g tag'.
#Paths for each shape are in the g tags that carry a 'clip-path' attribute
paths = []
for rect in rects:
    if rect.getAttribute("clip-path") != "":
        paths.append(rect.getElementsByTagName("path"))

#'paths' is now a list of lists, because each g tag contains two paths - one for the outer boundary and
#one for the sketchy fill lines of the shape.

#Separate the two kinds of path. The d attribute contains the string that defines the path;
#the first path of a group is used as the inner (fill) path and the second as the outer (boundary) path
in_paths = [path[0].getAttribute('d') for path in paths]
out_paths = [path[1].getAttribute('d') for path in paths]

#Parse the d strings back into path objects so that points can be sampled from them
out_paths_parsed = [svg.parse_path(d) for d in out_paths]
in_paths_parsed = [svg.parse_path(d) for d in in_paths]

#Now sample points from each path separately. First up the outer paths.
#Lists for X coordinate, Y coordinate, shape indicator (i.e, first bar, second bar etc in a bar chart) and
#order for plotting in tableau
out_x = []
out_y = []
out_shape = []
out_order = []

for i, parsed in enumerate(out_paths_parsed):
    #Step of 1/4500 along the path, i.e. roughly 4500 samples per path.
    #Using arange because range() only accepts integers
    for o, j in enumerate(np.arange(0, 1, 1/4500), 1):
        pt = parsed.point(j)          #complex number: real part is x, imaginary part is y
        out_x.append(pt.real)
        out_y.append(pt.imag)
        out_shape.append(i)           #Update shape list
        out_order.append(o)           #Update order list (starts at 1)

#Save the extracted information as a dataframe
Outer_xy = pd.DataFrame({"shape": out_shape, "order": out_order, "x coordinate": out_x, "y coordinate": out_y})

#Next up, the same sampling for the inner paths
in_x = []
in_y = []
in_shape = []
in_order = []

for i, parsed in enumerate(in_paths_parsed):
    for o, j in enumerate(np.arange(0, 1, 1/4500), 1):
        pt = parsed.point(j)
        in_x.append(pt.real)
        in_y.append(pt.imag)
        in_shape.append(i + 20)       #Added 20 just to distinguish it from the shapes of out path
        in_order.append(o)

#Save this information as a dataframe as well
Inner_xy = pd.DataFrame({"shape": in_shape, "order": in_order, "x coordinate": in_x, "y coordinate": in_y})

#Combine both the dataframes and export as a csv.
#pd.concat is used because DataFrame.append was removed in pandas 2.0
Sketchy_style_backend = pd.concat([Outer_xy, Inner_xy])
Sketchy_style_backend.to_csv('D:/#Viz for social good/Academics without Borders/OutData/Pricing_rough_tab_backend.csv',index=False)

In [None]:
#Code to create tableau backend for network chart

#-------------------------------------------Code for VI model network graph-------------------------------------------------

#This code is run separately for each network graph because it involves hardcoding based on the number of edges and nodes.
#This section handles the VI model graph: it samples points along every edge line, arrow tip and node path and
#writes them to a csv with the columns 'shape', 'order', 'x coordinate' and 'y coordinate'.

#Load and parse SVG file into Python
doc = minidom.parse("D:/#Viz for social good/Academics without Borders/OutRough/network/VI model_rough_cleaned.svg")

#Get all the g tags. Note: g tags contain the paths
rects = doc.getElementsByTagName("g")

#Save the paths of each group aka 'g tag' (one entry per g tag)
paths = [rect.getElementsByTagName("path") for rect in rects]

#In this code, we will have to do some hard coding regretfully. From inspecting the svg file in HTML format: the first
#set of g tags are the edges (aka lines) of the network, the next set are the arrow tips of the edges in the respective
#order, and the final set are the nodes. These three types of elements require different preprocessing:
#                    Lines  - No preprocessing needed, but there is only one path in the g tag rather than two
#                    Arrows - The first path in the g tag is an enclosing rectangle and needs to be discarded. This leaves
#                             only one path per g tag, so arrows are then handled like lines
#                    Nodes  - Handled like the stacked rectangles, as each g tag contains two paths - one for the outer
#                             background and the other for the cross-hatch fill

#Since we have 6 edges and 7 nodes, the first 6 entries are lines, the next 6 are arrows and the remaining 7 are nodes
n_edges = 6

#Remove the unnecessary enclosing rectangle from the arrow groups by keeping only their second path
for i in range(n_edges, min(2 * n_edges, len(paths))):
    paths[i] = [paths[i][1]]

#Separate out the paths for edges (edges are made up of lines and arrows) and nodes
edges_paths = paths[0:2 * n_edges]
node_paths = paths[2 * n_edges:]

#Process the edges first and then the nodes.

#For edges there is no outer/inner split, as each entry is just a single line without separate outline and fill paths.
#The d attribute contains the string that defines the path; parse it into a path object
edges_paths_parsed = [svg.parse_path(path[0].getAttribute('d')) for path in edges_paths]

#Lists for X coordinate, Y coordinate, shape indicator (i.e, first edge, second edge etc) and
#order for plotting in tableau
x = []
y = []
shape = []
order = []

for i, parsed in enumerate(edges_paths_parsed):
    #Step of 1/1500 along the path, i.e. roughly 1500 samples per path.
    #Using arange because range() only accepts integers
    for o, j in enumerate(np.arange(0, 1, 1/1500), 1):
        pt = parsed.point(j)          #complex number: real part is x, imaginary part is y
        x.append(pt.real)
        y.append(pt.imag)
        shape.append(i)               #Update shape list
        order.append(o)               #Update order list (starts at 1)

#Save the extracted information as a dataframe
Edges_xy = pd.DataFrame({"shape": shape, "order": order, "x coordinate": x, "y coordinate": y})

#Now the processing for nodes.

#Each node group contains two paths; the first is used as the inner (fill) path and the second as the
#outer (outline) path
in_paths = [path[0].getAttribute('d') for path in node_paths]
out_paths = [path[1].getAttribute('d') for path in node_paths]

#Parse the d strings back into path objects so that points can be sampled from them
out_paths_parsed = [svg.parse_path(d) for d in out_paths]
in_paths_parsed = [svg.parse_path(d) for d in in_paths]

#Before sampling, identify the shapes that have to be sampled at a higher rate (larger shapes) and the ones
#that can be sampled at a lower rate (small shapes). The character length of the inner path's d string is
#used as the indicator: 1 = sample at the higher rate, 0 = sample at the lower rate
len_tag = [1 if len(p) > 14000 else 0 for p in in_paths]

#Sample the outer paths
out_x = []
out_y = []
out_shape = []
out_order = []

for i, parsed in enumerate(out_paths_parsed):
    #Step of 1/6500 (roughly 6500 samples) for tagged shapes, 1/1500 (roughly 1500 samples) otherwise
    n_samples = 6500 if len_tag[i] == 1 else 1500
    for o, j in enumerate(np.arange(0, 1, 1/n_samples), 1):
        pt = parsed.point(j)
        out_x.append(pt.real)
        out_y.append(pt.imag)
        out_shape.append(i + 2000)    #Added 2000 just to distinguish it from the shapes of the edges
        out_order.append(o)

#Save the extracted information as a dataframe
Outer_xy = pd.DataFrame({"shape": out_shape, "order": out_order, "x coordinate": out_x, "y coordinate": out_y})

#Sample the inner paths in the same way
in_x = []
in_y = []
in_shape = []
in_order = []

for i, parsed in enumerate(in_paths_parsed):
    n_samples = 6500 if len_tag[i] == 1 else 1500
    for o, j in enumerate(np.arange(0, 1, 1/n_samples), 1):
        pt = parsed.point(j)
        in_x.append(pt.real)
        in_y.append(pt.imag)
        in_shape.append(i + 4000)     #Added 4000 just to distinguish it from the shapes of out path
        in_order.append(o)

#Save this information as a dataframe as well
Inner_xy = pd.DataFrame({"shape": in_shape, "order": in_order, "x coordinate": in_x, "y coordinate": in_y})

#Combine the dataframes and export as a csv.
#pd.concat is used because DataFrame.append was removed in pandas 2.0
Nodes_xy = pd.concat([Outer_xy, Inner_xy])

Network_xy = pd.concat([Edges_xy, Nodes_xy])

Network_xy.to_csv('D:/#Viz for social good/Academics without Borders/OutData/VI Model_tab_backend.csv',index=False)

#-------------------------------------------Code for Teach the Teacher model network graph-------------------------------------

#Same procedure as for the other network graph, here for the Teach the Teacher (TtT) model: sample points along
#every edge line, arrow tip and node path and write them to a csv with the columns 'shape', 'order',
#'x coordinate' and 'y coordinate'.

#Load and parse SVG file into Python
doc = minidom.parse("D:/#Viz for social good/Academics without Borders/OutRough/network/TtT model_rough_cleaned.svg")

#Get all the g tags. Note: g tags contain the paths
rects = doc.getElementsByTagName("g")

#Save the paths of each group aka 'g tag' (one entry per g tag)
paths = [rect.getElementsByTagName("path") for rect in rects]

#In this code, we will have to do some hard coding regretfully. From inspecting the svg file in HTML format: the first
#set of g tags are the edges (aka lines) of the network, the next set are the arrow tips of the edges in the respective
#order, and the final set are the nodes. These three types of elements require different preprocessing:
#                    Lines  - No preprocessing needed, but there is only one path in the g tag rather than two
#                    Arrows - The first path in the g tag is an enclosing rectangle and needs to be discarded. This leaves
#                             only one path per g tag, so arrows are then handled like lines
#                    Nodes  - Handled like the stacked rectangles, as each g tag contains two paths - one for the outer
#                             background and the other for the cross-hatch fill

#Since we have 21 edges and 22 nodes, the first 21 entries are lines, the next 21 are arrows and the remaining 22 are nodes
n_edges = 21

#Remove the unnecessary enclosing rectangle from the arrow groups by keeping only their second path
for i in range(n_edges, min(2 * n_edges, len(paths))):
    paths[i] = [paths[i][1]]

#Separate out the paths for edges (edges are made up of lines and arrows) and nodes
edges_paths = paths[0:2 * n_edges]
node_paths = paths[2 * n_edges:]

#Process the edges first and then the nodes.

#For edges there is no outer/inner split, as each entry is just a single line without separate outline and fill paths.
#The d attribute contains the string that defines the path; parse it into a path object
edges_paths_parsed = [svg.parse_path(path[0].getAttribute('d')) for path in edges_paths]

#Lists for X coordinate, Y coordinate, shape indicator (i.e, first edge, second edge etc) and
#order for plotting in tableau
x = []
y = []
shape = []
order = []

for i, parsed in enumerate(edges_paths_parsed):
    #Step of 1/1500 along the path, i.e. roughly 1500 samples per path.
    #Using arange because range() only accepts integers
    for o, j in enumerate(np.arange(0, 1, 1/1500), 1):
        pt = parsed.point(j)          #complex number: real part is x, imaginary part is y
        x.append(pt.real)
        y.append(pt.imag)
        shape.append(i)               #Update shape list
        order.append(o)               #Update order list (starts at 1)

#Save the extracted information as a dataframe
Edges_xy = pd.DataFrame({"shape": shape, "order": order, "x coordinate": x, "y coordinate": y})

#Now the processing for nodes.

#Each node group contains two paths; the first is used as the inner (fill) path and the second as the
#outer (outline) path
in_paths = [path[0].getAttribute('d') for path in node_paths]
out_paths = [path[1].getAttribute('d') for path in node_paths]

#Parse the d strings back into path objects so that points can be sampled from them
out_paths_parsed = [svg.parse_path(d) for d in out_paths]
in_paths_parsed = [svg.parse_path(d) for d in in_paths]

#Before sampling, identify the shapes that have to be sampled at a higher rate (larger shapes) and the ones
#that can be sampled at a lower rate (small shapes). The character length of the inner path's d string is
#used as the indicator: 1 = sample at the higher rate, 0 = sample at the lower rate
len_tag = [1 if len(p) > 14000 else 0 for p in in_paths]

#Sample the outer paths
out_x = []
out_y = []
out_shape = []
out_order = []

for i, parsed in enumerate(out_paths_parsed):
    #Step of 1/6500 (roughly 6500 samples) for tagged shapes, 1/1000 (roughly 1000 samples) otherwise
    n_samples = 6500 if len_tag[i] == 1 else 1000
    for o, j in enumerate(np.arange(0, 1, 1/n_samples), 1):
        pt = parsed.point(j)
        out_x.append(pt.real)
        out_y.append(pt.imag)
        out_shape.append(i + 2000)    #Added 2000 just to distinguish it from the shapes of the edges
        out_order.append(o)

#Save the extracted information as a dataframe
Outer_xy = pd.DataFrame({"shape": out_shape, "order": out_order, "x coordinate": out_x, "y coordinate": out_y})

#Sample the inner paths in the same way
in_x = []
in_y = []
in_shape = []
in_order = []

for i, parsed in enumerate(in_paths_parsed):
    n_samples = 6500 if len_tag[i] == 1 else 1000
    for o, j in enumerate(np.arange(0, 1, 1/n_samples), 1):
        pt = parsed.point(j)
        in_x.append(pt.real)
        in_y.append(pt.imag)
        in_shape.append(i + 4000)     #Added 4000 just to distinguish it from the shapes of out path
        in_order.append(o)

#Save this information as a dataframe as well
Inner_xy = pd.DataFrame({"shape": in_shape, "order": in_order, "x coordinate": in_x, "y coordinate": in_y})

#Combine the dataframes and export as a csv.
#pd.concat is used because DataFrame.append was removed in pandas 2.0
Nodes_xy = pd.concat([Outer_xy, Inner_xy])

Network_xy = pd.concat([Edges_xy, Nodes_xy])

Network_xy.to_csv('D:/#Viz for social good/Academics without Borders/OutData/TtT Model_tab_backend.csv',index=False)