Automatically creates line shapefiles from an input shapefile of the Basel tram network.

In this case, the script below will produce two shapefiles:
- Slow-speed segments
- Construction sites (slow-speed and total-block)

Both shapefiles contain segments (tracks) of the Basel tram network on which a fictional slow-speed restriction or construction site is assumed to have existed.
Besides the geometries, two columns are created automatically with the time period during which each segment is supposed to have existed (e.g. the start and end date of a given construction site).

In [2]:
#
# Import modules
#
import pandas as pd
import geopandas as gpd
import shapefile
from osgeo import osr 
from shapely.geometry import Point, shape, LineString
import matplotlib
import random
import time
from datetime import datetime, timezone, date, timedelta
from secrets import randbelow
import fiona


#
# Define functions
#
def makeDate(datestring):
    """Parse a date-like value into a pandas timestamp

    Parameters
    ----------
    datestring : str
        Textual date (e.g. ``'2015-01-01'``) to be parsed; the value is
        handed to ``pd.to_datetime`` unchanged

    Returns
    -------
    date
        Whatever ``pd.to_datetime`` produces for the given input
    """
    return pd.to_datetime(datestring)

def random_date(val):
    """Draw a random day between 1 January 2015 and today (both inclusive)

    Parameters
    ----------
    val : object
        Ignored; it only exists so that the function can be used as a
        per-element callback (e.g. with ``Series.apply``)

    Returns
    -------
    date
        The randomly chosen day, converted with ``makeDate``
    """
    # work on proleptic ordinals so that a single integer draw selects the day
    earliest = makeDate('2015-01-01').toordinal()
    latest = date.today().toordinal()
    chosen = random.randint(earliest, latest)
    return makeDate(date.fromordinal(chosen))

def SHPtoGeodataframe(shp):
    """Converts an ESRI shapefile to a Geopandas geodataframe

    Both ``<shp>.shp`` (with its companion files read by pyshp) and
    ``<shp>.prj`` must exist; the latter supplies the coordinate reference
    system of the result.

    Parameters
    ----------
    shp : str
        Name of the shapefile (without extension) to be converted into a GeoDataframe

    Returns
    -------
    gdf
        Converted Geopandas geodataframe from the input shapefile, with one
        row per shape record and one column per attribute field
    """
    # initialise two lists for the attributes and the geometries
    attributes = []
    geometry = []

    # read the shapefile; using the reader as a context manager guarantees
    # that its file handles are closed, even if a record fails to parse
    with shapefile.Reader(shp + '.shp') as tracts:
        # names of the attribute fields; the first entry of `fields` is
        # skipped (in pyshp it is the deletion-flag placeholder, not a real
        # attribute column)
        fields = [field[0] for field in tracts.fields[1:]]

        # append attributes and geometries
        for row in tracts.shapeRecords():
            geometry.append(shape(row.shape.__geo_interface__))
            attributes.append(dict(zip(fields, row.record)))

    # set the coordinates reference system for the data from the prj file
    with open(shp + '.prj') as p:
        proj4 = osr.SpatialReference(p.read()).ExportToProj4()

    # create the geodataframe
    return gpd.GeoDataFrame(data=attributes, geometry=geometry, crs=proj4)


#
# Definition of the variables
#
gdf_net = SHPtoGeodataframe('basel_tram_network')


#
# Create the shapefiles of slow-speed segments and construction sites
#
def _random_subsegments(network, min_length, max_length):
    """Cut one random sub-line out of every track of the network

    Parameters
    ----------
    network : GeoDataFrame
        Tram network; every geometry must expose ``.coords`` (i.e. be a
        single-part line)
    min_length : float
        Smallest length (inclusive) a sub-line may have to be kept, in the
        units of the network's CRS
    max_length : float
        Largest length (inclusive) a sub-line may have to be kept, in the
        units of the network's CRS

    Returns
    -------
    list
        The LineStrings whose length lies within the given bounds; tracks
        whose random sub-line falls outside the bounds contribute nothing
    """
    segments = []
    for track in network.geometry:
        # Extract the point values that define the line
        xx, yy = track.coords.xy
        n_points = len(xx)

        # the first vertex is drawn from the first half of the track, so at
        # least two vertices always remain available after it
        start_i = random.randrange(n_points) // 2
        end_i = random.randint(start_i, n_points)
        if end_i - start_i <= 1:
            # widen the slice: a LineString needs at least two vertices
            end_i += 2

        segment = LineString(list(zip(xx[start_i:end_i], yy[start_i:end_i])))
        if min_length <= segment.length <= max_length:
            segments.append(segment)
    return segments


def _add_random_period(frame):
    """Add random ``start_date``/``end_date`` columns (as strings) to a frame

    Every row gets its own random start date (see ``random_date``) and its
    own random duration of 0 to 364 days, so that the segments do not all
    last equally long.

    Parameters
    ----------
    frame : GeoDataFrame
        Frame to be extended; it is modified in place

    Returns
    -------
    GeoDataFrame
        The same frame, for convenience
    """
    n_rows = len(frame)
    starts = pd.to_datetime([random_date(None) for _ in range(n_rows)])
    # one independent draw per row (a single shared draw would give every
    # segment exactly the same duration)
    durations = pd.to_timedelta([randbelow(365) for _ in range(n_rows)], unit='D')

    frame['start_date'] = pd.Series(starts, index=frame.index)
    frame['end_date'] = frame['start_date'] + pd.Series(durations, index=frame.index)
    # dates are stored as text in the exported shapefile
    frame[['start_date', 'end_date']] = frame[['start_date', 'end_date']].astype(str)
    return frame


## slow-speed segments
# NOTE(review): length bounds are in CRS units (presumably metres) - confirm
lines = _random_subsegments(gdf_net, 9, 1100)

# create the geodataframe
lines_gdf = gpd.GeoDataFrame(geometry=lines, crs=gdf_net.crs)
lines_gdf = _add_random_period(lines_gdf)
lines_gdf.sort_values(by=['start_date'], inplace=True)
# export the shapefile
lines_gdf.to_file('slow_tracks')

## construction sites
lines = _random_subsegments(gdf_net, 10, 3500)

# create the geodataframe
lines_gdf = gpd.GeoDataFrame(geometry=lines, crs=gdf_net.crs)
lines_gdf = _add_random_period(lines_gdf)

# 40 % of the sites become total blocks, the remaining ones slow-speed sites;
# both parts are explicit copies so the new column never targets a view
total_blocks = lines_gdf.sample(frac=0.40).copy()
total_blocks['type'] = 'Total block'
slow_speed = lines_gdf.loc[~lines_gdf.index.isin(total_blocks.index)].copy()
slow_speed['type'] = 'Slow-speed'
lines_gdf = pd.concat([slow_speed, total_blocks], axis=0)
lines_gdf.sort_values(by=['start_date'], inplace=True)
# create shapefile
lines_gdf.to_file('construction_sites')

At the end, you will get two folders:
- slow_tracks
- construction_sites

In them, you will find the shapefiles (.shp) and their auxiliary files (.cpg, .dbf, .shx, .prj).