# Generate basic trip metrics for a random population for the MATSim model.

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point, Polygon, MultiPolygon
import random

# Read the origin-destination matrix and discard rows without an 'n_crisis' value
od_matrix = pd.read_csv(r"C:\Users\XXXX\Downloads\test-2\od.csv").dropna(subset=['n_crisis'])

# Normalise both quad-key columns to zero-padded 14-character strings so they
# can be compared against the shapefile's quad_id values
for quad_column in ('start_quad', 'end_quadke'):
    od_matrix[quad_column] = od_matrix[quad_column].apply(lambda key: f"{int(key):014d}")
od_matrix['n_crisis'] = od_matrix['n_crisis'].astype(int)

# Read the quad geometries and give quad_id the same 14-character string format
shapefile = gpd.read_file(r"C:\Users\mafgo\Downloads\test-2\base_quad.shp")
shapefile['quad_id'] = shapefile['quad_id'].apply(lambda key: f"{int(key):014d}")

# Scale the trip counts by 2.08 and round to whole trips
scaled_counts = od_matrix['n_crisis'] * 2.08
od_matrix['new_n_crisis'] = scaled_counts.round().astype(int)

# Function to generate random points within a polygon
def generate_random_points(polygon, num_points):
    """Generate random points inside ``polygon`` by rejection sampling.

    Candidate coordinates are drawn with ``random.uniform`` from the polygon's
    bounding box and kept only when ``polygon.contains`` accepts them.

    Args:
        polygon: Geometry exposing ``bounds``, ``is_empty``, ``area`` and
            ``contains`` (e.g. a shapely ``Polygon``).
        num_points: Number of points to generate. Values <= 0 yield an
            empty list.

    Returns:
        A list of ``num_points`` ``Point`` objects lying inside ``polygon``.

    Raises:
        ValueError: If ``polygon`` is empty or has no area. No candidate could
            ever be accepted in that case, so sampling would loop forever.
    """
    if num_points <= 0:
        return []

    # Guard against an endless rejection loop on degenerate geometries.
    if polygon.is_empty or polygon.area <= 0:
        raise ValueError("Cannot sample points inside an empty or zero-area polygon.")

    min_x, min_y, max_x, max_y = polygon.bounds
    points = []
    while len(points) < num_points:
        random_point = Point([random.uniform(min_x, max_x), random.uniform(min_y, max_y)])
        # The bounding box is usually larger than the polygon, so filter.
        if polygon.contains(random_point):
            points.append(random_point)
    return points

# Initialize a list to hold the final output data
output_data = []

# Build the quad_id -> geometry lookup once instead of scanning the shapefile
# for every OD row. setdefault keeps the first geometry when a quad_id repeats,
# matching a "first match" lookup.
quad_geometries = {}
for quad_id, quad_geom in zip(shapefile['quad_id'], shapefile.geometry):
    quad_geometries.setdefault(quad_id, quad_geom)


def _largest_polygon(geom):
    """Return the largest-area part of a MultiPolygon; pass other geometries through."""
    if isinstance(geom, MultiPolygon):
        # Iterate through .geoms: multi-part geometries are not directly
        # iterable in Shapely 2.x, so max(geom, ...) raises TypeError there.
        return max(geom.geoms, key=lambda part: part.area)
    return geom


# Loop over each row in the OD matrix
for idx, row in od_matrix.iterrows():
    start_quad = row['start_quad']
    end_quadke = row['end_quadke']
    new_n_crisis = row['new_n_crisis']

    # Check if start_quad and end_quadke exist in the shapefile
    if start_quad not in quad_geometries or end_quadke not in quad_geometries:
        print(f"Skipping row {idx} because start_quad {start_quad} or end_quadke {end_quadke} not found in shapefile.")
        continue

    try:
        # Find the geometries for start and end quads; for a MultiPolygon,
        # use the largest polygon for simplicity
        start_geom = _largest_polygon(quad_geometries[start_quad])
        end_geom = _largest_polygon(quad_geometries[end_quadke])

        # Generate random points within the start and end quadrants
        start_points = generate_random_points(start_geom, new_n_crisis)
        end_points = generate_random_points(end_geom, new_n_crisis)

        # Create one output row (one agent) per generated origin/destination pair.
        # Rows are appended only after both point lists were generated, so a
        # failing OD pair contributes no partial data.
        for start_point, end_point in zip(start_points, end_points):
            output_data.append({
                'agent_id': len(output_data) + 1,
                'n_crisis': new_n_crisis,
                'start_quad': start_quad,
                'end_quadke': end_quadke,
                'x1': start_point.x,
                'y1': start_point.y,
                'x2': end_point.x,
                'y2': end_point.y
            })
    except Exception as e:
        # Best effort: report the failing OD pair and continue with the rest.
        print(f"Error processing row {idx} with start_quad {start_quad} and end_quadke {end_quadke}: {e}")

# Convert output data to DataFrame
output_df = pd.DataFrame(output_data)

# Save the output to a CSV file
output_df.to_csv(r"C:\Users\XXXX\Downloads\test-2\disaggregated_od.csv", index=False)

print("Process completed successfully.")


Process completed successfully.


# Assigning the timestamp for each generated trip.

In [2]:
import random
import datetime

# Define the start and end times
start_time = "07:00:00"
end_time = "09:59:59"

# Parse the time strings
start = datetime.datetime.strptime(start_time, "%H:%M:%S")
end = datetime.datetime.strptime(end_time, "%H:%M:%S")

# Length of the departure window in whole seconds. It does not depend on the
# row, so it is computed once rather than on every iteration.
delta = end - start
int_delta = (delta.days * 24 * 60 * 60) + delta.seconds

# Draw one random departure time per row (both window ends inclusive) and
# format it as HH:MM:SS. The column is assigned in a single step, which avoids
# using a positional counter as an index label and is much faster than writing
# cell by cell.
departure_times = [
    (start + datetime.timedelta(seconds=random.randint(0, int_delta))).strftime("%H:%M:%S")
    for _ in range(len(output_df))
]
output_df['time'] = departure_times


In [5]:
# Display the DataFrame to inspect the newly added 'time' column
output_df 

Unnamed: 0,agent_id,n_crisis,start_quad,end_quadke,x1,y1,x2,y2,time
0,1,29,12212101212100,12212101210322,3.337362e+06,3.657167e+06,3.337312e+06,3.659500e+06,08:53:16
1,2,29,12212101212100,12212101210322,3.337332e+06,3.658099e+06,3.336798e+06,3.661461e+06,07:36:51
2,3,29,12212101212100,12212101210322,3.337825e+06,3.657038e+06,3.337025e+06,3.661552e+06,07:47:48
3,4,29,12212101212100,12212101210322,3.337866e+06,3.656863e+06,3.336989e+06,3.660068e+06,08:41:14
4,5,29,12212101212100,12212101210322,3.336586e+06,3.657604e+06,3.338752e+06,3.659577e+06,07:05:15
...,...,...,...,...,...,...,...,...,...
478738,478739,23,12212101210302,12212101212100,3.337704e+06,3.665570e+06,3.338314e+06,3.656777e+06,07:56:19
478739,478740,23,12212101210302,12212101212100,3.338529e+06,3.664298e+06,3.337621e+06,3.657467e+06,09:10:37
478740,478741,23,12212101210302,12212101212100,3.337353e+06,3.665439e+06,3.337393e+06,3.658560e+06,08:18:31
478741,478742,23,12212101210302,12212101212100,3.336953e+06,3.664676e+06,3.336460e+06,3.657558e+06,08:18:21


# Assigning the modal split for each generated trip.

In [9]:
import numpy as np

# Define the list of the used transportation systems.
trans_mode = ["car", "pt"]

# Assign a mode to every trip in a single vectorised draw. The probabilities
# follow the order of trans_mode (car: 0.4337, pt: 0.5663). Assigning the
# whole column at once does not rely on the index being 0..n-1 and avoids a
# slow cell-by-cell loop.
output_df["trans_mode"] = np.random.choice(
    trans_mode, size=len(output_df), p=[0.4337, 0.5663]
)


In [10]:
# Display the DataFrame to inspect the newly added 'trans_mode' column
output_df

Unnamed: 0,agent_id,n_crisis,start_quad,end_quadke,x1,y1,x2,y2,time,trans_mode
0,1,29,12212101212100,12212101210322,3.337362e+06,3.657167e+06,3.337312e+06,3.659500e+06,08:53:16,pt
1,2,29,12212101212100,12212101210322,3.337332e+06,3.658099e+06,3.336798e+06,3.661461e+06,07:36:51,pt
2,3,29,12212101212100,12212101210322,3.337825e+06,3.657038e+06,3.337025e+06,3.661552e+06,07:47:48,pt
3,4,29,12212101212100,12212101210322,3.337866e+06,3.656863e+06,3.336989e+06,3.660068e+06,08:41:14,car
4,5,29,12212101212100,12212101210322,3.336586e+06,3.657604e+06,3.338752e+06,3.659577e+06,07:05:15,pt
...,...,...,...,...,...,...,...,...,...,...
478738,478739,23,12212101210302,12212101212100,3.337704e+06,3.665570e+06,3.338314e+06,3.656777e+06,07:56:19,pt
478739,478740,23,12212101210302,12212101212100,3.338529e+06,3.664298e+06,3.337621e+06,3.657467e+06,09:10:37,pt
478740,478741,23,12212101210302,12212101212100,3.337353e+06,3.665439e+06,3.337393e+06,3.658560e+06,08:18:31,pt
478741,478742,23,12212101210302,12212101212100,3.336953e+06,3.664676e+06,3.336460e+06,3.657558e+06,08:18:21,car


# Distribution of activities to origin and destination based on the collected survey data.

In [None]:
# Adding the value "home" to a new attribute "act_type_origin".
# A scalar assignment fills every row at once.
output_df["act_type_origin"] = "home"

# Defining the list of activities assigned to the new attribute "act_type_destination."
act_type_destination = ["work", "business", "education", "shopping", "leisure"]

# Assign a destination activity to every trip in a single vectorised draw.
# The probabilities follow the order of act_type_destination. Assigning the
# whole column at once does not rely on the index being 0..n-1 and avoids a
# slow cell-by-cell loop.
output_df["act_type_destination"] = np.random.choice(
    act_type_destination,
    size=len(output_df),
    p=[0.503, 0.043, 0.433, 0.008, 0.013],
)


In [14]:
# Display the DataFrame to inspect the newly added activity-type columns
output_df

Unnamed: 0,agent_id,n_crisis,start_quad,end_quadke,x1,y1,x2,y2,time,trans_mode,act_type_origin,act_type_destination
0,1,29,12212101212100,12212101210322,3.337362e+06,3.657167e+06,3.337312e+06,3.659500e+06,08:53:16,pt,home,education
1,2,29,12212101212100,12212101210322,3.337332e+06,3.658099e+06,3.336798e+06,3.661461e+06,07:36:51,pt,home,work
2,3,29,12212101212100,12212101210322,3.337825e+06,3.657038e+06,3.337025e+06,3.661552e+06,07:47:48,pt,home,work
3,4,29,12212101212100,12212101210322,3.337866e+06,3.656863e+06,3.336989e+06,3.660068e+06,08:41:14,car,home,work
4,5,29,12212101212100,12212101210322,3.336586e+06,3.657604e+06,3.338752e+06,3.659577e+06,07:05:15,pt,home,education
...,...,...,...,...,...,...,...,...,...,...,...,...
478738,478739,23,12212101210302,12212101212100,3.337704e+06,3.665570e+06,3.338314e+06,3.656777e+06,07:56:19,pt,home,work
478739,478740,23,12212101210302,12212101212100,3.338529e+06,3.664298e+06,3.337621e+06,3.657467e+06,09:10:37,pt,home,education
478740,478741,23,12212101210302,12212101212100,3.337353e+06,3.665439e+06,3.337393e+06,3.658560e+06,08:18:31,pt,home,work
478741,478742,23,12212101210302,12212101212100,3.336953e+06,3.664676e+06,3.336460e+06,3.657558e+06,08:18:21,car,home,education


# Convert to MATSim Synthetic Population

In [16]:
import pandas as pd
import xml.etree.ElementTree as ET


df = output_df

# Create the root element for the MATSim population XML
population = ET.Element("population")

# Create one <person> with a single selected <plan> per trip:
# origin activity -> leg -> destination activity.
# itertuples is used because it is far faster than iterrows on large frames
# and yields each column value with its own type.
for row in df.itertuples(index=False):
    person = ET.SubElement(population, "person", id=str(row.agent_id))
    plan = ET.SubElement(person, "plan", selected="yes")

    # Origin activity: ends at the sampled departure time
    ET.SubElement(
        plan, "activity",
        type=row.act_type_origin,
        x=str(row.x1), y=str(row.y1),
        end_time=row.time
    )

    # Leg for the trip
    ET.SubElement(plan, "leg", mode=row.trans_mode)

    # Destination activity: last activity of the plan, so it has no end time
    ET.SubElement(
        plan, "activity",
        type=row.act_type_destination,
        x=str(row.x2), y=str(row.y2)
    )

# Save the population to an XML file. ElementTree cannot emit a DOCTYPE, so the
# XML declaration and the population_v6 DOCTYPE are written by hand before the
# tree; the <activity>/<leg> element names used above are those of that DTD.
# NOTE(review): confirm population_v6 is the format version your MATSim release expects.
output_file_path = r"C:\Users\XXXX\MATSim.xml"
tree = ET.ElementTree(population)
with open(output_file_path, "wb") as xml_file:
    xml_file.write(b'<?xml version="1.0" encoding="utf-8"?>\n')
    xml_file.write(
        b'<!DOCTYPE population SYSTEM "http://www.matsim.org/files/dtd/population_v6.dtd">\n'
    )
    tree.write(xml_file, encoding="utf-8", xml_declaration=False)

print(f"MATSim population file has been generated and saved to {output_file_path}")


MATSim population file has been generated and saved to C:\Users\mafgo\Downloads\MATSim.xml


# Display Part of the Population

In [20]:
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom

# Function to pretty print an XML element
def pretty_print_element(element):
    """Return ``element`` serialised as indented XML text.

    The element is serialised to UTF-8 bytes, re-parsed with minidom and
    rendered with a two-space indent. The returned string starts with the
    XML declaration that minidom emits.
    """
    serialized = ET.tostring(element, encoding='utf-8')
    return minidom.parseString(serialized).toprettyxml(indent="  ")

# Preview the first 3 <person> entries (adjust the slice to show more or fewer)
first_persons = population.findall('person')[:3]
for person in first_persons:
    print(pretty_print_element(person))
    print('---')  # Separator between persons


<?xml version="1.0" ?>
<person id="1">
  <plan selected="yes">
    <activity type="home" x="3337362.4904898363" y="3657166.860519129" end_time="08:53:16"/>
    <leg mode="pt"/>
    <activity type="education" x="3337311.8246361203" y="3659500.129212097"/>
  </plan>
</person>

---
<?xml version="1.0" ?>
<person id="2">
  <plan selected="yes">
    <activity type="home" x="3337331.846059297" y="3658098.641626134" end_time="07:36:51"/>
    <leg mode="pt"/>
    <activity type="work" x="3336797.86084589" y="3661461.1143141757"/>
  </plan>
</person>

---
<?xml version="1.0" ?>
<person id="3">
  <plan selected="yes">
    <activity type="home" x="3337824.56277442" y="3657037.5227478575" end_time="07:47:48"/>
    <leg mode="pt"/>
    <activity type="work" x="3337024.932781996" y="3661551.695988111"/>
  </plan>
</person>

---


In [21]:
# For more information, contact me at mohamedashraf.elgohary@polimi.it