In [1]:
"""
------------------------------------------------------------------------------
About
In this data analysis, the number and proportion of female and male street names
are calculated for each admin area.

The percentage of female street names represents the proportion of female street 
names compared to the total number of both female and male street names.
------------------------------------------------------------------------------
"""

'\n------------------------------------------------------------------------------\nAbout\nIn this data analysis, the number and proportion of female and male street names\nare calculated for each admin area.\n\nThe percentage of female street names represents the proportion of female street \nnames compared to the total number of both female and male street names.\n------------------------------------------------------------------------------\n'

In [2]:
import sys
# Record which interpreter and Python version produced the results below.
print(sys.executable, sys.version, sep="\n")

/opt/anaconda3/bin/python
3.12.7 | packaged by Anaconda, Inc. | (main, Oct  4 2024, 08:22:19) [Clang 14.0.6 ]


In [3]:
"""
------------------------------------------------------------------------------
Libraries
------------------------------------------------------------------------------
"""

import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt

: 

In [88]:
"""
------------------------------------------------------------------------------
Load GeoJson files
------------------------------------------------------------------------------
"""

# Read both input layers: the admin area polygons first, then the street ways.
admin_areas, gdf_streetNames = (
    gpd.read_file(source_path)
    for source_path in (
        "../data/raw/Prognoseräume.json",
        "../data/raw/ways.geojson",
    )
)

In [None]:
"""
------------------------------------------------------------------------------
Data exploration
------------------------------------------------------------------------------
"""

# Dimensions (rows, columns) of the street layer read from ways.geojson.
gdf_streetNames.shape

In [None]:
# Number of missing values per column in the full street layer.
gdf_streetNames.isna().sum()

In [None]:
# Show the first record to see which attributes a street row carries.
gdf_streetNames.iloc[0]

In [None]:
# Spot check: all rows belonging to one specific street name.
gdf_streetNames.loc[gdf_streetNames["name"].eq("Rose-Scheuer-Karpin-Straße")]

In [None]:
# Row count per value of the "gender" column in the full street layer.
gdf_streetNames["gender"].value_counts()

In [None]:
# Restrict the street layer to rows whose gender tag is "F" or "M".
fm = ["F", "M"]

gdf_streetNames_fm = gdf_streetNames.loc[
    lambda streets: streets["gender"].isin(fm)
]

gdf_streetNames_fm.head()

In [None]:
# Dimensions (rows, columns) of the F/M subset.
gdf_streetNames_fm.shape

In [None]:
# Number of distinct street names in the F/M subset.
# nunique() ignores missing names, which matches how the per-area counts are
# computed; len(unique()) would count NaN as one additional "name".
gdf_streetNames_fm["name"].nunique()

In [None]:
# Column data types of the F/M subset.
gdf_streetNames_fm.dtypes

In [None]:
# Number of missing values per column in the F/M subset.
gdf_streetNames_fm.isna().sum()

In [None]:
# First rows of the admin area layer read from Prognoseräume.json.
admin_areas.head()

In [None]:
# Check for empty or invalid geometries in the F/M street subset.
# is_empty / is_valid are boolean Series, so summing them counts the True rows.
print(f"Empty geometry in gdf_streetNames_fm: {gdf_streetNames_fm.is_empty.sum()}")
print(f"Invalid geometry in gdf_streetNames_fm: {(~gdf_streetNames_fm.is_valid).sum()}")

In [None]:
"""
------------------------------------------------------------------------------
CRS
------------------------------------------------------------------------------
"""

# Coordinate reference system currently attached to the F/M street subset.
gdf_streetNames_fm.crs

In [102]:
# Both layers must share one CRS before they are compared spatially;
# reproject the streets onto the admin areas' CRS only when they differ.
target_crs = admin_areas.crs
if target_crs != gdf_streetNames_fm.crs:
    gdf_streetNames_fm = gdf_streetNames_fm.to_crs(crs=target_crs)

In [None]:
# Plot female and male street names on one map, one colored layer per gender.
fig, ax = plt.subplots(figsize=(10, 10))

gdf_streetNames_fm[gdf_streetNames_fm["gender"] == "F"].plot(ax=ax, color="red", label="Female")
gdf_streetNames_fm[gdf_streetNames_fm["gender"] == "M"].plot(ax=ax, color="blue", label="Male")

# Title and legend are set on the Axes itself so the figure stands alone and
# does not depend on pyplot's implicit "current axes" state.
ax.set_title("Female and male street names")
ax.legend()
plt.show()

In [None]:
# Row count per gender in the F/M subset (the filter above keeps only "F" and "M").
gdf_streetNames_fm["gender"].value_counts()

In [105]:
"""
------------------------------------------------------------------------------
Functions: 
female street names
male street names
------------------------------------------------------------------------------
"""

def _count_unique_streets(admin_area, gdf_streetNames_fm, gender):
    """Count distinct street names of one gender that intersect an admin area.

    Parameters
    ----------
    admin_area : row-like
        One admin area; must expose ``.geometry`` and the key ``"pgr_name"``.
    gdf_streetNames_fm : GeoDataFrame
        Streets with at least the columns ``"gender"`` and ``"name"``.
    gender : str
        Gender tag to count, e.g. ``"F"`` or ``"M"``.

    Returns
    -------
    int
        Number of distinct, non-missing street names with that gender whose
        geometry intersects the area.
    """
    # Streets whose geometry intersects the area (a street crossing a border
    # therefore counts for every area it intersects).
    in_area = gdf_streetNames_fm.intersects(admin_area.geometry)
    has_gender = gdf_streetNames_fm["gender"] == gender

    # A street is usually split into several segments; count each name once.
    unique_streets = gdf_streetNames_fm.loc[in_area & has_gender, "name"].nunique()

    # Progress line per area, e.g. "Admin Area: X, Unique_f_streets: 12".
    print(f"Admin Area: {admin_area['pgr_name']}, Unique_{gender.lower()}_streets: {unique_streets}")

    return unique_streets


def calculate_f_streets(admin_area, gdf_streetNames_fm):
    """Return the number of unique female ("F") street names intersecting ``admin_area``.

    Prints one progress line per call and returns the count as an int.
    """
    return _count_unique_streets(admin_area, gdf_streetNames_fm, "F")


def calculate_m_streets(admin_area, gdf_streetNames_fm):
    """Return the number of unique male ("M") street names intersecting ``admin_area``.

    Prints one progress line per call and returns the count as an int.
    """
    return _count_unique_streets(admin_area, gdf_streetNames_fm, "M")


In [None]:
# Sanity check on the first admin area: distinct female street names intersecting it.
first_area_geometry = admin_areas.iloc[0].geometry
intersections = gdf_streetNames_fm.loc[gdf_streetNames_fm.intersects(first_area_geometry)]
intersections.loc[intersections["gender"] == "F", "name"].nunique()

In [None]:
# Same sanity check for male street names within the same intersections.
intersections.loc[intersections["gender"] == "M", "name"].nunique()

In [None]:
# First geometries of the F/M street subset.
gdf_streetNames_fm.geometry.head()

In [None]:
# Calculate unique_f_streets and unique_m_streets for each administrative area
# (one function call per admin area row).
admin_areas["unique_f_streets"] = admin_areas.apply(
    lambda row: calculate_f_streets(row, gdf_streetNames_fm), axis=1
)

admin_areas["unique_m_streets"] = admin_areas.apply(
    lambda row: calculate_m_streets(row, gdf_streetNames_fm), axis=1
)

# Subset to the id, name and count columns. The explicit .copy() makes
# streetNames an independent table, so adding columns to it later neither
# raises SettingWithCopyWarning nor depends on admin_areas.
streetNames = admin_areas[["pgr_id", "pgr_name", "unique_f_streets", "unique_m_streets"]].copy()
streetNames.head()

In [None]:
# Add the total street count and the share of female street names in percent.
# .assign returns a new frame instead of writing into a slice (no
# SettingWithCopyWarning) and makes the cell safe to re-run.
# Scaling to percent before rounding avoids float noise such as 7.000000000000001.
# Areas without any F/M street yield NaN for the percentage (0 / 0).
streetNames = streetNames.assign(
    unique_streets_count=lambda df: df["unique_f_streets"] + df["unique_m_streets"],
    fStreets_percent=lambda df: (
        df["unique_f_streets"] / df["unique_streets_count"] * 100
    ).round(1),
)

In [None]:
# Sort descending by the number of unique female street names.
# Reassigning instead of inplace=True avoids mutating a derived frame in place
# (SettingWithCopyWarning).
streetNames = streetNames.sort_values(by="unique_f_streets", ascending=False)
streetNames

In [None]:
# Lowest percentage of female street names across all admin areas.
streetNames["fStreets_percent"].min()

In [None]:
# Highest percentage of female street names across all admin areas.
streetNames["fStreets_percent"].max()

In [114]:
# Write the per-area summary table to CSV, leaving out the DataFrame index.
streetNames.to_csv(
    path_or_buf="../data/csv/femaleStreetNames.csv",
    index=False,
)