In [1]:
import pandas as pd
# Load the combined dataset from the CSV file (path is relative to the working directory)
initial_data = pd.read_csv('combined_data_21_01_2025.csv')

In [2]:
# Count how many rows each device contributed
device_counts = initial_data.groupby('device_id').size()

# A device is kept only if it has at least 10 data entries
valid_device_ids = device_counts.loc[device_counts.ge(10)].index

# Restrict the DataFrame to the rows belonging to those devices
initial_data = initial_data.loc[initial_data['device_id'].isin(valid_device_ids)]

# Display the filtered DataFrame
#print(initial_data)
#print (device_counts)

In [3]:
from geopy.geocoders import Nominatim
from shapely.geometry import Point
from shapely.ops import transform
import pyproj
from shapely import wkt

# Function to get coordinates of the building address
def get_coordinates(address):
    """Geocode ``address`` with Nominatim and return ``(latitude, longitude)``.

    Raises:
        ValueError: if the geocoder returns no result for ``address``.
    """
    match = Nominatim(user_agent="Buffer_Creation").geocode(address)
    if not match:
        raise ValueError(f"Address '{address}' not found.")
    return match.latitude, match.longitude

# Function to filter out rows within the buffer
def filter_within_buffer(initial_data_w, address, radius_m, utm_crs='EPSG:32632'):
    """Return the rows of ``initial_data_w`` that lie OUTSIDE a circular buffer around an address.

    The address is geocoded, projected to a metric CRS and buffered by
    ``radius_m``. Every row whose geometry is contained in that buffer is
    dropped; rows whose geometry is missing or cannot be parsed are kept.

    The input DataFrame is not modified, so the function can be re-run on the
    same frame and always yields the same result.

    Args:
        initial_data_w: DataFrame with a 'geometry' column holding WKT strings
            (lon/lat, EPSG:4326) or already-parsed Shapely geometries.
        address: Address string passed to ``get_coordinates``.
        radius_m: Buffer radius in the units of ``utm_crs`` (metres for UTM).
        utm_crs: Metric CRS used for the distance test. Defaults to UTM zone
            32N; adjust for data outside that zone.

    Returns:
        A new DataFrame with the rows outside the buffer. Its 'geometry'
        column contains Shapely geometries (``None`` where parsing failed).

    Raises:
        ValueError: propagated from ``get_coordinates`` if the address is not found.
    """
    # Local import: needed to recognise geometries that were parsed earlier.
    from shapely.geometry.base import BaseGeometry

    # Get the building's coordinates (latitude, longitude)
    building_lat, building_lon = get_coordinates(address)

    # Shapely points are (x, y) == (lon, lat)
    building_point = Point(building_lon, building_lat)

    # Set up the projection from WGS84 (lat/lon) to the metric CRS
    proj_wgs84 = pyproj.CRS('EPSG:4326')
    proj_utm = pyproj.CRS(utm_crs)
    transformer = pyproj.Transformer.from_crs(proj_wgs84, proj_utm, always_xy=True)

    # Buffer the projected building point; the radius is in CRS units
    building_buffer = transform(transformer.transform, building_point).buffer(radius_m)

    def safe_wkt_load(x):
        # Already-parsed geometries pass through unchanged, so a second run
        # on parsed data does not wipe the column.
        if isinstance(x, BaseGeometry):
            return x
        if isinstance(x, str):  # Only try to load WKT strings
            try:
                return wkt.loads(x)
            except Exception:
                # Invalid WKT -> treat as missing; do not swallow
                # KeyboardInterrupt/SystemExit like a bare except would.
                return None
        return None  # Non-string, non-geometry entries (e.g. NaN)

    def is_inside_buffer(geom):
        # Rows without a usable geometry are never considered inside the buffer
        if geom is None:
            return False
        return building_buffer.contains(transform(transformer.transform, geom))

    # Parse and test the geometries without writing helper columns to the input
    geometries = initial_data_w['geometry'].apply(safe_wkt_load)
    inside = geometries.apply(is_inside_buffer).astype(bool)

    # Keep only rows outside the buffer and attach the parsed geometries
    keep = ~inside
    filtered_data = initial_data_w.loc[keep].copy()
    filtered_data['geometry'] = geometries.loc[keep].to_numpy()

    return filtered_data

# Example usage: remove all measurements recorded within 15 m of the building address
address = "Von-Steuben-Straße 21, 48143 Münster"

# Independent (deep) copy, so `initial_data` itself is never touched by the filter
initial_data_w = initial_data.copy()

# filter_within_buffer returns the rows OUTSIDE the buffer around the address
atrai_bike_data = filter_within_buffer(initial_data_w, address, 15)

#atrai_bike_data.to_csv('atrai_bike_data_ohne_Büro_21_01_25.csv', index=False)

In [4]:
import folium
from folium.plugins import HeatMap

# Step 1: Parse the 'createdAt' column into pandas datetimes
atrai_bike_data['createdAt'] = atrai_bike_data['createdAt'].pipe(pd.to_datetime)

# Step 2: Add a column for meteorological seasons based on the month
def get_season(month):
    """Map a calendar month number to its meteorological season.

    Args:
        month: Month number 1-12 (int or an equal-valued float).

    Returns:
        'Spring' (3-5), 'Summer' (6-8), 'Autumn' (9-11) or 'Winter' (12, 1, 2).
        ``None`` for anything else (e.g. NaN from a missing timestamp, 0, 13),
        so invalid months are not silently counted as winter.
    """
    seasons = {
        'Spring': (3, 4, 5),
        'Summer': (6, 7, 8),
        'Autumn': (9, 10, 11),
        'Winter': (12, 1, 2),
    }
    for season_name, months in seasons.items():
        if month in months:
            return season_name
    return None

atrai_bike_data['Season'] = atrai_bike_data['createdAt'].dt.month.apply(get_season)

# Step 3: Sort chronologically within each device.
# 'Season' must not be the leading sort key: it would order a device's rows by
# season name (Autumn, Spring, Summer, Winter) instead of by time, and the
# per-device diff below would then yield negative gaps at season boundaries.
atrai_bike_data = atrai_bike_data.sort_values(by=['device_id', 'createdAt'])

# Step 4: Seconds between consecutive samples of the same device
# (NaN for the first sample of each device)
atrai_bike_data['time_diff'] = atrai_bike_data.groupby('device_id')['createdAt'].diff().dt.total_seconds()

# Step 5: Elapsed seconds since each device's first sample, and the total per device.
# total_ride_duration is a Series indexed by device_id (one value per device),
# not a single global maximum.
# NOTE(review): all rows of one device are treated as a single ride; if a device
# records several rides, they would need to be split (e.g. on large time gaps)
# before trimming - confirm against the data.
elapsed_seconds = atrai_bike_data.groupby('device_id')['time_diff'].cumsum()
total_ride_duration = elapsed_seconds.groupby(atrai_bike_data['device_id']).max()

# Step 6: Filter out the first and last 60 seconds per device.
# Both conditions use the same elapsed-time series, so the end trim is measured
# against the device's own total. Rows with NaN elapsed time (each device's
# first sample) compare False and are dropped with the first 60 seconds.
trim_seconds = 60
device_total = atrai_bike_data['device_id'].map(total_ride_duration)
filtered_data = atrai_bike_data[
    (elapsed_seconds > trim_seconds) & (elapsed_seconds < device_total - trim_seconds)
]

# Step 7: Add regional filtering option
# Define latitude and longitude bounds for the region
lat_min, lat_max = 51.8, 52.1  # Adjust these values for your region
lng_min, lng_max = 7.5, 7.7    # Adjust these values for your region

# Filter data to include only points within the region (bounds inclusive)
regional_filtered_data = filtered_data[
    filtered_data['lat'].between(lat_min, lat_max) &
    filtered_data['lng'].between(lng_min, lng_max)
]

# Step 8: Function to generate heatmap
def create_heatmap(data, title="Heatmap", file_name="Heatmap.html", center=None):
    """Build a folium temperature heatmap from ``data`` and save it as HTML.

    Args:
        data: DataFrame with 'lat', 'lng' and 'Temperature' columns. Rows with
            a missing value in any of the three are ignored.
        title: Label used in the confirmation message.
        file_name: Path of the HTML file to write.
        center: Optional ``[lat, lng]`` map centre. Defaults to the middle of
            the global bounding box (lat_min/lat_max, lng_min/lng_max).

    Returns:
        The ``folium.Map`` that was saved.

    Notes:
        Temperatures are min-max scaled to [0, 1] within ``data`` before being
        used as heat weights, because the heat layer expects intensities
        relative to its ``max`` (1.0 by default); raw values in degrees would
        not map onto that range. The scaling is per call, so colours of
        different maps are not directly comparable.
        NOTE(review): the heat layer appears to accumulate the intensity of
        overlapping points, so sampling density may still influence the
        picture - confirm, and consider aggregating per grid cell.
    """
    # Remove rows with missing temperature, latitude, or longitude data
    points = data[['lat', 'lng', 'Temperature']].dropna(subset=['Temperature', 'lat', 'lng'])

    # Centre the map on the requested location, or on the region's bounding box
    if center is None:
        center = [(lat_min + lat_max) / 2, (lng_min + lng_max) / 2]
    m_temp = folium.Map(location=center, zoom_start=13)

    # Only add a heat layer when there is something to draw (e.g. a season
    # without any measurements would otherwise produce an empty layer)
    if not points.empty:
        temperature = points['Temperature']
        t_min, t_max = temperature.min(), temperature.max()
        if t_max > t_min:
            weight = (temperature - t_min) / (t_max - t_min)
        else:
            # All temperatures identical: avoid 0/0 and use a uniform weight
            weight = 1.0

        # Heat data rows: [latitude, longitude, weight in 0..1]
        heat_data_temp = points[['lat', 'lng']].assign(weight=weight).values.tolist()
        HeatMap(heat_data_temp, radius=15, blur=15).add_to(m_temp)

    # Save the map as an HTML file
    m_temp.save(file_name)
    print(f"{title} saved as {file_name}")

    return m_temp

# Step 9: Heatmap over all regional data
regional_heatmap = create_heatmap(
    regional_filtered_data,
    title="Münster Temperature Heatmap",
    file_name="Muenster_Temperature_Heatmap.html",
)

# Step 10: One additional heatmap per meteorological season (optional)
season_labels = regional_filtered_data['Season']
for season in ('Spring', 'Summer', 'Autumn', 'Winter'):
    seasonal_data = regional_filtered_data[season_labels == season]
    create_heatmap(
        seasonal_data,
        title=f"{season} Temperature Heatmap",
        file_name=f"{season}_Temperature_Heatmap_MS.html",
    )

Münster Temperature Heatmap saved as Muenster_Temperature_Heatmap.html
Spring Temperature Heatmap saved as Spring_Temperature_Heatmap_MS.html
Summer Temperature Heatmap saved as Summer_Temperature_Heatmap_MS.html
Autumn Temperature Heatmap saved as Autumn_Temperature_Heatmap_MS.html
Winter Temperature Heatmap saved as Winter_Temperature_Heatmap_MS.html
