In [30]:
%useLatestDescriptors
%use dataframe, kandy


In [31]:
import java.util.Locale

// Read the 2023 export: a semicolon-delimited CSV whose values are parsed
// using the German locale.
val df = DataFrame.readCsv(
    fileOrUrl = "data/data_2023.csv",
    parserOptions = ParserOptions(locale = Locale.GERMAN),
    delimiter = ';',
)

In [32]:
import org.jetbrains.kotlinx.dataframe.api.*
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.time.Duration

// Pattern used to parse the STARTTIME / ENDTIME columns (minute precision, no seconds).
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm")

// Normalise column types:
//  - both timestamp columns become LocalDateTime
//  - the four coordinate columns become Double; a decimal comma, if present,
//    is swapped for a dot before parsing
val cleaned = df
    .convert("STARTTIME", "ENDTIME").with { raw ->
        LocalDateTime.parse(raw.toString(), formatter)
    }
    .convert("STARTLAT", "STARTLON", "ENDLAT", "ENDLON").with { raw ->
        raw.toString().replace(',', '.').toDouble()
    }

// Add the ride length in whole minutes (stored as Double), computed from
// the parsed STARTTIME and ENDTIME values.
val withDuration = cleaned.add("DURATION_MIN") { row ->
    val rideStart = row["STARTTIME"] as LocalDateTime
    val rideEnd = row["ENDTIME"] as LocalDateTime
    val elapsed = Duration.between(rideStart, rideEnd)
    elapsed.toMinutes().toDouble()
}

// Add a human-readable "<rental station> → <return station>" label.
// A missing or blank station name is replaced by "Non-station".
val rides = withDuration.add("ROUTE") { row ->
    fun stationLabel(column: String): String =
        row[column]?.toString().orEmpty().ifBlank { "Non-station" }

    val origin = stationLabel("RENTAL_STATION_NAME")
    val destination = stationLabel("RETURN_STATION_NAME")
    "$origin → $destination"
}

In [33]:
import org.jetbrains.letsPlot.Stat

// Keep rides shorter than 60 minutes. Only an upper bound is applied, so any
// rows with a negative duration would still be included.
val capped = rides.filter { row ->
    (row["DURATION_MIN"] as Number).toDouble() < 60
}

// Histogram of ride duration in 30 bins; bar fill runs from green to red
// with the bin count.
capped.plot {
    layout.title = "Histogram of DURATION_MIN (Under 60 mins)"
    histogram("DURATION_MIN", binsOption = BinsOption.byNumber(30)) {
        borderLine.color = Color.BLACK
        fillColor(Stat.count) {
            scale = continuous(Color.GREEN..Color.RED)
        }
    }
}

In [34]:
// Ten rental stations with the most rides.
// Rows without a usable station name are excluded. Blank names are dropped as
// well as nulls, matching the ROUTE column, which treats both as "Non-station";
// otherwise an empty name could be counted as if it were a real station.
val stationUsage = rides
    .filter { row -> !row["RENTAL_STATION_NAME"]?.toString().isNullOrBlank() }
    .groupBy("RENTAL_STATION_NAME")
    .count()
    .sortByDesc("count")
    .take(10)

// Bar chart: one bar per station, height = number of rentals.
stationUsage.plot {
    layout.title = "Most Frequent Rental Stations"
    bars {
        x("RENTAL_STATION_NAME") { axis.name = "Station" }
        y("count") { axis.name = "Number of Rentals" }
        fillColor = Color.BLUE
    }
}


In [35]:
// Ten most frequent routes, ignoring only the route where BOTH ends are
// "Non-station". Routes with a single "Non-station" end are kept.
val routes = rides
    .groupBy("ROUTE")
    .count()
    .filter { row -> row["ROUTE"] != "Non-station → Non-station" }
    .sortByDesc("count")
    .take(10)

// Bar chart: one bar per route, height = number of rides.
routes.plot {
    layout.title = "Top 10 Most Common Routes"
    bars {
        x("ROUTE") { axis.name = "Route" }
        y("count") { axis.name = "Count" }
        fillColor = Color.ORANGE
    }
}

In [36]:
// Redefines `routes` with a stricter filter: a route is kept only when its
// label neither starts nor ends with "Non-station". Rows with a null ROUTE
// are dropped.
val routes = rides
    .groupBy("ROUTE")
    .count()
    .filter { row ->
        val label = row["ROUTE"]?.toString()
        label != null &&
            !label.startsWith("Non-station") &&
            !label.endsWith("Non-station")
    }
    .sortByDesc("count")
    .take(10)

// Bar chart: one bar per route, height = number of rides.
routes.plot {
    layout.title = "Top 10 Most Common Routes"
    bars {
        x("ROUTE") { axis.name = "Route" }
        y("count") { axis.name = "Count" }
        fillColor = Color.ORANGE
    }
}

In [37]:
import java.time.LocalDateTime

// Number of rides per hour of day, taken from STARTTIME and ordered by hour.
val demandPerHour = rides
    .add("HOUR") { row ->
        val startedAt = row["STARTTIME"] as LocalDateTime
        startedAt.hour
    }
    .groupBy("HOUR")
    .count()
    .sortBy("HOUR")

// Line chart with a point marker drawn on top at every hour.
demandPerHour.plot {
    layout.title = "Hourly Bike Usage"
    line {
        x("HOUR") { axis.name = "Hour of Day" }
        y("count") { axis.name = "Number of Rides" }
        color = Color.LIGHT_GREEN
    }
    points {
        x("HOUR")
        y("count")
    }
}

In [38]:
// Number of leading rows of `rides` examined before the geographic filter runs.
val sampleSize = 1000

// Approximate bounding box around Munich (degrees). Defined once, outside the
// row filter, so the ranges are not rebuilt for every row.
val munichLonRange = 11.3..11.8
val munichLatRange = 48.0..48.3

// Take the first `sampleSize` rides and keep those whose start AND end points
// both fall inside the bounding box. Zero coordinates need no separate check:
// 0.0 lies outside both ranges and is rejected by the containment test.
val filteredRides = rides.head(sampleSize).filter { row ->
    val startLon = row["STARTLON"] as Double
    val startLat = row["STARTLAT"] as Double
    val endLon = row["ENDLON"] as Double
    val endLat = row["ENDLAT"] as Double

    val startInside = startLon in munichLonRange && startLat in munichLatRange
    val endInside = endLon in munichLonRange && endLat in munichLatRange

    startInside && endInside
}

// Scatter plot of ride endpoints: start positions in red, end positions in
// blue, both semi-transparent so overlapping points remain visible.
filteredRides.plot {
    points {
        x("STARTLON") { axis.name = "Longitude" }
        y("STARTLAT") { axis.name = "Latitude" }
        color = Color.RED
        alpha = 0.5
    }
    points {
        x("ENDLON")
        y("ENDLAT")
        color = Color.BLUE
        alpha = 0.5
    }
    layout.title = "Geolocation of Start (Red) and End (Blue) Points"
}


In [41]:


// Read the Munich boundary geometry from a local GeoJSON file.
// NOTE(review): GeoDataFrame / geoMap appear to come from Kandy's geo
// extension; the `%use dataframe, kandy` line at the top may not load it
// (possibly `%use kandy-geo` is required) — confirm.
val munichArea = GeoDataFrame.readGeoJson("data/munich_geojson.json")

// Draw the polygon(s): light-blue fill with a purple outline.
munichArea.plot {
    geoMap {
        fillColor = Color.LIGHT_BLUE
        borderLine.color = Color.PURPLE
        borderLine.width = 1.5
    }
    layout {
        title = "Munich Metropolitan Area"
    }
}

In [40]:
import java.time.DayOfWeek

// Flag each ride as weekend (true) when STARTTIME falls on a Saturday or
// Sunday, otherwise false.
val ridesWithDayType = rides.add("IS_WEEKEND") { row ->
    when ((row["STARTTIME"] as LocalDateTime).dayOfWeek) {
        DayOfWeek.SATURDAY, DayOfWeek.SUNDAY -> true
        else -> false
    }
}

// Total ride count per flag value. These are raw totals, not per-day averages.
val usageByDayType = ridesWithDayType
    .groupBy("IS_WEEKEND")
    .count()

// Bar chart comparing the two totals.
usageByDayType.plot {
    layout.title = "Weekend vs Weekday Usage"
    bars {
        x("IS_WEEKEND") { axis.name = "Is Weekend" }
        y("count") { axis.name = "Number of Rides" }
        fillColor = Color.LIGHT_BLUE
    }
}
