In [12]:
%useLatestDescriptors
%use dataframe, kandy


In [13]:
import java.util.Locale

// Load the ride export from data/mvg-rad-data.csv: semicolon-delimited, parsed with the German locale.
val df = DataFrame.readCsv(
    fileOrUrl = "data/mvg-rad-data.csv",
    delimiter = ';',
    parserOptions = ParserOptions(locale = Locale.GERMAN),
)

In [14]:
import org.jetbrains.kotlinx.dataframe.api.*
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.time.Duration

// Pattern used to parse the STARTTIME / ENDTIME values (minute precision, literal 'T' separator).
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm")

// Normalise column types: both timestamps become LocalDateTime, all four coordinates become Double.
val cleaned = df
    .convert("STARTTIME", "ENDTIME").with { raw -> LocalDateTime.parse(raw.toString(), formatter) }
    .convert("STARTLAT", "STARTLON", "ENDLAT", "ENDLON").with { raw ->
        // Replace a decimal comma with a dot so toDouble() can parse the text form.
        raw.toString().replace(',', '.').toDouble()
    }

// Adds DURATION_MIN: whole minutes between STARTTIME and ENDTIME, stored as Double.
val withDuration = cleaned.add("DURATION_MIN") { ride ->
    val rentedAt = ride["STARTTIME"] as LocalDateTime
    val returnedAt = ride["ENDTIME"] as LocalDateTime
    val elapsed = Duration.between(rentedAt, returnedAt)
    elapsed.toMinutes().toDouble()
}

// Adds ROUTE as "<rental station> → <return station>"; a null or blank station name
// is rendered as "Non-station".
val rides = withDuration.add("ROUTE") { ride ->
    val placeholder = "Non-station"
    val (origin, destination) = listOf("RENTAL_STATION_NAME", "RETURN_STATION_NAME").map { column ->
        ride[column]?.toString().orEmpty().ifBlank { placeholder }
    }
    "$origin → $destination"
}

In [15]:
import org.jetbrains.letsPlot.Stat

// Rides whose DURATION_MIN is below 60; this is the input of the duration histogram.
val capped = rides.filter { ride ->
    val minutes = (ride["DURATION_MIN"] as Number).toDouble()
    minutes < 60
}


// Histogram of DURATION_MIN in 30 bins; fill colour follows the bin count on a
// continuous GREEN..RED scale, with black bar borders.
capped.plot {
    layout.title = "Histogram of DURATION_MIN (Under 60 mins)"

    histogram("DURATION_MIN", binsOption = BinsOption.byNumber(30)) {
        borderLine.color = Color.BLACK
        fillColor(Stat.count) {
            scale = continuous(Color.GREEN..Color.RED)
        }
    }
}

In [16]:
// Ten rental stations with the most rides.
// Rows whose station name is null OR blank are dropped: the ROUTE column treats both
// cases as "Non-station", so a blank name must not be counted as a real station here.
val stationUsage = rides
    .filter { !it["RENTAL_STATION_NAME"]?.toString().isNullOrBlank() }
    .groupBy("RENTAL_STATION_NAME")
    .count()
    .sortByDesc("count")
    .take(10)

// Bar chart of ride counts for the stations in stationUsage.
stationUsage.plot {
    bars {
        fillColor = Color.BLUE
        x("RENTAL_STATION_NAME") { axis.name = "Station" }
        y("count") { axis.name = "Number of Rentals" }
    }
    layout { title = "Most Frequent Rental Stations" }
}


In [17]:
// Ten most frequent ROUTE values, skipping the route where both ends are "Non-station".
val routes = rides
    .groupBy("ROUTE")
    .count()
    .filter { route ->
        val label = route["ROUTE"]
        label != "Non-station → Non-station"
    }
    .sortByDesc("count")
    .take(10)

// Bar chart of ride counts per route in routes.
routes.plot {
    layout { title = "Top 10 Most Common Routes" }

    bars {
        fillColor = Color.ORANGE
        x("ROUTE") { axis.name = "Route" }
        y("count") { axis.name = "Count" }
    }
}

In [18]:
// Ten most frequent ROUTE values whose label neither starts nor ends with "Non-station".
// Rows with a null ROUTE are dropped as well.
val routes = rides
    .groupBy("ROUTE")
    .count()
    .filter { route ->
        val label = route["ROUTE"]?.toString()
        label != null && !(label.startsWith("Non-station") || label.endsWith("Non-station"))
    }
    .sortByDesc("count")
    .take(10)

// Bar chart of ride counts per route in routes.
routes.plot {
    layout { title = "Top 10 Most Common Routes" }

    bars {
        fillColor = Color.ORANGE
        x("ROUTE") { axis.name = "Route" }
        y("count") { axis.name = "Count" }
    }
}

In [19]:
import java.time.LocalDateTime

// Number of rides per hour of day (taken from STARTTIME), ordered by hour.
val demandPerHour = rides
    .add("HOUR") { ride ->
        val rentedAt = ride["STARTTIME"] as LocalDateTime
        rentedAt.hour
    }
    .groupBy("HOUR")
    .count()
    .sortBy("HOUR")

// Line chart of rides per hour, with a point marker drawn on every hour.
demandPerHour.plot {
    layout { title = "Hourly Bike Usage" }

    line {
        color = Color.LIGHT_GREEN
        x("HOUR") { axis.name = "Hour of Day" }
        y("count") { axis.name = "Number of Rides" }
    }
    points {
        x("HOUR")
        y("count")
    }
}

In [20]:


// Number of leading rows of `rides` that are considered for the geo plots / export.
val sampleSize = 1000

// From the first `sampleSize` rides, keep those whose start AND end coordinates both fall
// inside the bounding box lon 11.3..11.8, lat 48.0..48.3.
// The former explicit `!= 0.0` checks are gone: 0.0 lies outside both ranges, so the range
// test alone already rejects zero coordinates.
val filteredRides = rides.head(sampleSize).filter { row ->
    val lonBounds = 11.3..11.8
    val latBounds = 48.0..48.3

    // All four casts happen up front, so a non-Double cell fails the same way as before.
    val startLon = row["STARTLON"] as Double
    val startLat = row["STARTLAT"] as Double
    val endLon = row["ENDLON"] as Double
    val endLat = row["ENDLAT"] as Double

    val startValid = startLon in lonBounds && startLat in latBounds
    val endValid = endLon in lonBounds && endLat in latBounds

    startValid && endValid
}



// Scatter plot of ride coordinates: start points in red, end points in blue, both half-transparent.
filteredRides.plot {
    layout { title = "Geolocation of Start (Red) and End (Blue) Points" }

    // Start positions; this layer also names the axes.
    points {
        color = Color.RED
        alpha = 0.5
        x("STARTLON") { axis.name = "Longitude" }
        y("STARTLAT") { axis.name = "Latitude" }
    }
    // End positions, drawn on top of the start layer.
    points {
        color = Color.BLUE
        alpha = 0.5
        x("ENDLON")
        y("ENDLAT")
    }
}



In [21]:
import kotlinx.serialization.Serializable
import kotlinx.serialization.SerialName
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json
import java.io.File
import org.jetbrains.kotlinx.dataframe.DataFrame

/**
 * Root object of the GeoJSON document written by [saveFilteredRidesAsGeoJson].
 *
 * @property type object type tag; defaults to "FeatureCollection".
 * @property features the features contained in the collection.
 */
@Serializable
@SerialName("FeatureCollection")
data class GeoJsonFeatureCollection(
    val type: String = "FeatureCollection",
    val features: List<GeoJsonFeature>
)

/**
 * A single GeoJSON feature: one geometry plus optional string properties.
 *
 * @property type object type tag; defaults to "Feature".
 * @property geometry the geometry of this feature.
 * @property properties optional key/value attributes; `null` when none are given.
 */
@Serializable
@SerialName("Feature")
data class GeoJsonFeature(
    val type: String = "Feature",
    val geometry: GeoJsonGeometry,
    val properties: Map<String, String>? = null
)

/**
 * Geometry of a GeoJSON feature.
 *
 * @property type geometry type tag; defaults to "Point".
 * @property coordinates coordinate values; [saveFilteredRidesAsGeoJson] fills this as
 *   `[longitude, latitude]`.
 */
@Serializable
@SerialName("Point")
data class GeoJsonGeometry(
    val type: String = "Point",
    val coordinates: List<Double>
)

/**
 * Writes the start and end coordinates of every ride as a GeoJSON feature collection.
 *
 * Each row yields two point features, tagged `"point": "start"` and `"point": "end"`,
 * with coordinates in `[longitude, latitude]` order. The target directory is created
 * when it does not exist, and the absolute path of the written file is printed.
 *
 * @param filteredRides rides to export; STARTLON, STARTLAT, ENDLON and ENDLAT are cast to Double.
 * @param relativeDir directory the file is written into.
 * @param filename name of the GeoJSON file inside [relativeDir].
 */
fun saveFilteredRidesAsGeoJson(filteredRides: DataFrame<*>, relativeDir: String, filename: String) {
    // Builds one point feature carrying its role ("start" / "end") as a property.
    fun pointFeature(lon: Double, lat: Double, role: String) = GeoJsonFeature(
        geometry = GeoJsonGeometry(coordinates = listOf(lon, lat)),
        properties = mapOf("point" to role),
    )

    val features = buildList<GeoJsonFeature> {
        for (ride in filteredRides) {
            val startLon = ride["STARTLON"] as Double
            val startLat = ride["STARTLAT"] as Double
            val endLon = ride["ENDLON"] as Double
            val endLat = ride["ENDLAT"] as Double

            add(pointFeature(startLon, startLat, "start"))
            add(pointFeature(endLon, endLat, "end"))
        }
    }

    val json = Json {
        prettyPrint = true
        encodeDefaults = true
    }
    val payload = json.encodeToString(GeoJsonFeatureCollection(features = features))

    // Create the target folder on demand before writing.
    val targetDir = File(relativeDir)
    if (!targetDir.exists()) {
        targetDir.mkdirs()
    }

    val target = File(targetDir, filename)
    target.writeText(payload)
    println("Saved GeoJSON with ${features.size} points to ${target.absolutePath}")
}

// Save to the relative folder "data" (the folder is created if missing).
saveFilteredRidesAsGeoJson(
    filteredRides = filteredRides,
    relativeDir = "data",
    filename = "rides.geojson",
)

// Optional preview of the saved file:
//println(File("data", "rides.geojson").readText().take(500))


Saved GeoJSON with 1926 points to /Users/enriquelopezmanas/Documents/Machine-Learning/mvg-bike-analysis//data/rides.geojson


In [22]:
%use kandy-geo
// Read the exported ride points back from data/rides.geojson as a GeoDataFrame.
val bikeRides = GeoDataFrame.readGeoJson("data/rides.geojson")

2025-06-18T10:40:21.997245Z Execution of code '%use kandy-geo...' ERROR Log4j2 could not find a logging implementation. Please add log4j-core to the classpath. Using SimpleLogger to log to the console...


In [None]:

%use kandy-geo
// Load the Munich area GeoJSON from the local data folder.
// (A stray `munichArea.df.geometry.type().toString()` statement used to sit here; its
// result was discarded, so it has been removed.)
val munichArea = GeoDataFrame.readGeoJson("data/munich_geojson.json")

// Plot the polygon(s) of the Munich city area.
munichArea.plot {
    geoMap {
        fillColor = Color.LIGHT_BLUE
        borderLine {
            color = Color.RED
            width = 1.5
        }
    }

    layout {
        title = "Munich Metropolitan Area"
    }
}

In [26]:

%use kandy-geo
// Load the Munich area GeoJSON from the local data folder.
// (A stray `munichArea.df.geometry.type().toString()` statement used to sit here; its
// result was discarded, so it has been removed.)
val munichArea = GeoDataFrame.readGeoJson("data/munich_geojson.json")

// Plot the polygon(s) of the Munich city area and overlay the ride points from bikeRides.
munichArea.plot {
    geoMap {
        fillColor = Color.LIGHT_BLUE
        borderLine {
            color = Color.RED
            width = 1.5
        }
    }
    // Second layer: switch the data source to the ride points and draw them as small yellow dots.
    withData(bikeRides) {
        geoPoints {
            size = 1.0
            color = Color.YELLOW
        }
    }
    layout {
        title = "Munich Metropolitan Area"
        size = 700 to 500
    }
}

In [40]:
import java.time.DayOfWeek

// Adds IS_WEEKEND: true when STARTTIME falls on a Saturday or Sunday, false otherwise.
val ridesWithDayType = rides.add("IS_WEEKEND") { ride ->
    val rentedAt = ride["STARTTIME"] as LocalDateTime
    when (rentedAt.dayOfWeek) {
        DayOfWeek.SATURDAY, DayOfWeek.SUNDAY -> true
        else -> false
    }
}

// Ride counts split by the IS_WEEKEND flag.
val usageByDayType = ridesWithDayType.groupBy("IS_WEEKEND").count()

// Bar chart comparing the number of rides for each IS_WEEKEND value.
usageByDayType.plot {
    layout { title = "Weekend vs Weekday Usage" }

    bars {
        fillColor = Color.LIGHT_BLUE
        x("IS_WEEKEND") { axis.name = "Is Weekend" }
        y("count") { axis.name = "Number of Rides" }
    }
}
