In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import matplotlib.cm as cm
import matplotlib.dates as mdates
from scipy.signal import find_peaks
import openmeteo_requests
import requests_cache
from datetime import date, timedelta
import seaborn as sns
import numpy as np

#### Read in Data

Nur wenn full_data.csv im Ordner data noch nicht existiert, sonst überspringen

In [2]:
url_list = []

# URLs zu Daten generieren nach folgendem Schema:
# https://mobidata-bw.de/daten/eco-counter/v2/fahrradzaehler_stundenwerten_{yyyymm}.csv.gz 

for year in range(2013, 2026):
    for month in range(1, 13):
        yyyymm = f"{year}{month:02d}"
        url = f"https://mobidata-bw.de/daten/eco-counter/v2/fahrradzaehler_stundenwerten_{yyyymm}.csv.gz"
        
        # Überprüfen, ob die URL existiert
        response = requests_cache.CachedSession().head(url)
        if response.status_code == 200:    
            # in url_list hinzufügen
            url_list.append(url)

general_columns = pd.read_csv(url_list[1]).columns.tolist()

# Erstelle CSV-Datei, in der Daten aller URLs gespeichert werden
full_data = pd.DataFrame()
for url in url_list:
    csv_data = pd.read_csv(url, low_memory=False)
    assert list(csv_data.columns) == general_columns, f"Spalten stimmen nicht überein in {url}"
    full_data = pd.concat([full_data, csv_data], ignore_index=True)

# Speichere full_data lokal als CSV-Datei
full_data.to_csv("../data/full_data.csv", index=False)   
full_data.head() 

Unnamed: 0,operator_name,domain_name,domain_id,counter_site,counter_site_id,counter_serial,longitude,latitude,timezone,iso_timestamp,channels_in,channels_out,channels_unknown,channels_all,site_temperature,site_rain_accumulation,site_snow_accumulation
0,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01T00:00:00+01:00,9,10,na,19,5.0,0.0,na
1,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01T01:00:00+01:00,15,18,na,33,5.0,0.0,na
2,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01T02:00:00+01:00,17,14,na,31,5.0,0.0,na
3,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01T03:00:00+01:00,14,26,na,40,5.0,0.0,na
4,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01T04:00:00+01:00,13,17,na,30,5.0,0.0,na


In [3]:
full_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6080705 entries, 0 to 6080704
Data columns (total 17 columns):
 #   Column                  Dtype  
---  ------                  -----  
 0   operator_name           object 
 1   domain_name             object 
 2   domain_id               int64  
 3   counter_site            object 
 4   counter_site_id         int64  
 5   counter_serial          object 
 6   longitude               float64
 7   latitude                float64
 8   timezone                object 
 9   iso_timestamp           object 
 10  channels_in             object 
 11  channels_out            object 
 12  channels_unknown        object 
 13  channels_all            int64  
 14  site_temperature        object 
 15  site_rain_accumulation  object 
 16  site_snow_accumulation  object 
dtypes: float64(2), int64(3), object(12)
memory usage: 788.7+ MB


In [5]:
data = full_data.copy()

In [29]:
# Einlesen von alle_fahrradzaehler_daten.csv
data = pd.read_csv("../../data/full_data.csv", low_memory=False)
data['iso_timestamp'] = pd.to_datetime(data['iso_timestamp'], utc = True, errors='coerce') # Isotimestamp ist lokale Zeit und berücksichtigt Sommerzeit
data.head()

Unnamed: 0,operator_name,domain_name,domain_id,counter_site,counter_site_id,counter_serial,longitude,latitude,timezone,iso_timestamp,channels_in,channels_out,channels_unknown,channels_all,site_temperature,site_rain_accumulation,site_snow_accumulation
0,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2012-12-31 23:00:00+00:00,9,10,na,19,5.0,0.0,na
1,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01 00:00:00+00:00,15,18,na,33,5.0,0.0,na
2,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01 01:00:00+00:00,17,14,na,31,5.0,0.0,na
3,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01 02:00:00+00:00,14,26,na,40,5.0,0.0,na
4,Eco Counter GmbH,Stadt Karlsruhe,752,Erbprinzenstraße,100004165,Y2H16070301,8.402715,49.007286,(UTC+01:00) Europe/Paris DST,2013-01-01 03:00:00+00:00,13,17,na,30,5.0,0.0,na


#### Check Data Set

TODO: Keep relevant checks for submission and clean up

In [102]:
# Check data 
print(data.info())
print(data.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6122719 entries, 0 to 6122718
Data columns (total 17 columns):
 #   Column                  Dtype              
---  ------                  -----              
 0   operator_name           object             
 1   domain_name             object             
 2   domain_id               int64              
 3   counter_site            object             
 4   counter_site_id         int64              
 5   counter_serial          object             
 6   longitude               float64            
 7   latitude                float64            
 8   timezone                object             
 9   iso_timestamp           datetime64[ns, UTC]
 10  channels_in             object             
 11  channels_out            object             
 12  channels_unknown        object             
 13  channels_all            int64              
 14  site_temperature        object             
 15  site_rain_accumulation  object             
 16  

In [103]:
# Check for duplicates
duplicates = data.duplicated()
print(f"Anzahl der Duplikate: {duplicates.sum()}")

Anzahl der Duplikate: 0


In [104]:
# Number of years in the dataset
data['year'] = data['iso_timestamp'].dt.year
print(f"Jahre im Datensatz: {data['year'].nunique()} - {data['year'].unique()}")

Jahre im Datensatz: 14 - [2012. 2013. 2014. 2015. 2016. 2017. 2018. 2019. 2020.   nan 2021. 2022.
 2023. 2024. 2025.]


In [105]:
# Locations in data set
locations = data['domain_name'].unique()
print(f"Anzahl der Standorte: {len(locations)}")
print(f"Standorte:")
for loc in locations:
    print(loc)

Anzahl der Standorte: 22
Standorte:
Stadt Karlsruhe
Stadt Freiburg
Landeshauptstadt Stuttgart
Stadt Tübingen
Stadt Lörrach
Stadt Heilbronn
Stadt Mannheim
Stadt Kirchheim Unter Teck
Stadt Heidelberg
Stadt Offenburg
Stadt Ludwigsburg
Stadt Konstanz
Landkreis Böblingen
Ravensburg Tws Gmbh & Co. Kg
Stadt Ulm
Stadtverwaltung Aalen
Stadt Reutlingen
Landratsamt Rems-Murr-Kreis
Stadt Singen
Stadt Bad Säckingen
Landratsamt Ostalbkreis
Regierungspräsidium Stuttgart Aussenstelle Heilbronn


Note: counter = Gerät, channel = Richtung/Messspur

In [106]:
counters_per_location = data.groupby('domain_name')['counter_site_id'].nunique()
counters_per_location

domain_name
Landeshauptstadt Stuttgart                              15
Landkreis Böblingen                                      2
Landratsamt Ostalbkreis                                 11
Landratsamt Rems-Murr-Kreis                             13
Ravensburg Tws Gmbh & Co. Kg                             7
Regierungspräsidium Stuttgart Aussenstelle Heilbronn     1
Stadt Bad Säckingen                                      2
Stadt Freiburg                                          10
Stadt Heidelberg                                        15
Stadt Heilbronn                                          4
Stadt Karlsruhe                                          1
Stadt Kirchheim Unter Teck                               1
Stadt Konstanz                                           6
Stadt Ludwigsburg                                       15
Stadt Lörrach                                            2
Stadt Mannheim                                          14
Stadt Offenburg                             

In [107]:
# Check, at which location (domain_name) counter_serial is null
null_serial_locations = data[data['counter_serial'].isnull()]['domain_name'].unique()
print(f"Standorte mit null counter_serial: {null_serial_locations}")

Standorte mit null counter_serial: ['Stadt Heidelberg']


In [108]:
# Übersicht über die Counters in einer Stadt
city = "Stadt Tübingen"
data_city = data[data['domain_name'] == city]
print(f"Counter in {city}: {data_city['counter_site'].unique()}")

Counter in Stadt Tübingen: ['Unterführung Steinlach/Karlstraße Südseite - Steinlachallee'
 'Fuß- & Radtunnel Südportal - Derendinger Allee'
 'Neckartalradweg Hirschau - parallel L371'
 'Radbrücke Mitte - Wöhrdstraße' 'Radbrücke Ost']


##### Check Counter

In [109]:
# Check counter per city
city = "Stadt Tübingen"
counter_site = 'Radbrücke Ost'

data_city = data[data['domain_name'] == city]

# Check for counter site in chosen city
print(data_city[data_city['counter_site'] == counter_site].head())

            operator_name     domain_name  domain_id   counter_site  \
4354601  Eco Counter GmbH  Stadt Tübingen        667  Radbrücke Ost   
4354602  Eco Counter GmbH  Stadt Tübingen        667  Radbrücke Ost   
4354603  Eco Counter GmbH  Stadt Tübingen        667  Radbrücke Ost   
4354604  Eco Counter GmbH  Stadt Tübingen        667  Radbrücke Ost   
4354605  Eco Counter GmbH  Stadt Tübingen        667  Radbrücke Ost   

         counter_site_id counter_serial  longitude  latitude  \
4354601        300041398    Y2H23046192    9.06905  48.51824   
4354602        300041398    Y2H23046192    9.06905  48.51824   
4354603        300041398    Y2H23046192    9.06905  48.51824   
4354604        300041398    Y2H23046192    9.06905  48.51824   
4354605        300041398    Y2H23046192    9.06905  48.51824   

                             timezone             iso_timestamp channels_in  \
4354601  (UTC+01:00) Europe/Paris DST 2024-02-21 12:00:00+00:00           0   
4354602  (UTC+01:00) Europe/Pa

In [110]:
counters = data_city[['counter_site', 'counter_site_id', 'counter_serial']].drop_duplicates().reset_index(drop=True)

tracking = data_city.groupby(['counter_site', 'counter_site_id', 'counter_serial'], dropna = False)['iso_timestamp'] \
    .agg(first_timestamp='min', last_timestamp='max') \
    .reset_index()
counters_with_tracking = counters.merge(tracking, on=['counter_site', 'counter_site_id', 'counter_serial'])

counters_with_tracking

Unnamed: 0,counter_site,counter_site_id,counter_serial,first_timestamp,last_timestamp
0,Unterführung Steinlach/Karlstraße Südseite - S...,100003359,Y2H17123962,2013-05-14 22:00:00+00:00,2025-10-31 02:00:00+00:00
1,Fuß- & Radtunnel Südportal - Derendinger Allee,100003358,YAH24052174,2013-06-20 22:00:00+00:00,2025-10-31 01:00:00+00:00
2,Neckartalradweg Hirschau - parallel L371,100026408,Y2H21035424,2016-02-18 23:00:00+00:00,2025-10-31 21:00:00+00:00
3,Radbrücke Mitte - Wöhrdstraße,300030648,Y2H25043135,2023-02-23 07:00:00+00:00,2025-10-31 02:00:00+00:00
4,Radbrücke Ost,300041398,Y2H23046192,2024-02-21 12:00:00+00:00,2025-10-31 01:00:00+00:00


In [111]:
print(data.shape)
print(data.info())

(6122719, 18)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6122719 entries, 0 to 6122718
Data columns (total 18 columns):
 #   Column                  Dtype              
---  ------                  -----              
 0   operator_name           object             
 1   domain_name             object             
 2   domain_id               int64              
 3   counter_site            object             
 4   counter_site_id         int64              
 5   counter_serial          object             
 6   longitude               float64            
 7   latitude                float64            
 8   timezone                object             
 9   iso_timestamp           datetime64[ns, UTC]
 10  channels_in             object             
 11  channels_out            object             
 12  channels_unknown        object             
 13  channels_all            int64              
 14  site_temperature        object             
 15  site_rain_accumulation  object     

#### Preprocess Data

##### Clean Data

In [30]:
## Clean data
data_cleaned = data.copy()
data_cleaned = data_cleaned[['domain_name', 'counter_site', 'counter_site_id', 'longitude', 'latitude',
       'iso_timestamp', 'channels_in', 'channels_out', 'channels_unknown', 'channels_all', 'site_temperature',
       'site_rain_accumulation', 'site_snow_accumulation']]

# 1. Time
# 'Isotimestamp' is local time and considers 'Sommerzeit'. Therefore, we use this for better accuracy in time representation.
# Exchange 'timestamp' with 'iso_timestamp' and convert to datetime with UTC timezone.
# Drop 'timezone' as this is identical for all entries.
data_cleaned['timestamp'] = pd.to_datetime(data_cleaned['iso_timestamp'], utc = True, errors='coerce') 
data_cleaned = data_cleaned.drop(columns=['iso_timestamp'])

# 2. City
# Drop 'operator_name' as this is not of interest.
# Drop 'domain_id' as this is not informative.
# Rename 'domain_name' to 'city' for clarity.
data_cleaned = data_cleaned.rename(columns={'domain_name': 'city'})

# 3. Counter
# Drop 'counter_serial' as this is not informative and has many missing values.
# Rename 'counter_site' to 'counter_site_name' for clarity.
# Note: For further analysis, use 'counter_site_id' to uniquely identify counter sites.
data_cleaned = data_cleaned.rename(columns={'counter_site': 'counter_site_name'})


# 5. Count
# Rename 'channels_all' to 'count' for clarity.
data_cleaned = data_cleaned.rename(columns={'channels_all': 'count'})

# Save cleaned data
data_cleaned.to_csv("../data/cleaned_full_data.csv", index=False)

data_cleaned.head()

Unnamed: 0,city,counter_site_name,counter_site_id,longitude,latitude,channels_in,channels_out,channels_unknown,count,site_temperature,site_rain_accumulation,site_snow_accumulation,timestamp
0,Stadt Karlsruhe,Erbprinzenstraße,100004165,8.402715,49.007286,9,10,na,19,5.0,0.0,na,2012-12-31 23:00:00+00:00
1,Stadt Karlsruhe,Erbprinzenstraße,100004165,8.402715,49.007286,15,18,na,33,5.0,0.0,na,2013-01-01 00:00:00+00:00
2,Stadt Karlsruhe,Erbprinzenstraße,100004165,8.402715,49.007286,17,14,na,31,5.0,0.0,na,2013-01-01 01:00:00+00:00
3,Stadt Karlsruhe,Erbprinzenstraße,100004165,8.402715,49.007286,14,26,na,40,5.0,0.0,na,2013-01-01 02:00:00+00:00
4,Stadt Karlsruhe,Erbprinzenstraße,100004165,8.402715,49.007286,13,17,na,30,5.0,0.0,na,2013-01-01 03:00:00+00:00


In [31]:
data_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6080705 entries, 0 to 6080704
Data columns (total 13 columns):
 #   Column                  Dtype              
---  ------                  -----              
 0   city                    object             
 1   counter_site_name       object             
 2   counter_site_id         int64              
 3   longitude               float64            
 4   latitude                float64            
 5   channels_in             object             
 6   channels_out            object             
 7   channels_unknown        object             
 8   count                   int64              
 9   site_temperature        object             
 10  site_rain_accumulation  object             
 11  site_snow_accumulation  object             
 12  timestamp               datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), float64(2), int64(2), object(8)
memory usage: 603.1+ MB


In [32]:

result = data_cleaned.groupby('city')['counter_site_name'].agg(
    unique_sites=lambda x: list(set(x)))
result['count_unique_sites'] = result['unique_sites'].apply(len)
result.reset_index()

print(result)

                                                                                         unique_sites  \
city                                                                                                    
Landeshauptstadt Stuttgart                          [Am Kräherwald, Samaraweg, Böblinger Straße, K...   
Landkreis Böblingen                                 [K 1077 Radschnellweg Böblingen-Ehningen, Röme...   
Landratsamt Ostalbkreis                             [OAK-11: Ellwangen-Röhlingen, OAK-03: Bucher S...   
Landratsamt Rems-Murr-Kreis                         [Höfen - Hertmannsweiler (K 1851), Weilerstraß...   
Ravensburg Tws Gmbh & Co. Kg                        [01 / 02 WGT Doggenriedstraße, 08 RV Bahnhofst...   
Regierungspräsidium Stuttgart Aussenstelle Heil...                                      [Bad Wimpfen]   
Stadt Bad Säckingen                                 [Murger Weg / RheinRadWeg, Bündtenweg - Wallba...   
Stadt Freiburg                                      [FR

In [33]:

def earliest_latest_at_max(
    df: pd.DataFrame,
    city_col: str = "city",
    counter_col: str = "counter_site_name",
    date_col: str = "timestamp",
    freq: str = "D" #(Tag), "M" (Monat), "Q" (Quartal), "Y" (Jahr) usw.
) -> pd.DataFrame:
    """
    Für jede Stadt wird berechnet:
      - max_count: maximale Anzahl unterschiedlicher counter_site_name im Zeitraum (aggregiert nach freq)
      - earliest_time: frühester Zeitpunkt (Timestamp) mit dieser max_count
      - latest_time: spätester Zeitpunkt (Timestamp) mit dieser max_count
      - times_with_max: Liste aller Zeitpunkte (Timestamp), an denen max_count erreicht wurde

    Annahmen:
      - date_col wird in datetime geparst
      - Aggregation nach 'freq' (z.B. Monat "M")
    """
    # Nur die benötigten Spalten + Kopie
    df2 = df[[city_col, counter_col, date_col]].copy()

    # Datum parsen
    df2[date_col] = pd.to_datetime(df2[date_col], errors="coerce")
    df2 = df2.dropna(subset=[date_col])

    # Zeitraster (Period -> Timestamp)
    period = df2[date_col].dt.to_period(freq)
    df2["period_start"] = period.dt.to_timestamp()  # Beginn des jeweiligen Zeitrasters

    # Schritt 1: Anzahl unterschiedlicher Counter je Stadt & Zeitraum
    counts = (
        df2.groupby([city_col, "period_start"])[counter_col]
           .nunique()
           .reset_index()
           .rename(columns={counter_col: "n_unique"})
    )

    # Schritt 2: Pro Stadt das Maximum und die Zeitpunkte mit diesem Maximum finden
    def summarize_city(group: pd.DataFrame) -> pd.Series:
        max_count = group["n_unique"].max()
        at_max = group.loc[group["n_unique"] == max_count, "period_start"].sort_values()

        return pd.Series({
            "max_count": int(max_count),
            "earliest_time": at_max.iloc[0] if len(at_max) else pd.NaT,
            "latest_time": at_max.iloc[-1] if len(at_max) else pd.NaT#,
            #"times_with_max": list(at_max)
        })

    result = counts.groupby(city_col).apply(summarize_city).reset_index()

    return result


In [34]:
earliest_latest_at_max(data_cleaned)


  period = df2[date_col].dt.to_period(freq)
  result = counts.groupby(city_col).apply(summarize_city).reset_index()


Unnamed: 0,city,max_count,earliest_time,latest_time
0,Landeshauptstadt Stuttgart,15,2020-08-31,2025-12-02
1,Landkreis Böblingen,2,2020-12-21,2025-12-14
2,Landratsamt Ostalbkreis,11,2024-12-22,2025-10-31
3,Landratsamt Rems-Murr-Kreis,13,2024-12-07,2025-12-09
4,Ravensburg Tws Gmbh & Co. Kg,7,2020-12-17,2025-12-14
5,Regierungspräsidium Stuttgart Aussenstelle Hei...,1,2024-12-15,2025-12-14
6,Stadt Bad Säckingen,2,2023-11-22,2025-05-09
7,Stadt Freiburg,3,2014-07-31,2025-12-14
8,Stadt Heidelberg,14,2020-01-31,2022-05-10
9,Stadt Heilbronn,4,2020-10-20,2025-11-10


In [8]:
# ersetze 'na' Werte in den 3 channel spalten durch 0, um "Checksum" berechnen zu können 
data_cleaned['channels_in'] = np.where(data_cleaned['channels_in'].eq('na'), 0, data_cleaned['channels_in'])
data_cleaned['channels_out'] = np.where(data_cleaned['channels_out'].eq('na'), int(0), data_cleaned['channels_out'])
data_cleaned['channels_unknown'] = np.where(data_cleaned['channels_unknown'].eq('na'), int(0), data_cleaned['channels_unknown'])

In [9]:
print(data_cleaned.loc[data_cleaned["channels_unknown"] == 'na'])
print(data_cleaned.loc[data_cleaned["channels_in"] == 'na'])
print(data_cleaned.loc[data_cleaned["channels_out"] == 'na'])
print(data_cleaned.loc[data_cleaned["count"] == 'na'])
# -> keine 'na' Werte (mehr) in den 4 Spalten

Empty DataFrame
Columns: [city, counter_site_name, counter_site_id, longitude, latitude, channels_in, channels_out, channels_unknown, count, site_temperature, site_rain_accumulation, site_snow_accumulation, timestamp]
Index: []
Empty DataFrame
Columns: [city, counter_site_name, counter_site_id, longitude, latitude, channels_in, channels_out, channels_unknown, count, site_temperature, site_rain_accumulation, site_snow_accumulation, timestamp]
Index: []
Empty DataFrame
Columns: [city, counter_site_name, counter_site_id, longitude, latitude, channels_in, channels_out, channels_unknown, count, site_temperature, site_rain_accumulation, site_snow_accumulation, timestamp]
Index: []
Empty DataFrame
Columns: [city, counter_site_name, counter_site_id, longitude, latitude, channels_in, channels_out, channels_unknown, count, site_temperature, site_rain_accumulation, site_snow_accumulation, timestamp]
Index: []


In [10]:
# konvertiere die 4 Spalten in Integer
data_cleaned[['channels_in', 'channels_out', 'channels_unknown', 'count']] = data_cleaned[['channels_in', 'channels_out', 'channels_unknown', 'count']].astype(int)
sum_cols = ['channels_in', 'channels_out', 'channels_unknown']

# Prüfe, ob die Summe der 3 channel Spalten der count Spalte entspricht
data_cleaned['checksum_correct'] = data_cleaned[sum_cols].sum(axis=1).eq(data_cleaned['count'])

In [11]:
print(data_cleaned['checksum_correct'].value_counts())

checksum_correct
True    6080705
Name: count, dtype: int64


In [12]:
checksum_failure_data = data_cleaned[data_cleaned["checksum_correct"] == False][['city', 'counter_site_name', 'counter_site_id', 'longitude', 'latitude',
       'timestamp', 'channels_in', 'channels_out', 'channels_unknown', 'count']]
# Speichere lokal als CSV-Datei
checksum_failure_data.to_csv("../data/cs_failure_data.csv", index=False)   
checksum_failure_data.head() 

Unnamed: 0,city,counter_site_name,counter_site_id,longitude,latitude,timestamp,channels_in,channels_out,channels_unknown,count


In [120]:
# Vermutung: 'channels_unknown' ist eine Art Fehlercode, der entweder 0 oder 8 ist -> zeige Werte von channels_unknown an
data_cleaned['channels_unknown'].unique()
# Vermutung widerlegt 

array([   0,   51,  117, ..., 1422, 1311, 1392], shape=(1502,))

In [127]:
c='Bahnhaltepunkt Fürstenberg'
t='2021-05-22 04:00:00+00:00'
print(data_cleaned[(data_cleaned['counter_site_name']==c) & (data_cleaned['timestamp']== t)])
print(data[(data['counter_site']==c) & (data['iso_timestamp']== t)])

                   city           counter_site_name  counter_site_id  \
2066806  Stadt Konstanz  Bahnhaltepunkt Fürstenberg        300016792   

         longitude   latitude  channels_in  channels_out  channels_unknown  \
2066806   9.162819  47.677375           11             4                 8   

         count site_temperature site_rain_accumulation site_snow_accumulation  \
2066806     15              7.0                    0.3                     na   

                        timestamp  checksum_correct  
2066806 2021-05-22 04:00:00+00:00             False  
            operator_name     domain_name  domain_id  \
2066806  Eco Counter GmbH  Stadt Konstanz       6456   

                       counter_site  counter_site_id counter_serial  \
2066806  Bahnhaltepunkt Fürstenberg        300016792    COM21020459   

         longitude   latitude                      timezone  \
2066806   9.162819  47.677375  (UTC+01:00) Europe/Paris DST   

                    iso_timestamp channels_i