In [3]:
import os
import pandas as pd

def load_telia_data(folder, sep=";"):
    """Load every CSV file in ``folder`` and combine them into one DataFrame.

    Files are read in sorted filename order so that the row order of the
    result is reproducible (``os.listdir`` returns entries in arbitrary order).

    Args:
        folder: Path of the directory that contains the CSV files.
        sep: Field separator handed to ``pandas.read_csv``. Defaults to ``";"``.

    Returns:
        A DataFrame holding the rows of all CSV files, concatenated with a
        fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If ``folder`` does not exist or holds no CSV files.
    """

    def load_csvs_from_folder(folder):
        """Find and concatenate all CSV files located directly in ``folder``."""
        if not os.path.exists(folder):
            raise FileNotFoundError(f"Mappen {folder} eksisterer ikke.")

        # Sorted for a deterministic read order; the isfile() check skips
        # sub-directories whose name merely ends in ".csv".
        csv_files = sorted(
            name
            for name in os.listdir(folder)
            if name.endswith(".csv") and os.path.isfile(os.path.join(folder, name))
        )
        if not csv_files:
            raise FileNotFoundError(f"Ingen CSV-filer funnet i {folder}")

        frames = [pd.read_csv(os.path.join(folder, name), sep=sep) for name in csv_files]
        return pd.concat(frames, ignore_index=True)

    # Load the hourly data and report how many rows were read.
    hourly_data = load_csvs_from_folder(folder)
    print(f"✅ Lastet inn {len(hourly_data)} rader fra timelige Telia-data.")
    return hourly_data
        
# Directory with the hourly Telia CSV files (path is relative to the working directory).
folder="data/telia/hourly"
# Read every CSV in that directory into one combined DataFrame.
df = load_telia_data(folder)

✅ Lastet inn 82798311 rader fra timelige Telia-data.


In [6]:
# Show the combined frame; the printed output is abbreviated to the first and last rows.
print(df)

          batch_date hour     way_id  tag_key   tag_value admin_level_2 people
0         2022-11-03    5  717959781  highway       trunk     Trondheim    256
1         2022-11-03   19  733976762  highway    tertiary     Trondheim    256
2         2022-11-03   21  724402416  highway   secondary     Trondheim    256
3         2022-11-03   16  727900571  highway   secondary     Trondheim    768
4         2022-11-03   18  697301994  highway  trunk_link     Trondheim    768
...              ...  ...        ...      ...         ...           ...    ...
82798306  2020-01-26   18  774734983  highway       trunk     Trondheim   1521
82798307  2020-01-26    8  774734984  highway       trunk     Trondheim    242
82798308  2020-01-26   17  785686875  highway   secondary     Trondheim    243
82798309  2020-01-26    7  774734982  highway       trunk     Trondheim    250
82798310  2020-01-26    1  774734982  highway       trunk     Trondheim    253

[82798311 rows x 7 columns]


In [7]:
import pandas as pd

def calculate_hourly_distributions(df):
    """Compute the mean ``people`` value per batch date, hour and tag value.

    The input frame is left untouched: the type conversions are applied to
    local Series instead of being written back into ``df``, so calling this
    function repeatedly (or re-running the cell) has no side effects on the
    caller's data.

    Args:
        df: DataFrame with the columns 'batch_date', 'hour', 'tag_value'
            and 'people'.

    Returns:
        A DataFrame with the columns 'batch_date' (datetime), 'hour' (int),
        'tag_value' and 'people' (the group mean), one row per
        (batch_date, hour, tag_value) combination.
    """
    # Convert into new Series; each keeps its column name, which groupby
    # uses for the resulting index levels.
    batch_date = pd.to_datetime(df['batch_date'])
    hour = df['hour'].astype(int)
    people = df['people'].astype(int)

    # Group the converted 'people' values by the three keys and average them.
    hourly_distributions = (
        people.groupby([batch_date, hour, df['tag_value']]).mean().reset_index()
    )

    return hourly_distributions

# Mean 'people' per (batch_date, hour, tag_value) over the full loaded frame
hourly_distributions = calculate_hourly_distributions(df)

# Print the aggregated result (one row per group)
print(hourly_distributions)

       batch_date  hour       tag_value      people
0      2019-01-17     0        motorway  127.037037
1      2019-01-17     0   motorway_link   24.700000
2      2019-01-17     0         primary   60.047619
3      2019-01-17     0            rail   11.275862
4      2019-01-17     0       secondary   41.335922
...           ...   ...             ...         ...
413272 2023-11-30    23  secondary_link   54.666667
413273 2023-11-30    23        tertiary   51.356473
413274 2023-11-30    23   tertiary_link   26.000000
413275 2023-11-30    23           trunk  361.039823
413276 2023-11-30    23      trunk_link   85.972477

[413277 rows x 4 columns]
