In [58]:
import pandas as pd

df_org = pd.read_csv("../rawData/all_data_08_08.csv")

# Only these files are analysed in this notebook.
target_files = ["Aconcagua.csv", "Annapurna.csv", "Everest.csv", "Kilimanjaro.csv", "Makalu.csv"]

# Only readings from this single day are kept. `datetime` is still a string
# column at this point, so a prefix match selects the day.
target_day = "2025-07-02"

# na=False treats a missing datetime as "no match"; without it a NaN in the
# column would yield a non-boolean mask that cannot be used for indexing.
df_org = df_org[
    df_org.file.isin(target_files)
    & df_org.datetime.str.startswith(target_day, na=False)
].copy()

print(f"Available files: {df_org.file.unique()}")
print(f"Date range: {df_org.datetime.min()} to {df_org.datetime.max()}")
df_org

Available files: ['Aconcagua.csv' 'Annapurna.csv' 'Everest.csv' 'Kilimanjaro.csv'
 'Makalu.csv']
Date range: 2025-07-02 00:01:02.438 to 2025-07-02 23:46:05.315


Unnamed: 0.1,Unnamed: 0,timestamp,temperature,datetime,humidity,file
167,168,1751415240556,24.0,2025-07-02 00:14:00.556,44.0,Aconcagua.csv
168,169,1751422398304,23.5,2025-07-02 02:13:18.304,44.3,Aconcagua.csv
169,170,1751422504060,23.7,2025-07-02 02:15:04.06,44.2,Aconcagua.csv
170,171,1751429763086,23.6,2025-07-02 04:16:03.086,45.3,Aconcagua.csv
171,172,1751437020178,24.0,2025-07-02 06:17:00.178,46.3,Aconcagua.csv
...,...,...,...,...,...,...
6206,6207,1751465281382,24.2,2025-07-02 14:08:01.382,46.6,Makalu.csv
6207,6208,1751472543545,24.1,2025-07-02 16:09:03.545,47.3,Makalu.csv
6208,6209,1751479801283,24.1,2025-07-02 18:10:01.283,47.3,Makalu.csv
6209,6210,1751487060754,24.1,2025-07-02 20:11:00.754,47.9,Makalu.csv


In [59]:
from datetime import datetime


# manual parsing of time
def parse_time(s):
    """Parse a ``YYYY-MM-DD HH:MM:SS[.ffffff]`` string into a ``datetime``.

    Any comma in the input is first replaced by a space. The format with
    fractional seconds is tried first; if that fails, the format without
    them is used. A ``ValueError`` from the second attempt propagates.
    """
    cleaned = s.replace(',', ' ')
    with_fraction = "%Y-%m-%d %H:%M:%S.%f"
    without_fraction = "%Y-%m-%d %H:%M:%S"
    try:
        parsed = datetime.strptime(cleaned, with_fraction)
    except ValueError:
        # No fractional part present: fall back to whole seconds.
        parsed = datetime.strptime(cleaned, without_fraction)
    return parsed

In [60]:
# Read the DWD file, parse its datetime strings with parse_time, and rename
# the temperature column so it cannot be confused with the per-file
# `temperature` column after merging.
dwd_df = (
    pd.read_csv("../rawData/dwd_07_02.csv")
    .assign(datetime=lambda frame: frame["datetime"].apply(parse_time))
    .rename(columns={"temperature": "outside_temperature"})
)

# Only the merge key and the outside temperature are needed downstream.
dwd_clean = dwd_df.loc[:, ["datetime", "outside_temperature"]].copy()
dwd_clean.head()

Unnamed: 0,datetime,outside_temperature
0,2025-07-02 00:00:00,20.7
1,2025-07-02 01:00:00,18.5
2,2025-07-02 02:00:00,17.9
3,2025-07-02 03:00:00,18.7
4,2025-07-02 04:00:00,18.8


In [None]:
def resample_data(fileName):
    """Return the hourly series for one file on 2025-07-02, with outside temperature.

    Reads the notebook-level frames ``df_org`` and ``dwd_clean`` and uses
    ``parse_time`` to convert the ``datetime`` strings; neither frame is
    modified.

    Parameters
    ----------
    fileName : str
        Value of the ``file`` column to select, e.g. ``"Everest.csv"``.

    Returns
    -------
    pandas.DataFrame
        One row per hourly slot from 00:00 to 23:00 on 2025-07-02, holding a
        ``datetime`` column, the hourly means of the remaining columns of
        ``df_org`` (gaps filled by ``DataFrame.interpolate()`` with its
        defaults), and ``outside_temperature`` left-joined from ``dwd_clean``.
    """
    # Keep this file's rows and drop the columns that should not be averaged.
    person = df_org[df_org.file == fileName].copy()
    person = person.drop(["Unnamed: 0", "timestamp", "file"], axis=1)
    person["datetime"] = person["datetime"].apply(parse_time)

    # Hourly means of the readings.
    resample_person = person.resample("h", on="datetime").mean()

    # The 24 hourly slots of 2025-07-02: the end bound of 23:59:59 makes
    # 23:00 the last slot (00:00 of the following day is not included).
    start_time = datetime(2025, 7, 2, 0, 0, 0)
    end_time = datetime(2025, 7, 2, 23, 59, 59)
    complete_index = pd.date_range(start=start_time, end=end_time, freq='h')

    # Align to the full day, then fill hours that had no readings.
    resample_person_fill = resample_person.reindex(complete_index).interpolate()

    # Name the index explicitly so the column is called 'datetime' regardless
    # of what name reset_index would otherwise pick.
    resample_person_fill = resample_person_fill.rename_axis("datetime").reset_index()

    # Left join keeps every hourly slot even if no outside reading matches.
    resample_person_fill = pd.merge(resample_person_fill, dwd_clean, on='datetime', how='left')

    return resample_person_fill

In [46]:
# Distinct values of the `file` column in the filtered df_org.
persons = df_org["file"].unique()
print(f"Files to process: {persons}")
persons

Files to process: ['Aconcagua.csv' 'Annapurna.csv' 'Everest.csv' 'Kilimanjaro.csv'
 'Makalu.csv']


array(['Aconcagua.csv', 'Annapurna.csv', 'Everest.csv', 'Kilimanjaro.csv',
       'Makalu.csv'], dtype=object)

In [51]:
for p in persons:
    # Person label is the file name with '.csv' removed.
    filename = p.replace('.csv', '')

    resampled = resample_data(p)
    resampled["person"] = filename

    # One CSV per person; datetime is a regular column, so the index is not written.
    out_path = f"processedData/resampled_{filename}.csv"
    resampled.to_csv(out_path, index=False)
    print(f"Saved: {out_path}")

Saved: processedData/resampled_Aconcagua.csv
Saved: processedData/resampled_Annapurna.csv
Saved: processedData/resampled_Everest.csv
Saved: processedData/resampled_Kilimanjaro.csv
Saved: processedData/resampled_Makalu.csv
