In [4]:
import pandas as pd

# Read the combined readings CSV.
df_org = pd.read_csv("../rawData/all_data_08_18.csv")

# Restrict the frame to the five files analysed in this notebook.
target_files = [
    "Aconcagua.csv",
    "Annapurna.csv",
    "Everest.csv",
    "Kilimanjaro.csv",
    "Makalu.csv",
]
is_target_file = df_org["file"].isin(target_files)
df_org = df_org.loc[is_target_file].copy()

# Keep only rows whose datetime string begins with 2025-07-30.
is_target_day = df_org["datetime"].str.startswith("2025-07-30")
df_org = df_org.loc[is_target_day].copy()

# Report what survived both filters, then display the frame.
print(f"Available files: {df_org.file.unique()}")
print(f"Date range: {df_org.datetime.min()} to {df_org.datetime.max()}")
df_org

Available files: ['Aconcagua.csv' 'Annapurna.csv' 'Everest.csv' 'Kilimanjaro.csv'
 'Makalu.csv']
Date range: 2025-07-30 00:27:02.092 to 2025-07-30 23:59:00.999


Unnamed: 0.1,Unnamed: 0,timestamp,temperature,datetime,file
522,523,1753840081633,23.6,2025-07-30 01:48:01.633,Aconcagua.csv
523,524,1753847342054,23.6,2025-07-30 03:49:02.054,Aconcagua.csv
524,525,1753854602405,23.4,2025-07-30 05:50:02.405,Aconcagua.csv
525,526,1753861869557,23.4,2025-07-30 07:51:09.557,Aconcagua.csv
526,527,1753869121514,23.5,2025-07-30 09:52:01.514,Aconcagua.csv
...,...,...,...,...,...
7266,7268,1753873200000,23.8,2025-07-30 11:00:00,Makalu.csv
7267,7269,1753880400000,23.9,2025-07-30 13:00:00,Makalu.csv
7268,7270,1753887600000,23.9,2025-07-30 15:00:00,Makalu.csv
7269,7271,1753894800000,23.8,2025-07-30 17:00:00,Makalu.csv


In [5]:
from datetime import datetime


# manual parsing of time
def parse_time(s):
    """Convert a timestamp string into a ``datetime``.

    Every comma in ``s`` is replaced by a space first (the original note
    attributes this to a comma in the first row of the input). The text is
    then parsed as ``YYYY-MM-DD HH:MM:SS.ffffff``; if that layout does not
    match, ``YYYY-MM-DD HH:MM:SS`` is tried instead.

    Raises:
        ValueError: if the text matches neither layout.
    """
    text = s.replace(',', ' ')

    # Layouts in priority order; the last one is the unguarded fallback so
    # its ValueError reaches the caller.
    *preferred, fallback = ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S")
    for layout in preferred:
        try:
            return datetime.strptime(text, layout)
        except ValueError:
            continue
    return datetime.strptime(text, fallback)

In [7]:
# Load the DWD data and shape it for merging:
#   1. parse the datetime strings with parse_time,
#   2. rename temperature -> outside_temperature for clarity.
dwd_df = (
    pd.read_csv("../rawData/dwd_07_30.csv")
    .assign(datetime=lambda frame: frame["datetime"].apply(parse_time))
    .rename(columns={'temperature': 'outside_temperature'})
)

# Only the two columns needed for the merge are carried forward.
merge_columns = ['datetime', 'outside_temperature']
dwd_clean = dwd_df.loc[:, merge_columns].copy()
dwd_clean.head()

Unnamed: 0,datetime,outside_temperature
0,2025-07-30 00:00:00,14.4
1,2025-07-30 01:00:00,14.2
2,2025-07-30 02:00:00,14.0
3,2025-07-30 03:00:00,13.5
4,2025-07-30 04:00:00,13.7


In [8]:
def resample_data(fileName, start_time=None, end_time=None):
    """Build a gap-free hourly table for one source file.

    Rows of the notebook-level ``df_org`` whose ``file`` equals ``fileName``
    are averaged per hour, aligned to a complete hourly index, interpolated,
    and left-joined with the notebook-level ``dwd_clean`` on ``datetime``.

    Parameters
    ----------
    fileName : str
        Value of the ``file`` column to select, e.g. ``"Everest.csv"``.
    start_time, end_time : datetime-like, optional
        First and last hour (both inclusive) of the output index. When
        omitted they default to 2025-07-30 00:00:00 and 2025-07-30 23:00:00,
        the window this function previously had hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per hour with a ``datetime`` column, the hourly means of the
        columns left after the drop below, and ``outside_temperature``.
    """
    if start_time is None:
        start_time = pd.Timestamp("2025-07-30 00:00:00")
    if end_time is None:
        end_time = pd.Timestamp("2025-07-30 23:00:00")

    # Select this file's rows and discard the columns that are not averaged.
    # NOTE(review): the displayed frame also has an "Unnamed: 0.1" column that
    # is not dropped here, so it is averaged and interpolated too — confirm
    # whether it should be removed.
    person = df_org[df_org.file == fileName].copy()
    person = person.drop(["Unnamed: 0", "timestamp", "file"], axis=1)

    person["datetime"] = person["datetime"].apply(parse_time)

    # Hourly means, labelled by the start of each hour.
    resample_person = person.resample("h", on="datetime").mean()

    # Complete hourly index covering the requested window.
    complete_index = pd.date_range(start=start_time, end=end_time, freq='h')

    # Reindex so every hour has a row, then interpolate the missing values.
    # NOTE(review): interpolate() runs with its defaults, so hours before the
    # first reading may remain NaN — confirm that is intended.
    resample_person_complete = resample_person.reindex(complete_index)
    resample_person_fill = resample_person_complete.interpolate()

    # Turn the index back into a regular "datetime" column.
    resample_person_fill = resample_person_fill.reset_index()
    resample_person_fill = resample_person_fill.rename(columns={'index': 'datetime'})

    # Attach the outside temperature; a left join keeps every hourly row.
    resample_person_fill = pd.merge(resample_person_fill, dwd_clean, on='datetime', how='left')

    return resample_person_fill

In [9]:
# Distinct values of the file column in the filtered frame; each is processed separately.
persons = df_org["file"].unique()
print(f"Files to process: {persons}")
persons

Files to process: ['Aconcagua.csv' 'Annapurna.csv' 'Everest.csv' 'Kilimanjaro.csv'
 'Makalu.csv']


array(['Aconcagua.csv', 'Annapurna.csv', 'Everest.csv', 'Kilimanjaro.csv',
       'Makalu.csv'], dtype=object)

In [10]:
for p in persons:
    # Derive the label once (file name without ".csv") so the person column,
    # the output path and the log line cannot drift apart.
    filename = p.replace('.csv', '')

    resampled = resample_data(p)
    resampled["person"] = filename

    # One CSV per person; index=False because datetime is already a column.
    output_path = f"processedData/resampled_30_07_25_{filename}.csv"
    resampled.to_csv(output_path, index=False)
    print(f"Saved: {output_path}")

Saved: processedData/resampled_30_07_25_Aconcagua.csv
Saved: processedData/resampled_30_07_25_Annapurna.csv
Saved: processedData/resampled_30_07_25_Everest.csv
Saved: processedData/resampled_30_07_25_Kilimanjaro.csv
Saved: processedData/resampled_30_07_25_Makalu.csv
