In [3]:
import pandas as pd
from datetime import datetime, timedelta
# from tqdm import tqdm
from tqdm.auto import tqdm

# Read the hourly-averages table whose year/week columns are recomputed below.
df = pd.read_csv('hourly_averages_v3.csv')

# Campaign years are counted from 10 April 2019 (year, month, day).
start_year, start_month, start_day = 2019, 4, 10

# Gregorian leap-year test, checked from the most specific rule to the least.
def is_leap_year(year):
    """Return True when *year* is a leap year in the Gregorian calendar."""
    if year % 400 == 0:
        # Century years divisible by 400 are leap years.
        return True
    if year % 100 == 0:
        # All other century years are not.
        return False
    return year % 4 == 0

# Helpers for mapping a timestamp onto the campaign calendar, whose years run
# from the campaign start date (10 April by default) to the day before the
# next anniversary of that date.
def _campaign_year_start(campaign_start, year):
    """Return the campaign-year start that falls in calendar year *year*."""
    try:
        return campaign_start.replace(year=year)
    except ValueError:
        # Only reachable when the campaign starts on 29 February and *year*
        # is not a leap year; fall back to 1 March of that year.
        return campaign_start.replace(year=year, month=3, day=1)


def redefine_year_week(row, campaign_start=None):
    """Compute the campaign year and week for one row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) whose ``'datetime'``
            entry is a string formatted as ``'%Y-%m-%d %H:%M:%S'``.
        campaign_start: Optional ``datetime`` marking the start of campaign
            year 1. When omitted, it is built from the module-level
            ``start_year``, ``start_month`` and ``start_day`` values.

    Returns:
        pandas.Series with two integer entries:
        ``'new_year'`` - 1-based campaign year; timestamps earlier than the
        campaign start are reported as year 1.
        ``'new_week'`` - 1-based week within that campaign year, where week 1
        begins on the Monday on or before the campaign-year start;
        timestamps earlier than the campaign start are reported as week 1.

    Raises:
        ValueError: If ``row['datetime']`` does not match the expected format.
    """
    if campaign_start is None:
        campaign_start = datetime(start_year, start_month, start_day)

    # Get the datetime object for this row
    dt = datetime.strptime(row['datetime'], '%Y-%m-%d %H:%M:%S')

    # Anything before the campaign began is folded into year 1 / week 1.
    if dt < campaign_start:
        return pd.Series({'new_year': 1, 'new_week': 1})

    # Anchor every campaign year to the calendar anniversary of the start
    # date. Stepping forward by 365/366 days keyed on the starting calendar
    # year mis-sizes years that contain 29 February of the *following*
    # calendar year, so the boundary would drift away from the start date.
    year_start = _campaign_year_start(campaign_start, dt.year)
    if dt < year_start:
        year_start = _campaign_year_start(campaign_start, dt.year - 1)
    year_count = year_start.year - campaign_start.year + 1

    # Week 1 starts on the Monday on or before the campaign-year start
    # (weekday() is 0 for Monday), so dt is never earlier than week_1_start.
    week_1_start = year_start - timedelta(days=year_start.weekday())
    week_number = (dt - week_1_start).days // 7 + 1

    return pd.Series({'new_year': year_count, 'new_week': week_number})

# Register tqdm's progress_apply on pandas objects.
tqdm.pandas()

# Derive the campaign year and week for every row, with a progress bar.
campaign_columns = df.progress_apply(redefine_year_week, axis=1)
df[['new_year', 'new_week']] = campaign_columns

# Swap the original "year"/"week" columns for the campaign-based ones.
df.drop(columns=['year', 'week'], inplace=True)
df.rename(columns={'new_year': 'year', 'new_week': 'week'}, inplace=True)

# Write the re-indexed table to a new CSV, omitting the index column.
df.to_csv('hourly_averages_v3_campaign_year.csv', index=False)


  0%|          | 0/351584 [00:00<?, ?it/s]