In [1]:
import pandas as pd
from datetime import timedelta
from epiweeks import Week, Year

In [2]:
# Folder holding the temperature CSV files (David's local layout).
# Keep the trailing slash: file names are appended to it with `+`.
path = "data/Temperature/"

In [3]:
def get_epiweek(date):
    """Return the epidemiological week that contains ``date``.

    Parameters
    ----------
    date : date-like
        Value accepted by ``epiweeks.Week.fromdate``; this notebook passes the
        parsed entries of the ``Date`` column.

    Returns
    -------
    epiweeks.Week
        Built by ``Week.fromdate`` with its default week-numbering system
        (presumably CDC/MMWR -- TODO confirm against the epiweeks docs).
    """
    return Week.fromdate(date)

### Read CSV

In [4]:
# Load the temperature series, give the two data columns short names and parse
# the timestamps. NOTE(review): despite the variable name, the file name and the
# displayed rows suggest one row per day at this stage -- weekly aggregation
# happens later in the notebook.
weekly_temperature = (
    pd.read_csv(path + 'temperature_rio_daily.csv')
    .rename(columns={'system:time_start': 'Date', 'LST_Day_1km': 'temperature'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
weekly_temperature

Unnamed: 0,Date,temperature
0,2015-01-01,42.239
1,2015-01-02,39.204
2,2015-01-03,
3,2015-01-04,
4,2015-01-05,
...,...,...
2850,2022-11-11,
2851,2022-11-12,
2852,2022-11-13,25.200
2853,2022-11-14,


#### Fill each NaN with the mean of the nearest previous and next valid values

In [5]:
# Replace every missing temperature with the mean of the closest valid reading
# before it and the closest valid reading after it. `.ffill()` / `.bfill()` are
# used instead of the deprecated `fillna(method=...)` form.

# Closest valid reading at or before each row; the trailing bfill covers NaNs
# at the very start of the series, which have no earlier reading to carry forward.
previous_valid = weekly_temperature['temperature'].ffill().bfill()

# Closest valid reading at or after each row; the trailing ffill covers NaNs
# at the very end of the series, which have no later reading to pull back.
next_valid = weekly_temperature['temperature'].bfill().ffill()

# Rows that already had a reading are unchanged (both neighbours equal the
# reading itself); gaps get the mean of the two surrounding readings, and gaps
# at either end of the series simply take the single nearest reading.
weekly_temperature['temperature'] = (previous_valid + next_valid) / 2
weekly_temperature

Unnamed: 0,Date,temperature
0,2015-01-01,42.2390
1,2015-01-02,39.2040
2,2015-01-03,36.5490
3,2015-01-04,36.5490
4,2015-01-05,36.5490
...,...,...
2850,2022-11-11,28.2415
2851,2022-11-12,28.2415
2852,2022-11-13,25.2000
2853,2022-11-14,29.2250


### Calculate temperature per epi-week

In [6]:
# Tag every row with the epidemiological week its date falls in
weekly_temperature['Epiweek'] = weekly_temperature['Date'].apply(get_epiweek)

# Average the temperatures inside each epiweek and return a flat frame
# (columns: Epiweek, temperature) ordered by week
mean_temp_per_epiweek = (
    weekly_temperature
    .groupby('Epiweek')['temperature']
    .mean()
    .reset_index()
    .sort_values(by='Epiweek')
)
mean_temp_per_epiweek

Unnamed: 0,Epiweek,temperature
0,201453,39.330667
1,201501,37.142143
2,201502,36.971643
3,201503,37.902000
4,201504,33.811714
...,...,...
405,202241,27.580500
406,202243,29.708286
407,202244,27.855143
408,202245,28.147571


In [8]:
# Persist the per-epiweek means next to the input data; the row index is not written
mean_temp_per_epiweek.to_csv(path_or_buf=path + 'temperature_rio.csv', index=False)