## Upsampling data from quarterly to monthly frequency (within each group)

In [29]:
import pandas as pd
import time

In [10]:
# Sample quarterly data
data = {
    'Date': ['2021-03-31', '2021-06-30', '2021-09-30', '2021-12-31'],
    'Value': [10, 20, 30, 40]
}
df = pd.DataFrame(data)
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
print(df)
print("-" * 40)

# Upsample to monthly frequency and forward fill
monthly_ffill = df.resample('ME').ffill()
print("Monthly Frequency:")
print(monthly_ffill)
print("-" * 40)

# Sample DataFrame with groups
data = {
    'date': pd.to_datetime([
        '2023-01-01', '2023-04-01', '2023-07-01', '2023-10-01',  # Group 1
        '2023-01-01', '2023-04-01', '2023-07-01', '2023-10-01'   # Group 2
    ]),
    'id': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
    'value': [100, 200, 150, 180, 50, 60, 70, 80]
}

df = pd.DataFrame(data)
df.set_index('date', inplace=True)

# Perform forward fill within each group
df_ffill = df.groupby('id').resample('ME', include_groups=False).ffill().reset_index()

# df,groupby().apply(function , include_groups=False) - include_groups=False makes the fucntion work within each group

print("Grouped:")
print(df_ffill)

            Value
Date             
2021-03-31     10
2021-06-30     20
2021-09-30     30
2021-12-31     40
----------------------------------------
Monthly Frequency:
            Value
Date             
2021-03-31     10
2021-04-30     10
2021-05-31     10
2021-06-30     20
2021-07-31     20
2021-08-31     20
2021-09-30     30
2021-10-31     30
2021-11-30     30
2021-12-31     40
----------------------------------------
Grouped:
   id       date  value
0   A 2023-01-31    100
1   A 2023-02-28    100
2   A 2023-03-31    100
3   A 2023-04-30    200
4   A 2023-05-31    200
5   A 2023-06-30    200
6   A 2023-07-31    150
7   A 2023-08-31    150
8   A 2023-09-30    150
9   A 2023-10-31    180
10  B 2023-01-31     50
11  B 2023-02-28     50
12  B 2023-03-31     50
13  B 2023-04-30     60
14  B 2023-05-31     60
15  B 2023-06-30     60
16  B 2023-07-31     70
17  B 2023-08-31     70
18  B 2023-09-30     70
19  B 2023-10-31     80


In [37]:
# Sample DataFrame with irregularly spaced, dated observations for three
# groups ('A', 'B', 'c').
# The same ten month-day stamps and the same ten value pairs repeat for 2023,
# 2024 and 2025; only the group labels differ from one year to the next.
month_days = ['01-01', '01-15', '01-21', '02-15', '02-01',
              '01-11', '01-22', '02-05', '02-11', '01-06']
group_labels = [
    'A', 'A', 'A', 'B', 'B', 'c', 'B', 'A', 'A', 'B',   # 2023 rows
    'c', 'c', 'A', 'B', 'B', 'c', 'B', 'A', 'c', 'B',   # 2024 rows
    'A', 'A', 'c', 'B', 'c', 'B', 'B', 'A', 'A', 'B',   # 2025 rows
]

data = {
    'group': group_labels,
    'date': [f'{year}-{month_day}' for year in (2023, 2024, 2025) for month_day in month_days],
    'value1': [10, 15, 10, 20, 30, 25, 35, 10, 5, 15] * 3,
    'value2': [5, 10, 15, 20, 25, 30, 35, 10, 15, 25] * 3,
}

df = pd.DataFrame(data)

In [38]:
# Parse the 'date' strings and make them the index, which is what the
# time-based resample() calls below operate on.
# df is rebuilt from `data` in one chain rather than mutated in place, so this
# cell can be re-run safely: an in-place set_index() removes the 'date' column,
# and a second run would then fail with KeyError: 'date'.
df = (
    pd.DataFrame(data)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

In [39]:
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
start_time = time.perf_counter()

# Downsample each group's observations to month-end ('ME') bins and take the
# mean of every value column within each bin.
# Each group gets one row for every month between its first and last
# observation, so months without any data show up as NaN rows in the result.
df_monthly = df.groupby('group').resample('ME', include_groups=False).mean().reset_index()

print(df_monthly)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

   group       date     value1  value2
0      A 2023-01-31  11.666667    10.0
1      A 2023-02-28   7.500000    12.5
2      A 2023-03-31        NaN     NaN
3      A 2023-04-30        NaN     NaN
4      A 2023-05-31        NaN     NaN
..   ...        ...        ...     ...
73     c 2024-10-31        NaN     NaN
74     c 2024-11-30        NaN     NaN
75     c 2024-12-31        NaN     NaN
76     c 2025-01-31  10.000000    15.0
77     c 2025-02-28  30.000000    25.0

[78 rows x 4 columns]
--- 0.018128156661987305 seconds ---


In [32]:
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
start_time = time.perf_counter()

# Downsample each group's observations to month-end ('ME') bins and take the
# mean of every value column within each bin.
# The grouped resample emits a row for every month between a group's first and
# last observation; months with no data come out as NaN. dropna() removes every
# row containing a NaN, so only months with observations remain. It runs after
# reset_index(), so surviving rows keep their original integer labels (hence
# the gaps in the printed index).
df_monthly = df.groupby('group').resample('ME', include_groups=False).mean().reset_index().dropna()

print(df_monthly)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

   group       date     value1  value2
0      A 2023-01-31  11.666667    10.0
1      A 2023-02-28   7.500000    12.5
12     A 2024-01-31  10.000000    15.0
13     A 2024-02-29  10.000000    10.0
24     A 2025-01-31  12.500000     7.5
25     A 2025-02-28   7.500000    12.5
26     B 2023-01-31  25.000000    30.0
27     B 2023-02-28  25.000000    22.5
38     B 2024-01-31  25.000000    30.0
39     B 2024-02-29  25.000000    22.5
50     B 2025-01-31  25.000000    30.0
51     B 2025-02-28  20.000000    20.0
52     c 2023-01-31  25.000000    30.0
64     c 2024-01-31  16.666667    15.0
65     c 2024-02-29   5.000000    15.0
76     c 2025-01-31  10.000000    15.0
77     c 2025-02-28  30.000000    25.0
--- 0.018864870071411133 seconds ---
