[How to find the daily max for several columns and append the result to the dataframe?](https://stackoverflow.com/questions/63800602)

In [1]:
import pandas as pd
from datetime import date, timedelta

In [2]:
# one reference timestamp; every date below is derived from it so the whole
# 'date' column is built from pd.Timestamp values only
today = pd.Timestamp.today()
midnight = today.normalize()  # today at 00:00:00, base for the earlier days

# note that the 8 and 7 for china and america are swapped for testing
# NOTE: the earlier days are Timestamps rather than datetime.date objects; mixing the
# two types can leave the column as object dtype, and a Timestamp cannot be ordered
# against a datetime.date, which the groupby / sort_values steps below rely on
df = pd.DataFrame([['china', today, 1, 4, 8],
                   ['america', today, 2, 5, 7],
                   ['china', midnight - pd.Timedelta(days=1), 3, 6, 9],
                   ['india', midnight - pd.Timedelta(days=2), 4, 7, 10]],
                  columns=['country', 'date', 'a', 'b', 'c'])

# find the daily max: 1 line of fast code compared to 7 lines of a for-loop
# as_index=False keeps 'date' as a regular column so the result can be concatenated with df
daily_max = df.groupby('date', as_index=False)[['a', 'b', 'c']].max()

# label the aggregated rows so they can be told apart from the country rows
daily_max['country'] = 'daily max'

# combine with df, then order by date and country and renumber the index
df_updated = pd.concat([df, daily_max]).sort_values(['date', 'country']).reset_index(drop=True)

df_updated

Unnamed: 0,country,date,a,b,c
0,daily max,2020-09-06 00:00:00.000000,4,7,10
1,india,2020-09-06 00:00:00.000000,4,7,10
2,china,2020-09-07 00:00:00.000000,3,6,9
3,daily max,2020-09-07 00:00:00.000000,3,6,9
4,america,2020-09-08 16:08:14.696839,2,5,7
5,china,2020-09-08 16:08:14.696839,1,4,8
6,daily max,2020-09-08 16:08:14.696839,2,5,8


In [3]:
# flag, for each row, whether its a/b/c value equals the maximum of its date group
value_cols = ['a', 'b', 'c']
# transform('max') returns the group maximum aligned to the original rows
group_max = df.groupby('date')[value_cols].transform('max')
df[['max_a', 'max_b', 'max_c']] = group_max == df[value_cols]

df

Unnamed: 0,country,date,a,b,c,max_a,max_b,max_c
0,china,2020-09-08 16:08:14.696839,1,4,8,False,False,True
1,america,2020-09-08 16:08:14.696839,2,5,7,True,True,False
2,china,2020-09-07 00:00:00.000000,3,6,9,True,True,True
3,india,2020-09-06 00:00:00.000000,4,7,10,True,True,True


In [4]:
# load first 6 columns of data and parse dates
df = pd.read_csv('https://raw.githubusercontent.com/trenton3983/stack_overflow/master/data/so_data/2020-09-08%2063800602/covid_data.csv', parse_dates=['date'], usecols=range(6))

# remove World from location, because this is the sum for each day and will always be the max
# .copy() makes the filtered frame independent of the loaded one, so adding the
# flag columns below assigns to a real frame instead of a slice (no SettingWithCopyWarning)
df = df[df.location != 'World'].copy()

# get last four columns, because I'm too lazy to type them
cols = df.columns[-4:]
max_cols = [f'max {v}' for v in cols]  # new column names

# add columns using groupby and transform: True where the row's value equals
# the maximum of that column for the row's date
df[max_cols] = df.groupby('date')[cols].transform('max') == df[cols]

# find the daily max: 1 line of fast code compared to 7 lines of a for-loop
# as_index=False keeps 'date' as a regular column so the result can be concatenated with df
daily_max = df.groupby('date', as_index=False)[cols].max()

# label the aggregated rows so they can be told apart from the location rows
daily_max['location'] = 'daily max'

# combine with df; daily_max has no flag columns, so those are missing on its rows
df_updated = pd.concat([df, daily_max]).sort_values(['date', 'location']).reset_index(drop=True)

df_updated[df_updated.date == '2020-07-04'].tail(15)

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths,max new_cases,max new_deaths,max total_cases,max total_deaths
28124,2020-07-04,Ukraine,876.0,27.0,46763.0,1212.0,False,False,False,False
28125,2020-07-04,United Arab Emirates,672.0,1.0,50141.0,318.0,False,False,False,False
28126,2020-07-04,United Kingdom,602.0,49.0,286141.0,40581.0,False,False,False,False
28127,2020-07-04,United States,54442.0,694.0,2794321.0,129434.0,True,False,True,True
28128,2020-07-04,United States Virgin Islands,13.0,0.0,111.0,6.0,False,False,False,False
28129,2020-07-04,Uruguay,5.0,0.0,952.0,28.0,False,False,False,False
28130,2020-07-04,Uzbekistan,301.0,2.0,9500.0,29.0,False,False,False,False
28131,2020-07-04,Vatican,0.0,0.0,12.0,0.0,False,False,False,False
28132,2020-07-04,Venezuela,264.0,2.0,6537.0,59.0,False,False,False,False
28133,2020-07-04,Vietnam,0.0,0.0,355.0,0.0,False,False,False,False


In [5]:
# rows dated after 2020-01-31
date_mask = df_updated['date'] > '2020-01-31'

# rows that are either the per-day summary row or hold at least one daily maximum;
# the flags are compared against True explicitly rather than used directly as a mask
flag_cols = ['max new_cases', 'max new_deaths', 'max total_cases', 'max total_deaths']
is_daily_max_row = df_updated['location'] == 'daily max'
holds_any_max = df_updated[flag_cols].eq(True).any(axis=1)
col_mask = is_daily_max_row | holds_any_max

In [6]:
# first 15 rows that satisfy both the date filter and the max/summary filter
selected_rows = date_mask & col_mask
df_updated.loc[selected_rows].head(15)

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths,max new_cases,max new_deaths,max total_cases,max total_deaths
2188,2020-02-01,China,2095.0,46.0,11809.0,259.0,True,True,True,True
2243,2020-02-01,daily max,2095.0,46.0,11809.0,259.0,,,,
2256,2020-02-02,China,2590.0,45.0,14399.0,304.0,True,True,True,True
2311,2020-02-02,daily max,2590.0,45.0,14399.0,304.0,,,,
2324,2020-02-03,China,2812.0,57.0,17211.0,361.0,True,True,True,True
2379,2020-02-03,daily max,2812.0,57.0,17211.0,361.0,,,,
2392,2020-02-04,China,3237.0,65.0,20448.0,426.0,True,True,True,True
2447,2020-02-04,daily max,3237.0,65.0,20448.0,426.0,,,,
2460,2020-02-05,China,3872.0,66.0,24320.0,492.0,True,True,True,True
2515,2020-02-05,daily max,3872.0,66.0,24320.0,492.0,,,,


In [7]:
# last 15 rows that satisfy both the date filter and the max/summary filter
df_updated.loc[date_mask & col_mask].tail(15)

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths,max new_cases,max new_deaths,max total_cases,max total_deaths
41147,2020-09-04,United States,36249.0,1053.0,6150655.0,186797.0,False,False,True,True
41158,2020-09-04,daily max,83341.0,1096.0,6150655.0,186797.0,,,,
41248,2020-09-05,India,86432.0,1089.0,4023179.0,69561.0,True,True,False,False
41357,2020-09-05,United States,51071.0,968.0,6201726.0,187765.0,False,False,True,True
41368,2020-09-05,daily max,86432.0,1089.0,6201726.0,187765.0,,,,
41458,2020-09-06,India,90632.0,1065.0,4113811.0,70626.0,True,True,False,False
41567,2020-09-06,United States,44140.0,773.0,6245866.0,188538.0,False,False,True,True
41578,2020-09-06,daily max,90632.0,1065.0,6245866.0,188538.0,,,,
41636,2020-09-07,Ecuador,-8261.0,3800.0,109784.0,10524.0,False,True,False,False
41668,2020-09-07,India,90802.0,1016.0,4204613.0,71642.0,True,False,False,False
