In [1]:
import pandas as pd
import numpy as np

In [2]:
# Fix the legacy global NumPy seed so the random marks below are reproducible.
np.random.seed(42)

# Four students (rows) by three subjects (columns); marks are integers in [0, 100).
subjects = ["Maths", "Stats", "DS"]
students = ["Student1", "Student2", "Student3", "Student4"]
marks = np.random.randint(low=0, high=100, size=(len(students), len(subjects)))

df = pd.DataFrame(marks, index=students, columns=subjects)
df.head()

Unnamed: 0,Maths,Stats,DS
Student1,51,92,14
Student2,71,60,20
Student3,82,86,74
Student4,74,87,99


In [3]:
# For every student (row), the subject (column label) holding that row's maximum.
df.idxmax(axis="columns")

Student1    Stats
Student2    Maths
Student3    Stats
Student4       DS
dtype: object

In [4]:
# For every subject (column), the student (row label) holding that column's maximum.
df.idxmax(axis="index")

Maths    Student3
Stats    Student1
DS       Student4
dtype: object

In [5]:
# For every student (row), the subject (column label) holding that row's minimum.
df.idxmin(axis="columns")

Student1       DS
Student2       DS
Student3       DS
Student4    Maths
dtype: object

In [6]:
# For every subject (column), the student (row label) holding that column's minimum.
df.idxmin(axis="index")

Maths    Student1
Stats    Student2
DS       Student1
dtype: object

In [7]:
# Re-seed so this larger frame is reproducible regardless of earlier random draws.
np.random.seed(42)
no_of_students = 1000
no_of_tests = 100

# Draw the scores first, then build 1-based row/column labels with str.format.
scores = np.random.randint(low=0, high=100, size=(no_of_students, no_of_tests))
test_labels = list(map("Test{}".format, range(1, no_of_tests + 1)))
student_labels = list(map("Student{}".format, range(1, no_of_students + 1)))

df = pd.DataFrame(scores, index=student_labels, columns=test_labels)

# Total number of cells = rows * columns, printed with a thousands separator.
n_rows, n_cols = df.shape
print("There are {:,} test scores".format(n_rows * n_cols))
df.head()

There are 100,000 test scores


Unnamed: 0,Test1,Test2,Test3,Test4,Test5,Test6,Test7,Test8,Test9,Test10,...,Test91,Test92,Test93,Test94,Test95,Test96,Test97,Test98,Test99,Test100
Student1,51,92,14,71,60,20,82,86,74,74,...,71,77,86,61,39,84,79,81,52,23
Student2,25,88,59,40,28,14,44,64,88,70,...,61,96,0,26,61,76,2,69,71,26
Student3,8,61,36,96,50,43,23,78,58,31,...,0,47,11,68,36,31,8,98,18,47
Student4,79,2,19,23,53,32,23,74,71,35,...,94,53,57,66,45,23,31,46,85,22
Student5,65,26,1,89,16,32,8,42,47,38,...,88,0,15,60,63,62,68,21,92,66


In [8]:
# Per student: label of the test column where that row reaches its maximum.
df.idxmax(axis="columns")

Student1       Test12
Student2       Test37
Student3       Test85
Student4       Test26
Student5       Test14
                ...  
Student996     Test36
Student997     Test28
Student998     Test53
Student999     Test28
Student1000     Test5
Length: 1000, dtype: object

In [9]:
# Per test: label of the student row where that column reaches its maximum.
df.idxmax(axis="index")

Test1      Student175
Test2       Student80
Test3      Student314
Test4       Student43
Test5      Student101
              ...    
Test96      Student41
Test97      Student31
Test98      Student74
Test99      Student10
Test100     Student89
Length: 100, dtype: object

In [10]:
# Per student: label of the test column where that row reaches its minimum.
df.idxmin(axis="columns")

Student1       Test17
Student2       Test13
Student3       Test31
Student4        Test2
Student5       Test85
                ...  
Student996     Test69
Student997     Test20
Student998     Test84
Student999     Test83
Student1000     Test6
Length: 1000, dtype: object

In [11]:
# Per test: label of the student row where that column reaches its minimum.
df.idxmin(axis="index")

Test1      Student106
Test2       Student76
Test3       Student32
Test4      Student333
Test5      Student327
              ...    
Test96     Student256
Test97      Student34
Test98     Student294
Test99      Student76
Test100     Student33
Length: 100, dtype: object

In [12]:
# Build the 1000 x 100 score frame from a fixed seed, labelling rows and
# columns with f-strings.
np.random.seed(42)
no_of_students = 1000
no_of_tests = 100

raw_scores = np.random.randint(low=0, high=100, size=(no_of_students, no_of_tests))
column_names = [f"Test{x}" for x in range(1, no_of_tests + 1)]
row_names = [f"Student{x}" for x in range(1, no_of_students + 1)]
df = pd.DataFrame(data=raw_scores, index=row_names, columns=column_names)

# Rows times columns, formatted with a thousands separator.
print(f"There are {(len(df)*len(df.columns)):,} test scores")
df.head()

There are 100,000 test scores


Unnamed: 0,Test1,Test2,Test3,Test4,Test5,Test6,Test7,Test8,Test9,Test10,...,Test91,Test92,Test93,Test94,Test95,Test96,Test97,Test98,Test99,Test100
Student1,51,92,14,71,60,20,82,86,74,74,...,71,77,86,61,39,84,79,81,52,23
Student2,25,88,59,40,28,14,44,64,88,70,...,61,96,0,26,61,76,2,69,71,26
Student3,8,61,36,96,50,43,23,78,58,31,...,0,47,11,68,36,31,8,98,18,47
Student4,79,2,19,23,53,32,23,74,71,35,...,94,53,57,66,45,23,31,46,85,22
Student5,65,26,1,89,16,32,8,42,47,38,...,88,0,15,60,63,62,68,21,92,66


In [13]:
# Number of rows in the frame (the length of its index).
len(df.index)

1000

In [14]:
# Total number of cells: row count multiplied by column count.
len(df.index) * len(df.columns)

100000

### Resample

In [18]:
# 25 consecutive one-minute timestamps starting at midnight on 2022-04-06.
# "min" is the supported minute alias; the older "T" spelling is deprecated
# in recent pandas releases, while "min" is accepted by old and new versions.
index = pd.date_range("04/06/2022", periods=25, freq="min")
index

DatetimeIndex(['2022-04-06 00:00:00', '2022-04-06 00:01:00',
               '2022-04-06 00:02:00', '2022-04-06 00:03:00',
               '2022-04-06 00:04:00', '2022-04-06 00:05:00',
               '2022-04-06 00:06:00', '2022-04-06 00:07:00',
               '2022-04-06 00:08:00', '2022-04-06 00:09:00',
               '2022-04-06 00:10:00', '2022-04-06 00:11:00',
               '2022-04-06 00:12:00', '2022-04-06 00:13:00',
               '2022-04-06 00:14:00', '2022-04-06 00:15:00',
               '2022-04-06 00:16:00', '2022-04-06 00:17:00',
               '2022-04-06 00:18:00', '2022-04-06 00:19:00',
               '2022-04-06 00:20:00', '2022-04-06 00:21:00',
               '2022-04-06 00:22:00', '2022-04-06 00:23:00',
               '2022-04-06 00:24:00'],
              dtype='datetime64[ns]', freq='T')

In [22]:
# One running counter value (0..24) per entry of `index`, stored in a "Count" column.
counts = range(25)
df = pd.DataFrame(counts, columns=["Count"], index=index)
df

Unnamed: 0,Count
2022-04-06 00:00:00,0
2022-04-06 00:01:00,1
2022-04-06 00:02:00,2
2022-04-06 00:03:00,3
2022-04-06 00:04:00,4
2022-04-06 00:05:00,5
2022-04-06 00:06:00,6
2022-04-06 00:07:00,7
2022-04-06 00:08:00,8
2022-04-06 00:09:00,9


In [23]:
# Sum "Count" over consecutive 5-minute bins. Uses the "min" alias because the
# older "T" spelling is deprecated in recent pandas releases.
df.resample("5min").sum()

Unnamed: 0,Count
2022-04-06 00:00:00,10
2022-04-06 00:05:00,35
2022-04-06 00:10:00,60
2022-04-06 00:15:00,85
2022-04-06 00:20:00,110


In [24]:
# Sum "Count" over consecutive 6-minute bins; 25 rows do not divide evenly,
# so the final bin holds a single row. Uses "min" instead of the deprecated "T".
df.resample("6min").sum()

Unnamed: 0,Count
2022-04-06 00:00:00,15
2022-04-06 00:06:00,51
2022-04-06 00:12:00,87
2022-04-06 00:18:00,123
2022-04-06 00:24:00,24


In [25]:
# 5-minute bins that include their right edge instead of their left one, so
# the 00:00 row lands in the bin labelled 23:55 of the previous day.
# Uses "min" instead of the deprecated "T" alias.
df.resample("5min", closed="right").sum()

Unnamed: 0,Count
2022-04-05 23:55:00,0
2022-04-06 00:00:00,15
2022-04-06 00:05:00,40
2022-04-06 00:10:00,65
2022-04-06 00:15:00,90
2022-04-06 00:20:00,90


In [26]:
# Same 5-minute sums as the default binning, but each bin is labelled with its
# right edge rather than its left. Uses "min" instead of the deprecated "T" alias.
df.resample("5min", label="right").sum()

Unnamed: 0,Count
2022-04-06 00:05:00,10
2022-04-06 00:10:00,35
2022-04-06 00:15:00,60
2022-04-06 00:20:00,85
2022-04-06 00:25:00,110


In [27]:
# Three consecutive quarterly periods, the first being the quarter containing
# 2022-04-06, with counts 1..3.
index = pd.period_range(start="04/06/2022", periods=3, freq="Q")
df = pd.DataFrame(range(1, 4), columns=["Count"], index=index)
df

Unnamed: 0,Count
2022Q2,1
2022Q3,2
2022Q4,3


In [28]:
# Upsample quarters to months, placing each quarter's value on its first month.
df.resample(rule="M", convention="start").sum()

Unnamed: 0,Count
2022-04,1.0
2022-05,
2022-06,
2022-07,2.0
2022-08,
2022-09,
2022-10,3.0
2022-11,
2022-12,


In [29]:
# Upsample quarters to months, placing each quarter's value on its last month.
df.resample(rule="M", convention="end").sum()

Unnamed: 0,Count
2022-06,1.0
2022-07,
2022-08,
2022-09,2.0
2022-10,
2022-11,
2022-12,3.0


In [30]:
# Nine periods spaced three minutes apart, starting at midnight on 2022-04-06,
# with counts 0..8. "3min" replaces the "3T" spelling, whose "T" alias is
# deprecated in recent pandas releases.
index = pd.period_range("04/06/2022", periods=9, freq="3min")
df = pd.DataFrame(data=range(9), index=index, columns=["Count"])
df

Unnamed: 0,Count
2022-04-06 00:00,0
2022-04-06 00:03,1
2022-04-06 00:06,2
2022-04-06 00:09,3
2022-04-06 00:12,4
2022-04-06 00:15,5
2022-04-06 00:18,6
2022-04-06 00:21,7
2022-04-06 00:24,8


In [31]:
# Re-bin into 7-minute buckets that are both closed and labelled on their
# right edge, summing "Count" per bucket. Uses "min" instead of the deprecated
# "T" alias.
df.resample("7min", label="right", closed="right").sum()

Unnamed: 0,Count
2022-04-05 23:53,3
2022-04-06 00:00,7
2022-04-06 00:07,11
2022-04-06 00:14,15
2022-04-06 00:21,0
