In [2]:
import pandas as pd
from datetime import datetime
import numpy as np

In [2]:
# Hourly timestamps from 2018-01-01 00:00 through 2018-01-08 00:00, both
# endpoints included (7 days * 24 hours + 1 = 169 entries).
# ISO-formatted dates remove any month/day ambiguity, and the lowercase 'h'
# alias is the hourly spelling that current pandas accepts without warning.
date_rng = pd.date_range(start='2018-01-01', end='2018-01-08', freq='h')

In [3]:
# Inspect the generated index (the repr elides the middle entries).
date_rng

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00',
               '2018-01-01 06:00:00', '2018-01-01 07:00:00',
               '2018-01-01 08:00:00', '2018-01-01 09:00:00',
               ...
               '2018-01-07 15:00:00', '2018-01-07 16:00:00',
               '2018-01-07 17:00:00', '2018-01-07 18:00:00',
               '2018-01-07 19:00:00', '2018-01-07 20:00:00',
               '2018-01-07 21:00:00', '2018-01-07 22:00:00',
               '2018-01-07 23:00:00', '2018-01-08 00:00:00'],
              dtype='datetime64[ns]', length=169, freq='H')

In [4]:
# Check the type of a single element pulled out of the index.
type(date_rng[0])

pandas._libs.tslibs.timestamps.Timestamp

Let's create an example data frame with the timestamp data and look at the first 15 elements:

In [5]:
# Demo frame: one row per hourly timestamp plus a random integer reading in
# the half-open range [0, 100).
# A locally seeded generator keeps the sample values identical on every
# "Restart & Run All" without touching NumPy's global random state.
rng = np.random.default_rng(42)
df = pd.DataFrame(date_rng, columns=['date'])
df['data'] = rng.integers(0, 100, size=len(date_rng))

In [6]:
# Preview the first 15 rows of the frame.
df.head(15)

Unnamed: 0,date,data
0,2018-01-01 00:00:00,58
1,2018-01-01 01:00:00,53
2,2018-01-01 02:00:00,19
3,2018-01-01 03:00:00,4
4,2018-01-01 04:00:00,55
5,2018-01-01 05:00:00,73
6,2018-01-01 06:00:00,75
7,2018-01-01 07:00:00,99
8,2018-01-01 08:00:00,29
9,2018-01-01 09:00:00,30


Convert the data frame index to a datetime index, then show the first few rows:

In [8]:
# Promote the timestamps to the index: add a parsed 'datetime' column, index
# the frame by it, and drop the now-redundant 'date' column.
# Each step returns a new frame, so nothing is mutated with inplace=True.
df = (
    df.assign(datetime=pd.to_datetime(df['date']))
      .set_index('datetime')
      .drop(columns=['date'])
)
df.head()

Unnamed: 0_level_0,data
datetime,Unnamed: 1_level_1
2018-01-01 00:00:00,58
2018-01-01 01:00:00,53
2018-01-01 02:00:00,19
2018-01-01 03:00:00,4
2018-01-01 04:00:00,55


In [9]:
#import datetime
from datetime import datetime, timedelta

In [11]:
# Capture the current local timestamp.
now = datetime.now()
# print() stringifies its arguments itself, so no explicit str() is needed.
print("Today's date: ", now)

Today's date:  2020-02-21 13:53:34.334899


In [12]:
# Move fifteen days forward from the captured timestamp.
fifteen_days = timedelta(days=15)
future_date_after_15days = now + fifteen_days
print("date after 15 day: ", future_date_after_15days)

date after 15 day:  2020-03-07 13:53:34.334899


In [13]:
# Step back two weeks from the captured timestamp, then report both the
# resulting value and its Python type.
fortnight = timedelta(weeks=2)
two_weeks_ago = now - fortnight
print('Date two weeks ago: ', two_weeks_ago)
print('two_weeks_ago object type: ', type(two_weeks_ago))

Date two weeks ago:  2020-02-07 13:53:34.334899
two_weeks_ago object type:  <class 'datetime.datetime'>


In [14]:
import pandas as pd

In [15]:
# Parse a free-form English date string with pd.to_datetime().
date_text = "8th of sep, 2019"
date = pd.to_datetime(date_text)
print(date)

2019-09-08 00:00:00


---

In [3]:
# Caller-side frame for the join demos: six rows keyed K0..K5, each with a
# matching A0..A5 value.
row_ids = range(6)
df = pd.DataFrame({
    'key': [f'K{i}' for i in row_ids],
    'A': [f'A{i}' for i in row_ids],
})

In [4]:
# Display the six-row frame with its 'key' and 'A' columns.
df

Unnamed: 0,key,A
0,K0,A0
1,K1,A1
2,K2,A2
3,K3,A3
4,K4,A4
5,K5,A5


In [5]:
# Second frame for the join demos: only keys K0..K2, each with a B value.
other_ids = range(3)
other = pd.DataFrame({
    'key': [f'K{i}' for i in other_ids],
    'B': [f'B{i}' for i in other_ids],
})

In [6]:
# Display the three-row frame with its 'key' and 'B' columns.
other

Unnamed: 0,key,B
0,K0,B0
1,K1,B1
2,K2,B2


Join DataFrames using their indexes.

In [7]:
# Index-on-index join. Both frames carry a 'key' column, so the suffixes give
# the two copies distinct names (key_caller / key_other) in the result.
df.join(other, lsuffix='_caller', rsuffix='_other')

Unnamed: 0,key_caller,A,key_other,B
0,K0,A0,K0,B0
1,K1,A1,K1,B1
2,K2,A2,K2,B2
3,K3,A3,,
4,K4,A4,,
5,K5,A5,,


If we want to join using the key columns, we need to set key to be the index in both df and other. The joined DataFrame will have key as its index.

In [8]:
# Index both frames by 'key' first so the join aligns rows on key values.
df_by_key = df.set_index('key')
other_by_key = other.set_index('key')
df_by_key.join(other_by_key)

Unnamed: 0_level_0,A,B
key,Unnamed: 1_level_1,Unnamed: 2_level_1
K0,A0,B0
K1,A1,B1
K2,A2,B2
K3,A3,
K4,A4,
K5,A5,


Another option to join using the key columns is to use the on parameter. DataFrame.join always uses other’s index but we can use any column in df. This method preserves the original DataFrame’s index in the result.

In [9]:
# Only `other` is re-indexed by 'key'; `on` names the df column to match
# against that index.
other_by_key = other.set_index('key')
df.join(other_by_key, on='key')

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,
4,K4,A4,
5,K5,A5,


---
# Categorical Index

In [3]:
# Build a CategoricalIndex directly from a list of labels.
pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'])

CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category')

`CategoricalIndex` can also be instantiated from a `Categorical`:

In [4]:
# Build the Categorical first, then hand it to the index constructor.
c = pd.Categorical(['a', 'b', 'c'] * 2)
pd.CategoricalIndex(c)

CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category')

Ordered `CategoricalIndex` can have a min and max value.

In [5]:
# Ordered categorical index whose category order is explicitly c < b < a.
ci = pd.CategoricalIndex(
    ['a', 'b', 'c'] * 2,
    categories=['c', 'b', 'a'],
    ordered=True,
)
ci

CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['c', 'b', 'a'], ordered=True, dtype='category')

---
# Multi-Indexing

In [6]:
# Wide sample table: a row id plus X and Y readings for samples "One" and "Two".
n_rows = 3
df = pd.DataFrame({
    'row': list(range(n_rows)),
    'One_X': [1.1] * n_rows,
    'One_Y': [1.2] * n_rows,
    'Two_X': [1.11] * n_rows,
    'Two_Y': [1.22] * n_rows,
})
df

Unnamed: 0,row,One_X,One_Y,Two_X,Two_Y
0,0,1.1,1.2,1.11,1.22
1,1,1.1,1.2,1.11,1.22
2,2,1.1,1.2,1.11,1.22


In [7]:
# Use the 'row' column as the frame's labeled index.
df = df.set_index('row')
df

Unnamed: 0_level_0,One_X,One_Y,Two_X,Two_Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1.1,1.2,1.11,1.22
1,1.1,1.2,1.11,1.22
2,1.1,1.2,1.11,1.22


In [8]:
# With Hierarchical Columns
df.columns = pd.MultiIndex.from_tuples([tuple(c.split('_'))
                                        for c in df.columns])
df

Unnamed: 0_level_0,One,One,Two,Two
Unnamed: 0_level_1,X,Y,X,Y
row,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,1.1,1.2,1.11,1.22
1,1.1,1.2,1.11,1.22
2,1.1,1.2,1.11,1.22


In [9]:
# Stack the outer column level (the sample name) into the row index, then move
# that new index level back out into an ordinary column.
stacked = df.stack(0)
df = stacked.reset_index(1)
df

Unnamed: 0_level_0,level_1,X,Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,One,1.1,1.2
0,Two,1.11,1.22
1,One,1.1,1.2
1,Two,1.11,1.22
2,One,1.1,1.2
2,Two,1.11,1.22


In [10]:
# Replace the column labels positionally; the first column is the 'level_1'
# label that reset_index() generated automatically.
tidy_labels = ['Sample', 'All_X', 'All_Y']
df.columns = tidy_labels
df

Unnamed: 0_level_0,Sample,All_X,All_Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,One,1.1,1.2
0,Two,1.11,1.22
1,One,1.1,1.2
1,Two,1.11,1.22
2,One,1.1,1.2
2,Two,1.11,1.22
