In [2]:
import pandas as pd
import numpy as np

In [3]:
# Six consecutive daily timestamps beginning 2022-01-12.
my_dates_index = pd.date_range(start='20220112', periods=6, freq='D')
my_dates_index

DatetimeIndex(['2022-01-12', '2022-01-13', '2022-01-14', '2022-01-15',
               '2022-01-16', '2022-01-17'],
              dtype='datetime64[ns]', freq='D')

##### sample NumPy data


In [4]:
# The integers 0..23 laid out as 6 rows x 4 columns.
# np.arange already returns an ndarray, so wrapping it in np.array(...) only
# made a needless copy.
sample_numpy_data = np.arange(24).reshape(6, 4)
sample_numpy_data

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

##### sample data frame, with column headers; uses `my_dates_index` as the row index
documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html

In [5]:
# Wrap the NumPy block in a DataFrame: rows are labelled by my_dates_index,
# columns by the letters A-D.
sample_df = pd.DataFrame(
    data=sample_numpy_data,
    index=my_dates_index,
    columns=['A', 'B', 'C', 'D'],
)
sample_df

Unnamed: 0,A,B,C,D
2022-01-12,0,1,2,3
2022-01-13,4,5,6,7
2022-01-14,8,9,10,11
2022-01-15,12,13,14,15
2022-01-16,16,17,18,19
2022-01-17,20,21,22,23


In [14]:
# Label-based row slice on the date index. Using .loc makes it explicit that
# rows (not columns) are being selected; both endpoints are included.
sample_df.loc['2022-01-13':'2022-01-16']

Unnamed: 0,A,B,C,D
2022-01-13,4,5,6,7
2022-01-14,8,9,10,11
2022-01-15,12,13,14,15
2022-01-16,16,17,18,19


##### data frame from a Python dictionary


In [20]:
# Build a frame from a dict of columns. The scalar entries ('float', 'time',
# 'dull') are repeated for every row; the array-like entries supply 4 rows.
df_from_dictionary = pd.DataFrame(
    {
        'float': 1.0,
        'time': pd.Timestamp('20220113'),
        'seq': np.arange(1, 5),
        'array': np.full(4, 3, dtype='int32'),
        'categories': ['test', 'train', 'taxes', 'tools'],
        'dull': 'boring data',
    }
)
df_from_dictionary

Unnamed: 0,float,time,seq,array,categories,dull
0,1.0,2022-01-13,1,3,test,boring data
1,1.0,2022-01-13,2,3,train,boring data
2,1.0,2022-01-13,3,3,taxes,boring data
3,1.0,2022-01-13,4,3,tools,boring data


In [22]:
# Same dict-based construction, now with one date per row in 'time' and
# explicit row labels. The labels are passed as index= instead of as an
# unnamed second positional argument, so their role is clear at the call site.
df_from_dictionary = pd.DataFrame(
    {
        'float': 1.,
        'time': pd.date_range('20220112', periods=4),
        'seq': np.arange(1, 5),
        'array': np.array([3] * 4, dtype='int32'),
        'categories': ["test", "train", "taxes", "tools"],
        'dull': 'boring data',
    },
    index=['a', 'b', 'c', 'd'],
)
df_from_dictionary

Unnamed: 0,float,time,seq,array,categories,dull
a,1.0,2022-01-12,1,3,test,boring data
b,1.0,2022-01-13,2,3,train,boring data
c,1.0,2022-01-14,3,3,taxes,boring data
d,1.0,2022-01-15,4,3,tools,boring data


##### pandas keeps a separate data type (dtype) for each column


In [8]:
# Show the dtype pandas holds for each column of df_from_dictionary.
df_from_dictionary.dtypes

float                float64
time          datetime64[ns]
seq                    int64
array                  int32
categories            object
dull                  object
dtype: object

##### head and tail show the first / last n rows; the default is n=5


In [11]:
# First three rows of the frame.
sample_df.head(n=3)

Unnamed: 0,A,B,C,D
2022-01-12,0,1,2,3
2022-01-13,4,5,6,7
2022-01-14,8,9,10,11


In [12]:
# Last two rows of the frame.
sample_df.tail(n=2)

Unnamed: 0,A,B,C,D
2022-01-16,16,17,18,19
2022-01-17,20,21,22,23


##### underlying data: values, index and columns


In [12]:
# Underlying data as a NumPy array. to_numpy() is the accessor the pandas
# documentation recommends in place of the older .values attribute.
sample_df.to_numpy()

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [27]:
# Row labels of the frame (the DatetimeIndex it was constructed with).
sample_df.index

DatetimeIndex(['2022-01-12', '2022-01-13', '2022-01-14', '2022-01-15',
               '2022-01-16', '2022-01-17'],
              dtype='datetime64[ns]', freq='D')

In [28]:
# Column labels of the frame.
sample_df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

##### transpose rows and columns

In [32]:
# Swap rows and columns: dates become column headers, A-D become row labels.
sample_df.transpose()

Unnamed: 0,2022-01-12,2022-01-13,2022-01-14,2022-01-15,2022-01-16,2022-01-17
A,0,4,8,12,16,20
B,1,5,9,13,17,21
C,2,6,10,14,18,22
D,3,7,11,15,19,23
