In [1]:
# Walkthrough based on the pandas "10 minutes to pandas" user guide:
# https://pandas.pydata.org/docs/user_guide/10min.html#min
# The pasted "In [n]:" prompts were removed so the cell is plain, valid Python
# (they only ran because IPython strips leading prompts from pasted input).
import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain list of values. No index is passed, so pandas
# supplies the default integer index; np.nan marks the one missing entry.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)

In [3]:
# Echo the Series: six entries on the default 0..5 index, stored as float64.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [6]:
# Build the datetime index used for the DataFrame rows below:
# ten consecutive periods starting on 2013-01-01.
dates = pd.date_range(
    start="20130101",
    periods=10,
)

In [7]:
# Echo the DatetimeIndex: ten daily timestamps, 2013-01-01 through 2013-01-10.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Build a 10x4 frame of standard-normal draws, indexed by `dates`, with the
# string column labels "1".."4".
# A seeded Generator is used so the values are identical on every
# Restart & Run All; unseeded np.random.randn produced new numbers each run.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(rng.standard_normal((10, 4)), index=dates, columns=list("1234"))

In [12]:
# Render the whole frame (10 rows x 4 columns, small enough to show in full).
df

Unnamed: 0,1,2,3,4
2013-01-01,1.367263,-0.171257,0.42861,0.050025
2013-01-02,-1.8864,0.261755,0.033428,-2.107026
2013-01-03,0.085557,0.63621,1.441403,1.979614
2013-01-04,-1.615251,1.010016,0.377631,-1.631396
2013-01-05,0.951663,-0.78042,1.614135,-0.306678
2013-01-06,1.57336,0.909731,-1.145545,0.603921
2013-01-07,-1.009285,-0.242192,0.270666,-1.809228
2013-01-08,-1.095781,-0.92847,-0.474325,-0.760582
2013-01-09,-1.180316,-1.488034,1.177143,0.553916
2013-01-10,1.01186,-0.761266,-1.095542,-0.941848


In [16]:
# Build a DataFrame from a dict that maps column label -> column contents.
# The scalar entries ("A", "B", "F") are repeated on every row; the Series,
# array and Categorical entries each provide four values.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.array([3, 3, 3, 3], dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",
    }
)

In [20]:
# Render df2; the scalar entries (A, B, F) appear repeated on all four rows.
df2


Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [21]:
# Per-column dtypes: each column of df2 keeps its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [23]:
# df2.<TAB>  # tab-completion demo skipped: NOT using IPython

In [24]:
# Top of the frame: head() with no argument shows the first 5 rows here.
df.head()

Unnamed: 0,1,2,3,4
2013-01-01,1.367263,-0.171257,0.42861,0.050025
2013-01-02,-1.8864,0.261755,0.033428,-2.107026
2013-01-03,0.085557,0.63621,1.441403,1.979614
2013-01-04,-1.615251,1.010016,0.377631,-1.631396
2013-01-05,0.951663,-0.78042,1.614135,-0.306678


In [25]:
# Bottom of the frame: tail() with no argument shows the last 5 rows here.
df.tail()

Unnamed: 0,1,2,3,4
2013-01-06,1.57336,0.909731,-1.145545,0.603921
2013-01-07,-1.009285,-0.242192,0.270666,-1.809228
2013-01-08,-1.095781,-0.92847,-0.474325,-0.760582
2013-01-09,-1.180316,-1.488034,1.177143,0.553916
2013-01-10,1.01186,-0.761266,-1.095542,-0.941848


In [26]:
# df2 has only 4 rows, so head() returns the entire frame.
df2.head()

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [27]:
# Row labels of df: the DatetimeIndex that was passed in as `dates`.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10'],
              dtype='datetime64[ns]', freq='D')

In [29]:
# Column labels of df: the strings '1'..'4' (object dtype, not integers).
df.columns

Index(['1', '2', '3', '4'], dtype='object')

In [30]:
# Values of df as a NumPy array; the index and column labels are not included.
df.to_numpy()

array([[ 1.36726254, -0.17125722,  0.42860974,  0.050025  ],
       [-1.88640045,  0.26175511,  0.03342788, -2.10702568],
       [ 0.08555655,  0.6362095 ,  1.44140334,  1.97961414],
       [-1.61525137,  1.01001577,  0.37763067, -1.63139556],
       [ 0.95166254, -0.78041977,  1.61413471, -0.30667838],
       [ 1.57336017,  0.90973138, -1.14554499,  0.60392113],
       [-1.00928518, -0.24219227,  0.27066555, -1.80922849],
       [-1.09578078, -0.92846957, -0.47432482, -0.7605815 ],
       [-1.18031611, -1.48803418,  1.17714336,  0.55391589],
       [ 1.01185966, -0.76126599, -1.09554207, -0.94184849]])

In [31]:
# df2 mixes dtypes across its columns, so the resulting array has dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [32]:
# Only the numeric columns (A, C, D) appear in the describe() result here.
df2.describe() # summary stats

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,1.0,3.0
std,0.0,0.0,0.0
min,1.0,1.0,3.0
25%,1.0,1.0,3.0
50%,1.0,1.0,3.0
75%,1.0,1.0,3.0
max,1.0,1.0,3.0


In [34]:
# Transpose: the dates become the column labels and '1'..'4' become the rows.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06,2013-01-07,2013-01-08,2013-01-09,2013-01-10
1,1.367263,-1.8864,0.085557,-1.615251,0.951663,1.57336,-1.009285,-1.095781,-1.180316,1.01186
2,-0.171257,0.261755,0.63621,1.010016,-0.78042,0.909731,-0.242192,-0.92847,-1.488034,-0.761266
3,0.42861,0.033428,1.441403,0.377631,1.614135,-1.145545,0.270666,-0.474325,1.177143,-1.095542
4,0.050025,-2.107026,1.979614,-1.631396,-0.306678,0.603921,-1.809228,-0.760582,0.553916,-0.941848
