In [1]:
import numpy as np

In [2]:
import pandas as pd

## Object creation 

Creating a **Series** by passing a list of values, letting pandas create a default integer index:

In [3]:
# np.nan marks a missing entry; with it present the Series is stored as float64
# (see the dtype in the output below) and gets the default 0..5 integer index.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [4]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

Creating a **DataFrame** by passing a NumPy array, with a datetime index and labeled columns:

In [5]:
# Six consecutive calendar days starting 2013-01-01; daily frequency is spelled
# out explicitly (it is also what pandas uses when freq is omitted).
dates = pd.date_range(start="20130101", periods=6, freq="D")

In [6]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# 6x4 frame of standard-normal draws, one row per date and columns A-D.
# NOTE(review): no random seed is set in the visible cells, so the numbers shown
# in the outputs below will differ on every fresh run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list("ABCD"),
)

In [8]:
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.558375,-0.373283,1.164895,0.263129
2013-01-02,0.420508,-0.264466,0.545965,0.562743
2013-01-03,0.959432,-1.877503,2.014876,2.016936
2013-01-04,0.953524,-1.387702,0.658471,-0.400184
2013-01-05,-0.293881,2.313481,0.513831,-0.268608
2013-01-06,1.716714,0.872217,-0.778302,-0.294951


In [9]:
# Create a DataFrame from a dict: each key becomes a column label. Scalar values
# ("A", "B", "F") are repeated on every one of the four rows, while the Series,
# array and Categorical each supply four values with their own dtype.
df2 = pd.DataFrame(
    data={
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [10]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [11]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing data

In [12]:
# Show the top rows of the frame; with no argument the first five are displayed.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-1.558375,-0.373283,1.164895,0.263129
2013-01-02,0.420508,-0.264466,0.545965,0.562743
2013-01-03,0.959432,-1.877503,2.014876,2.016936
2013-01-04,0.953524,-1.387702,0.658471,-0.400184
2013-01-05,-0.293881,2.313481,0.513831,-0.268608


In [13]:
# Show the bottom rows of the frame; with no argument the last five are displayed.
df.tail()

Unnamed: 0,A,B,C,D
2013-01-02,0.420508,-0.264466,0.545965,0.562743
2013-01-03,0.959432,-1.877503,2.014876,2.016936
2013-01-04,0.953524,-1.387702,0.658471,-0.400184
2013-01-05,-0.293881,2.313481,0.513831,-0.268608
2013-01-06,1.716714,0.872217,-0.778302,-0.294951


In [14]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

**NumPy arrays have one dtype for the entire array, while pandas DataFrames have one dtype per column.**

In [16]:
# Every column of df holds floats, so the result is a single 2-D float array;
# the index and column labels are not part of the returned array.
df.to_numpy()

array([[-1.55837495, -0.3732833 ,  1.16489521,  0.26312908],
       [ 0.42050781, -0.26446613,  0.54596534,  0.56274273],
       [ 0.95943167, -1.8775031 ,  2.01487611,  2.01693594],
       [ 0.95352374, -1.38770213,  0.65847129, -0.40018443],
       [-0.29388139,  2.31348081,  0.51383107, -0.26860755],
       [ 1.71671416,  0.87221745, -0.77830173, -0.29495057]])

In [17]:
# df2 mixes several column dtypes (see df2.dtypes above), so the only dtype that
# can hold every value is object, as the output below shows.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

**describe()** shows a quick statistical summary of your data:

In [18]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.36632,-0.119543,0.686623,0.313178
std,1.154418,1.528871,0.915362,0.914851
min,-1.558375,-1.877503,-0.778302,-0.400184
25%,-0.115284,-1.134097,0.521865,-0.288365
50%,0.687016,-0.318875,0.602218,-0.002739
75%,0.957955,0.588047,1.038289,0.487839
max,1.716714,2.313481,2.014876,2.016936
