https://pandas.pydata.org/docs/user_guide/10min.html#basic-data-structures-in-pandas

In [1]:
import pandas as pd
import numpy as np

In [2]:
# A Series built from a plain list; the np.nan entry makes the dtype float64.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
# Only the last expression of a cell is rendered, so a bare `s` line before
# this one would be a no-op. A Series has a single dtype, exposed as `.dtype`.
s.dtype

dtype('float64')

In [3]:
# Ten consecutive calendar days starting 2013-01-01; used below as a row index.
dates = pd.date_range(start="2013-01-01", periods=10, freq="D")
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# Draw the sample values from a seeded generator so the frame (and every
# output derived from it) is identical on each Restart & Run All.
rng = np.random.default_rng(42)
# 10 rows (one per entry of `dates`) by 4 columns of standard-normal values.
df = pd.DataFrame(
    rng.standard_normal((10, 4)),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.995491,-0.884979,-2.749317,-0.813294
2013-01-02,0.247785,-0.971489,1.070218,1.084025
2013-01-03,-0.586929,-2.630745,0.396218,1.445532
2013-01-04,1.403179,-0.027475,0.645445,0.697941
2013-01-05,-0.471021,-1.231519,-0.9187,0.608476
2013-01-06,0.389643,0.207057,0.819567,-0.280101
2013-01-07,0.386991,1.017099,0.472341,-0.485138
2013-01-08,-0.488677,1.166031,0.887148,-1.361028
2013-01-09,-0.362853,1.242709,-0.69671,1.252547
2013-01-10,1.28257,0.466505,-0.698002,0.339556


In [6]:
# One frame with a different dtype in each column. The scalar entries
# ("A", "B", "F") are broadcast to the length of the array-like entries (4 rows).
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
# Display the frame once: only the last expression of a cell is rendered, so
# repeating the expression adds nothing.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
# Preview the top of the frame; 5 is head()'s default row count, spelled out.
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,0.995491,-0.884979,-2.749317,-0.813294
2013-01-02,0.247785,-0.971489,1.070218,1.084025
2013-01-03,-0.586929,-2.630745,0.396218,1.445532
2013-01-04,1.403179,-0.027475,0.645445,0.697941
2013-01-05,-0.471021,-1.231519,-0.9187,0.608476


In [8]:
# Row labels of df: the DatetimeIndex it was constructed with (`dates`).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Column labels of df (the letters A-D passed at construction).
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [10]:
# The frame's values as a plain NumPy array; index and column labels are not included.
df.to_numpy()

array([[ 0.99549147, -0.88497895, -2.74931723, -0.81329449],
       [ 0.24778516, -0.97148927,  1.07021791,  1.08402491],
       [-0.58692923, -2.63074456,  0.39621765,  1.44553222],
       [ 1.40317874, -0.0274753 ,  0.64544504,  0.69794136],
       [-0.47102139, -1.23151893, -0.9187001 ,  0.60847555],
       [ 0.3896434 ,  0.20705678,  0.81956667, -0.28010104],
       [ 0.38699144,  1.01709882,  0.47234073, -0.48513847],
       [-0.48867661,  1.16603054,  0.88714844, -1.36102821],
       [-0.36285268,  1.24270908, -0.69670962,  1.25254662],
       [ 1.28257036,  0.46650485, -0.69800225,  0.33955557]])

In [11]:
# Per-column dtypes of df2, which was built with a different dtype in each column.
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [12]:

# Summary statistics (count, mean, std, min, quartiles, max) for each column of df.
df.describe()

Unnamed: 0,A,B,C,D
count,10.0,10.0,10.0,10.0
mean,0.279618,-0.164681,-0.077179,0.248851
std,0.754456,1.251352,1.188677,0.943669
min,-0.586929,-2.630745,-2.749317,-1.361028
25%,-0.443979,-0.949862,-0.697679,-0.433879
50%,0.317388,0.089791,0.434279,0.474016
75%,0.844029,0.87945,0.776036,0.987504
max,1.403179,1.242709,1.070218,1.445532


In [12]:
# Reorder the columns by label, descending (D, C, B, A); rows are left as they are.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.407593,-0.322386,0.895974,-2.347006
2013-01-02,0.385788,0.912516,-0.136216,0.192383
2013-01-03,-0.694671,0.006287,-0.605742,0.084541
2013-01-04,1.126213,0.464007,0.709731,-0.767921
2013-01-05,0.346612,-0.107587,1.840926,-0.9054
2013-01-06,-0.546422,-0.01445,1.452602,0.443543


In [13]:
# Select a single column by label; the result is a Series named after that column.
df["A"]

2013-01-01   -2.347006
2013-01-02    0.192383
2013-01-03    0.084541
2013-01-04   -0.767921
2013-01-05   -0.905400
2013-01-06    0.443543
Freq: D, Name: A, dtype: float64

In [13]:
# First three rows by integer position, written with the explicit positional indexer.
df.iloc[:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.995491,-0.884979,-2.749317,-0.813294
2013-01-02,0.247785,-0.971489,1.070218,1.084025
2013-01-03,-0.586929,-2.630745,0.396218,1.445532


In [14]:
# Label-based row lookup: the row whose index label is the first entry of
# `dates`, returned as a Series keyed by column name.
df.loc[dates[0]]

A    0.995491
B   -0.884979
C   -2.749317
D   -0.813294
Name: 2013-01-01 00:00:00, dtype: float64

In [15]:
# Label-based selection: every row (`:`), restricted to columns A and B.
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,0.995491,-0.884979
2013-01-02,0.247785,-0.971489
2013-01-03,-0.586929,-2.630745
2013-01-04,1.403179,-0.027475
2013-01-05,-0.471021,-1.231519
2013-01-06,0.389643,0.207057
2013-01-07,0.386991,1.017099
2013-01-08,-0.488677,1.166031
2013-01-09,-0.362853,1.242709
2013-01-10,1.28257,0.466505


In [16]:
# Label slices with .loc include BOTH endpoints, so this returns the rows for
# 2013-01-02 through 2013-01-04, restricted to columns A and B.
df.loc["20130102":"20130104", ["A", "B"]]

Unnamed: 0,A,B
2013-01-02,0.247785,-0.971489
2013-01-03,-0.586929,-2.630745
2013-01-04,1.403179,-0.027475


In [17]:
# Scalar lookup by row label and column label.
df.loc[dates[0], "A"]

np.float64(0.9954914667178926)

In [18]:
# Returns the same value as `df.loc[dates[0], "A"]`; `.at` is the accessor
# dedicated to reading a single value by row/column label.
df.at[dates[0], "A"]

np.float64(0.9954914667178926)

In [19]:
# Position-based row lookup: the row at integer position 3 (the fourth row).
df.iloc[3]

A    1.403179
B   -0.027475
C    0.645445
D    0.697941
Name: 2013-01-04 00:00:00, dtype: float64

In [20]:
# Display the whole frame again for reference (it has only 10 rows).
df

Unnamed: 0,A,B,C,D
2013-01-01,0.995491,-0.884979,-2.749317,-0.813294
2013-01-02,0.247785,-0.971489,1.070218,1.084025
2013-01-03,-0.586929,-2.630745,0.396218,1.445532
2013-01-04,1.403179,-0.027475,0.645445,0.697941
2013-01-05,-0.471021,-1.231519,-0.9187,0.608476
2013-01-06,0.389643,0.207057,0.819567,-0.280101
2013-01-07,0.386991,1.017099,0.472341,-0.485138
2013-01-08,-0.488677,1.166031,0.887148,-1.361028
2013-01-09,-0.362853,1.242709,-0.69671,1.252547
2013-01-10,1.28257,0.466505,-0.698002,0.339556


In [22]:
# Positional block: rows at positions 3 and 4, first two columns (end positions are exclusive).
df.iloc[3:5, :2]

Unnamed: 0,A,B
2013-01-04,-0.767921,0.709731
2013-01-05,-0.9054,1.840926


In [21]:
# NOTE: this rebinds `df` to a new 12-row frame; from here on `df` no longer
# refers to the date-indexed frame built earlier in the notebook.
# A seeded generator keeps the random columns identical on every
# Restart & Run All ("D" is drawn first, then "E").
rng = np.random.default_rng(7)
df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 3,
        "B": ["A", "B", "C"] * 4,
        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        "D": rng.standard_normal(12),
        "E": rng.standard_normal(12),
    }
)
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.238418,-0.215528
1,one,B,foo,-1.161139,0.677508
2,two,C,foo,-1.410109,0.537513
3,three,A,bar,1.01431,-1.353471
4,one,B,bar,-0.463591,0.584242
5,one,C,bar,-0.088493,0.106645
6,two,A,foo,-0.415314,0.033952
7,three,B,foo,0.409113,-1.945065
8,one,C,foo,0.475706,-0.703265
9,one,A,bar,0.086717,-0.545307


In [22]:
# Pivot: one row per (A, B) pair, one column per value of C, cells holding the
# mean of D (pivot_table's default aggregation); combinations that never occur are NaN.
df.pivot_table(values="D", index=["A", "B"], columns=["C"])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.086717,-0.238418
one,B,-0.463591,-1.161139
one,C,-0.088493,0.475706
three,A,1.01431,
three,B,,0.409113
three,C,-0.748196,
two,A,,-0.415314
two,B,-1.033061,
two,C,,-1.410109


In [None]:
import pandas as pd
 

