# Pandas
Worked examples following the official pandas user guide ["10 minutes to pandas"](https://pandas.pydata.org/docs/user_guide/10min.html).

In [1]:
import numpy as np
import pandas as pd

## Object creation

In [2]:
# Minimal frame: the dict maps each column label to that column's values.
pd.DataFrame(data={"A": [1, 2, 3]})

Unnamed: 0,A
0,1
1,2
2,3


In [3]:
# Six consecutive calendar days starting 2013-01-01; used as the row index of `df`.
dates = pd.date_range(start="20130101", periods=6)

In [7]:
# 6x4 frame of standard-normal samples, indexed by `dates`, columns A-D.
# A seeded generator makes Restart & Run All reproduce the same values.
# NOTE: the outputs saved in this notebook were produced before the seed was
# added; re-run the cells to refresh them.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((6, 4)),
    index=dates,
    columns=list("ABCD"),
)

In [8]:
# One entry per column. The scalar entries ("A", "B", "F") are broadcast to the
# length of the list-like columns, and every column keeps its own dtype.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="foo",
    )
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [9]:
# Per-column dtypes: each column of df2 keeps the dtype it was constructed with.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing data

In [10]:
# Peek at the top of the frame (five rows, the default, spelled out).
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,-0.979944,0.718523,-1.125321,-0.351343
2013-01-02,-2.28034,-1.199615,-0.413113,-0.023053
2013-01-03,0.210404,1.302316,-1.046942,0.994456
2013-01-04,-0.361405,1.046153,-0.71685,-0.819919
2013-01-05,-0.299721,0.752822,-1.303618,-1.139991


In [11]:
# Peek at the bottom of the frame: the last three rows.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.361405,1.046153,-0.71685,-0.819919
2013-01-05,-0.299721,0.752822,-1.303618,-1.139991
2013-01-06,-0.094168,0.254632,-1.013764,-0.359291


In [12]:
# Row labels of df: the `dates` index that was passed at construction.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [13]:
# Underlying values as a NumPy array; the row and column labels are not part of the result.
df.to_numpy()

array([[-0.97994408,  0.71852346, -1.1253214 , -0.3513426 ],
       [-2.28034009, -1.1996153 , -0.41311293, -0.02305319],
       [ 0.21040382,  1.3023157 , -1.0469418 ,  0.99445614],
       [-0.36140465,  1.04615326, -0.71684972, -0.81991885],
       [-0.29972149,  0.75282165, -1.30361817, -1.13999142],
       [-0.09416756,  0.25463205, -1.0137644 , -0.35929106]])

In [14]:
# df2 mixes several column dtypes, so the result is one object-dtype array (see output).
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [15]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.634196,0.479138,-0.936601,-0.28319
std,0.896727,0.894422,0.319617,0.73928
min,-2.28034,-1.199615,-1.303618,-1.139991
25%,-0.825309,0.370605,-1.105726,-0.704762
50%,-0.330563,0.735673,-1.030353,-0.355317
75%,-0.145556,0.97282,-0.791078,-0.105126
max,0.210404,1.302316,-0.413113,0.994456


In [16]:
# Transpose: columns A-D become the rows and the dates become the columns.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-0.979944,-2.28034,0.210404,-0.361405,-0.299721,-0.094168
B,0.718523,-1.199615,1.302316,1.046153,0.752822,0.254632
C,-1.125321,-0.413113,-1.046942,-0.71685,-1.303618,-1.013764
D,-0.351343,-0.023053,0.994456,-0.819919,-1.139991,-0.359291


In [17]:
# Reorder the rows by the values in column "B", smallest first.
df.sort_values("B", ascending=True)

Unnamed: 0,A,B,C,D
2013-01-02,-2.28034,-1.199615,-0.413113,-0.023053
2013-01-06,-0.094168,0.254632,-1.013764,-0.359291
2013-01-01,-0.979944,0.718523,-1.125321,-0.351343
2013-01-05,-0.299721,0.752822,-1.303618,-1.139991
2013-01-04,-0.361405,1.046153,-0.71685,-0.819919
2013-01-03,0.210404,1.302316,-1.046942,0.994456


## Selection

In [None]:
# NOTE: Jupyter renders only the value of a cell's final expression, and this
# cell ends in an assignment, so none of the bare expressions below display
# anything when the cell runs as a whole. Move a line into its own cell (or wrap
# it in display(...)) to inspect its result.

# Getting
df["A"]  # a single column, returned as a Series
df[0:3]  # slicing with [] selects rows by position

# Selection by label
df.loc[dates[0]]
df.loc[:, ["A", "B"]]
df.loc["20130102":"20130104", ["A", "B"]]  # label slices include both endpoints
df.loc["20130102", ["A", "B"]]
df.loc[dates[0], "A"]
df.at[dates[0], "A"]  # scalar access by label

# Selection by position
df.iloc[3]
df.iloc[3:5, 0:2]
df.iloc[[1, 2, 4], [0, 2]]
df.iloc[1:3, :]
df.iloc[:, 1:3]
df.iloc[1, 1]
df.iat[1, 1]  # scalar access by position

# Boolean indexing
df[df["A"] > 0]  # keep the rows where column A is positive
df[df > 0]  # element-wise mask: entries that fail the test become NaN

# Work on a copy so the extra label column does not leak into `df`.
df2 = df.copy()
df2["E"] = ["one", "one", "two", "three", "four", "three"]
df2[df2["E"].isin(["two", "four"])]  # filter rows by membership in a set of values

# Setting
# s1 starts one day after `dates`; column assignment aligns on the index, so
# 2013-01-01 gets NaN in "F" and the 2013-01-07 value is dropped.
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range("20130102", periods=6))
df["F"] = s1

df.at[dates[0], "A"] = 0  # set one value by label
df.iat[0, 1] = 0  # set one value by position
df.loc[:, "D"] = np.array([5] * len(df))  # overwrite a whole column from an array

# Setting through a boolean mask: flip the sign of every positive entry.
df2 = df.copy()
df2[df2 > 0] = -df2

## Missing data

## Operations
## Merge
## Grouping
## Reshaping
## Time series
## Categoricals
## Plotting
## Importing and exporting data
## Gotchas