### 10 Minutes to pandas
https://pandas.pydata.org/docs/user_guide/10min.html#minutes-to-pandas

_Follow along / scratch coding with the above reading_

`// import statements`

In [1]:
import numpy as np

In [2]:
import pandas as pd

---

`// create a series allowing pandas to set index`

In [3]:
# No index is passed, so pandas assigns the default 0..5 integer labels.
# The np.nan entry makes the whole Series float64, which is why the
# integers are displayed as 1.0, 3.0, ...
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

---

`// create a date range array with 6 values`

In [4]:
# Six consecutive timestamps starting at 2013-01-01; no freq is given and
# the result is spaced one day apart (freq='D' in the output).
dates = pd.date_range("20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

---

`// create a DataFrame indexed by the date range created above, with columns labeled A, B, C, D and random numbers as values`

In [5]:
# 6 rows x 4 columns of draws from np.random.randn, one row per date.
# NOTE(review): no random seed is set in the visible cells, so these values
# (and every output derived from df below) change on each fresh run.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.891579,-0.629987,-0.646915,-1.586359
2013-01-02,-0.795256,-1.659257,-0.672701,-0.103304
2013-01-03,-0.328656,0.528977,0.206297,-2.199484
2013-01-04,0.19465,-0.267058,-2.094794,0.47627
2013-01-05,-0.32972,-1.146506,-0.004487,0.621417
2013-01-06,0.601877,-1.182562,0.426508,0.596868


---

`// create a DataFrame with set indexes and column values`

In [6]:
# Build a four-row frame from a mapping of column name -> values.
# The scalar entries (A, B, F) are repeated on every row, while the Series,
# the array and the Categorical each supply one value per row.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.array([3, 3, 3, 3], dtype="int32"),
        E=pd.Categorical(["test", "train", "test", "train"]),
        F="foo",
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


---

`// list the data type (dtype) of each column in the df2 DataFrame`

In [7]:
# One dtype per column, indexed by column name; each column of df2 was
# built from a different kind of value, so the dtypes differ.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

---

`// view the first row of the df DataFrame (the number passed to head() sets how many rows are shown)`

In [8]:
# The argument is the number of leading rows to show; 1 gives only 2013-01-01.
df.head(1)

Unnamed: 0,A,B,C,D
2013-01-01,0.891579,-0.629987,-0.646915,-1.586359


---

`// view the last row of the df DataFrame (the number passed to tail() sets how many rows are shown)`

In [9]:
# The argument is the number of trailing rows to show; 1 gives only 2013-01-06.
df.tail(1)

Unnamed: 0,A,B,C,D
2013-01-06,0.601877,-1.182562,0.426508,0.596868


---

`// find the index values for the df DataFrame`

In [10]:
# The row labels: the same DatetimeIndex that was passed as index= when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

---

`// find the column labels of the df DataFrame (the dtype shown is that of the labels, not of the column data)`

In [11]:
# Column labels only. The dtype='object' in the output describes the string
# labels themselves; use .dtypes to see the data type of each column.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

---

`// convert the df DataFrame to a NumPy array -- every column has the same type (float), so the conversion is less expensive`

In [12]:
# Every column of df is float, so the result is a single float array.
# The index and column labels are not part of the returned array.
df.to_numpy()

array([[ 0.89157862, -0.62998664, -0.64691467, -1.58635875],
       [-0.79525589, -1.65925744, -0.67270125, -0.10330443],
       [-0.32865602,  0.52897713,  0.20629689, -2.19948437],
       [ 0.19464998, -0.26705815, -2.09479447,  0.47627015],
       [-0.32971953, -1.14650599, -0.00448692,  0.62141705],
       [ 0.60187701, -1.18256155,  0.42650781,  0.59686821]])

---

`// convert the df2 DataFrame to a NumPy array -- the columns have mixed types, so the array falls back to object dtype, which is more expensive`

In [13]:
# df2's columns have different dtypes, so the array comes back as
# dtype=object, holding Python/pandas objects such as Timestamp and str.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

---

`// summary statistics (count, mean, std, min, quartiles, max) for each column of the df DataFrame`

In [14]:
# One column of statistics per column of df: count, mean, std, min,
# the 25%/50%/75% quantiles, and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.039079,-0.726065,-0.464349,-0.365765
std,0.637974,0.78116,0.915419,1.227312
min,-0.795256,-1.659257,-2.094794,-2.199484
25%,-0.329454,-1.173548,-0.666255,-1.215595
50%,-0.067003,-0.888246,-0.325701,0.186483
75%,0.50007,-0.35779,0.153601,0.566719
max,0.891579,0.528977,0.426508,0.621417


---

`// transpose the data of the df DataFrame`

In [15]:
# Swaps rows and columns: A-D become the row labels and the dates become the column headers.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.891579,-0.795256,-0.328656,0.19465,-0.32972,0.601877
B,-0.629987,-1.659257,0.528977,-0.267058,-1.146506,-1.182562
C,-0.646915,-0.672701,0.206297,-2.094794,-0.004487,0.426508
D,-1.586359,-0.103304,-2.199484,0.47627,0.621417,0.596868


---

`// sort the df DataFrame by its column labels (axis=1) in descending order`

In [16]:
# axis=1 sorts the column labels rather than the row index; with
# ascending=False the columns come out as D, C, B, A and the row order is unchanged.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.586359,-0.646915,-0.629987,0.891579
2013-01-02,-0.103304,-0.672701,-1.659257,-0.795256
2013-01-03,-2.199484,0.206297,0.528977,-0.328656
2013-01-04,0.47627,-2.094794,-0.267058,0.19465
2013-01-05,0.621417,-0.004487,-1.146506,-0.32972
2013-01-06,0.596868,0.426508,-1.182562,0.601877


---

`// sort the df DataFrame by its B column values`

In [17]:
# Reorders whole rows by the values in column B, smallest first.
# The result is not assigned back, so df keeps its original date order.
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-02,-0.795256,-1.659257,-0.672701,-0.103304
2013-01-06,0.601877,-1.182562,0.426508,0.596868
2013-01-05,-0.32972,-1.146506,-0.004487,0.621417
2013-01-01,0.891579,-0.629987,-0.646915,-1.586359
2013-01-04,0.19465,-0.267058,-2.094794,0.47627
2013-01-03,-0.328656,0.528977,0.206297,-2.199484


---

`// select a specific column - shows index (left) and value (right)`

In [18]:
# Selecting a single column label returns a Series that keeps the date
# index and is named after the column (Name: A in the output).
df["A"]

2013-01-01    0.891579
2013-01-02   -0.795256
2013-01-03   -0.328656
2013-01-04    0.194650
2013-01-05   -0.329720
2013-01-06    0.601877
Freq: D, Name: A, dtype: float64

---

`// select rows with a label slice - [start:end], both ends included - shows index, column headers and values`

In [19]:
# A slice of date strings selects rows by index label. Both endpoints are
# included, so using the same date for start and end returns that single row.
df["20130103":"20130103"]

Unnamed: 0,A,B,C,D
2013-01-03,-0.328656,0.528977,0.206297,-2.199484
