### 10 Minutes to pandas
https://pandas.pydata.org/docs/user_guide/10min.html#minutes-to-pandas

_Follow along / scratch coding with the above reading_

`// import statements`

In [1]:
import numpy as np

In [2]:
import pandas as pd

---

`// create a series allowing pandas to set index`

In [3]:
# Only values are supplied, so pandas attaches the default integer index (0..5).
# The np.nan entry is what makes the resulting Series float64 rather than int.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

---

`// create a DatetimeIndex of 6 consecutive daily dates starting at 2013-01-01`

In [4]:
# Six consecutive calendar days beginning 2013-01-01; no freq is given,
# so the daily default applies (freq='D' in the repr).
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

---

`// create a DataFrame whose index is the date range created above, whose columns are labeled A, B, C and D, and whose values are random numbers drawn from a standard normal distribution`

In [5]:
# Draw the 6x4 standard-normal sample from a seeded Generator so that
# Restart & Run All always rebuilds the same frame; the unseeded global
# np.random.randn produced different values on every run.
# NOTE: outputs stored before this change were produced without a seed;
# re-run the notebook to refresh them.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((6, 4)),
    index=dates,  # row labels: the six daily dates built in the previous cell
    columns=list("ABCD"),  # column labels "A".."D"
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.199249,0.639385,-0.072936,0.120937
2013-01-02,0.400969,-0.027296,-0.105651,-2.519166
2013-01-03,-0.582395,-0.1521,0.661342,0.898475
2013-01-04,0.164998,-1.063246,0.869119,1.318826
2013-01-05,-0.37499,-0.614946,-0.704097,0.518012
2013-01-06,0.261193,-0.188344,1.419346,-0.530489


---

`// create a DataFrame from a dict: each key becomes a column label, and scalar values are broadcast to the length of the array-like values`

In [6]:
# Build a frame from a dict of column label -> values.
# Scalars (A, B, F) are broadcast to the four rows defined by the
# array-like entries (C, D, E); every column keeps its own dtype.
df2 = pd.DataFrame(
    data={
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


---

`// list the df2 DataFrame type values for each column`

In [7]:
# Result is a Series indexed by column label; each column of df2 keeps its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

---

`// view the first row of the df DataFrame (head(n) returns the first n rows)`

In [8]:
# The argument is the number of rows to return, counted from the top of the frame.
df.head(1)

Unnamed: 0,A,B,C,D
2013-01-01,0.199249,0.639385,-0.072936,0.120937


---

`// view the last row of the df DataFrame (tail(n) returns the last n rows)`

In [9]:
# The argument is the number of rows to return, counted from the bottom of the frame.
df.tail(1)

Unnamed: 0,A,B,C,D
2013-01-06,0.261193,-0.188344,1.419346,-0.530489


---

`// find the index values for the df DataFrame`

In [10]:
# Row labels of df: the DatetimeIndex that was passed as index= when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

---

`// get the column labels of the df DataFrame (the dtype shown is that of the label Index itself, not of the column data)`

In [11]:
# Column labels as an Index; dtype='object' in the repr describes the labels, not the data.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

---

`// convert the df DataFrame to a NumPy array -- every column is float64, so the array is float64 too and the conversion is inexpensive`

In [12]:
# Only the values are returned; the row and column labels are not part of the array.
df.to_numpy()

array([[ 0.19924912,  0.63938515, -0.0729361 ,  0.12093706],
       [ 0.40096865, -0.0272961 , -0.1056508 , -2.5191656 ],
       [-0.58239485, -0.15209975,  0.66134219,  0.89847514],
       [ 0.16499764, -1.06324637,  0.86911882,  1.31882649],
       [-0.37498956, -0.6149461 , -0.70409725,  0.51801209],
       [ 0.26119252, -0.18834401,  1.41934633, -0.53048897]])

---

`// convert the df2 DataFrame to a NumPy array -- the columns have different dtypes, so pandas must fall back to one common dtype for the whole array (object here), which makes the conversion more expensive`

In [13]:
# A NumPy array has a single dtype, so the mixed columns of df2 end up as dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

---

`// show a quick statistical summary (count, mean, std, min, quartiles, max) of each column of the df DataFrame`

In [14]:
# One summary column per data column; the rows are count, mean, std, min, 25%, 50%, 75%, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.011504,-0.234425,0.344521,-0.032234
std,0.393682,0.573949,0.775545,1.374838
min,-0.582395,-1.063246,-0.704097,-2.519166
25%,-0.239993,-0.508296,-0.097472,-0.367632
50%,0.182123,-0.170222,0.294203,0.319475
75%,0.245707,-0.058497,0.817175,0.803359
max,0.400969,0.639385,1.419346,1.318826


---

`// transpose the data of the df DataFrame`

In [15]:
# Transposed view: the column labels A..D become the rows and the dates become the columns.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.199249,0.400969,-0.582395,0.164998,-0.37499,0.261193
B,0.639385,-0.027296,-0.1521,-1.063246,-0.614946,-0.188344
C,-0.072936,-0.105651,0.661342,0.869119,-0.704097,1.419346
D,0.120937,-2.519166,0.898475,1.318826,0.518012,-0.530489


---

`// sort the df DataFrame by its column labels (axis=1) in descending order`

In [16]:
# axis=1 sorts by the column labels (giving D, C, B, A); the result is a new frame and
# df itself keeps its A, B, C, D order, as the later cells show.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.120937,-0.072936,0.639385,0.199249
2013-01-02,-2.519166,-0.105651,-0.027296,0.400969
2013-01-03,0.898475,0.661342,-0.1521,-0.582395
2013-01-04,1.318826,0.869119,-1.063246,0.164998
2013-01-05,0.518012,-0.704097,-0.614946,-0.37499
2013-01-06,-0.530489,1.419346,-0.188344,0.261193


---

`// sort the df DataFrame by its B column values`

In [17]:
# Rows are reordered by the values in column B, smallest first (no ascending= given).
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-04,0.164998,-1.063246,0.869119,1.318826
2013-01-05,-0.37499,-0.614946,-0.704097,0.518012
2013-01-06,0.261193,-0.188344,1.419346,-0.530489
2013-01-03,-0.582395,-0.1521,0.661342,0.898475
2013-01-02,0.400969,-0.027296,-0.105651,-2.519166
2013-01-01,0.199249,0.639385,-0.072936,0.120937


---

`// select a specific column - shows index (left) and value (right)`

In [19]:
# Selecting one column label returns a Series named "A" that keeps the date index of df.
df["A"]

2013-01-01    0.199249
2013-01-02    0.400969
2013-01-03   -0.582395
2013-01-04    0.164998
2013-01-05   -0.374990
2013-01-06    0.261193
Freq: D, Name: A, dtype: float64

---

`// select rows by slicing with [start:end] -- with date-string labels both endpoints are included, so using the same date for start and end returns that single row`

In [32]:
# A slice inside [] selects rows (not columns); the date strings are matched against the
# DatetimeIndex and the end label is inclusive, hence exactly one row comes back.
df["20130103":"20130103"]

Unnamed: 0,A,B,C,D
2013-01-03,-0.582395,-0.1521,0.661342,0.898475
