### 10 Minutes to pandas
https://pandas.pydata.org/docs/user_guide/10min.html#minutes-to-pandas

_Follow along / scratch coding with the above reading_

`// import statements`

In [4]:
import numpy as np

In [3]:
import pandas as pd

---

`// create a Series from a list of values, letting pandas assign the default integer index`

In [5]:
# No index is supplied, so pandas labels the entries 0..5 on its own.
# The np.nan entry is why the integers are stored as float64.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

---

`// create a date range array with 6 values`

In [6]:
# Six consecutive calendar days beginning on 2013-01-01 (daily frequency).
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

---

`// create a DataFrame indexed by the date range created above, with columns labeled A, B, C and D and random numbers as the values`

In [7]:
# Seed the generator so the same "random" values come back on every run; without
# a fixed seed each re-execution of this cell silently changes df, and the
# outputs of later cells stop matching the table displayed here.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((6, 4)),  # 6 rows x 4 columns
    index=dates,  # one row per date in the range created above
    columns=list("ABCD"),  # column labels "A", "B", "C", "D"
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.902688,-0.084564,0.227529,-0.348789
2013-01-02,0.180477,-0.877039,-0.525577,0.090993
2013-01-03,1.005072,0.734529,0.630311,1.825319
2013-01-04,0.141014,-0.239865,0.03772,-0.150803
2013-01-05,0.693082,0.402142,-0.134913,0.490771
2013-01-06,-1.4115,1.282646,0.968649,1.097266


---

`// create a DataFrame from a dictionary: the keys become the column labels and the values become the column data`

In [8]:
# Each dict key becomes a column label. Scalar values ("A", "B", "F") are
# repeated on every row; the array-like values ("C", "D", "E") supply four
# entries each, which gives the frame its four rows.
df2 = pd.DataFrame(
    data={
        "A": 1.0,  # scalar float
        "B": pd.Timestamp("20130102"),  # scalar timestamp
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),  # carries the row labels 0..3
        "D": np.array([3, 3, 3, 3], dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",  # scalar string
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


---

`// list the data type (dtype) of each column in the df2 DataFrame`

In [10]:
# Every column keeps its own dtype; the result is a Series keyed by column label.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

---

`// view the first row of the df DataFrame (the argument to head() sets how many rows are returned)`

In [13]:
# n is the number of rows to return, counted from the top of the frame.
df.head(n=1)

Unnamed: 0,A,B,C,D
2013-01-01,1.051186,0.967743,-0.9451,-0.037207


---

`// view the last row of the df DataFrame (the argument to tail() sets how many rows are returned)`

In [14]:
# n is the number of rows to return, counted from the bottom of the frame.
df.tail(n=1)

Unnamed: 0,A,B,C,D
2013-01-06,0.166152,-1.255228,0.730229,-0.823481


---

`// find the index values for the df DataFrame`

In [15]:
# Row labels of df: the DatetimeIndex that was passed as index= when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

---

`// list the column labels of the df DataFrame (the dtype shown belongs to the label Index itself, not to the data in the columns)`

In [16]:
# Column labels only; the dtype in the repr describes the labels, not the column data.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

---

`// convert the df DataFrame to a NumPy array -- every column is already float, so no dtype conversion is needed and the call is cheap`

In [25]:
# Returns just the values as a 2-D NumPy array; the index and column labels are not included.
df.to_numpy()

array([[-1.56382825, -1.02824035,  0.20563502,  2.16673319],
       [-1.82564328,  0.48532261, -0.12460579, -1.4467163 ],
       [ 1.69809028,  0.56084296, -1.27237499,  0.93677572],
       [ 1.00637307, -0.87076585,  0.65225794, -0.420269  ],
       [-1.34901188,  1.13296663,  1.06663903,  1.85259412],
       [ 1.1361006 ,  1.22507339,  0.84045318,  0.02650225]])

---

`// convert the df2 DataFrame to a NumPy array -- its columns have mixed dtypes, so pandas must find one common dtype for the whole array (here object), which is more expensive`

In [18]:
# df2's columns have different dtypes, so the resulting array has dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

---

`// show a quick statistical summary (count, mean, std, min, quartiles, max) of each column in the df DataFrame`

In [19]:
# One column of summary statistics per data column: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.277882,-0.226239,-0.378539,-0.403209
std,0.709897,0.934004,1.244828,0.673575
min,-0.607962,-1.255228,-1.94884,-1.158505
25%,-0.159548,-0.913361,-1.309803,-0.845474
50%,0.149569,-0.402097,-0.380913,-0.520076
75%,0.829928,0.512995,0.59349,-0.082073
max,1.18199,0.967743,1.140571,0.669415


---

`// transpose the data of the df DataFrame`

In [20]:
# Swap rows and columns: the dates become the column labels and A-D become the row labels.
df.transpose()

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,1.051186,-0.257059,1.18199,0.132985,-0.607962,0.166152
B,0.967743,0.815273,-0.393839,-0.410356,-1.08103,-1.255228
C,-0.9451,1.140571,0.183274,-1.94884,-1.43137,0.730229
D,-0.037207,-0.21667,0.669415,-1.158505,-0.852804,-0.823481


---

`// sort the df DataFrame by its column labels (axis=1) in descending order`

In [23]:
# axis="columns" is the named form of axis=1: the columns are reordered by label,
# and ascending=False puts them in the order D, C, B, A.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.037207,-0.9451,0.967743,1.051186
2013-01-02,-0.21667,1.140571,0.815273,-0.257059
2013-01-03,0.669415,0.183274,-0.393839,1.18199
2013-01-04,-1.158505,-1.94884,-0.410356,0.132985
2013-01-05,-0.852804,-1.43137,-1.08103,-0.607962
2013-01-06,-0.823481,0.730229,-1.255228,0.166152


---

`// sort the df DataFrame by its B column values`

In [26]:
# Reorder the rows by the values in column "B", smallest first; a new frame is
# returned and df itself is left unchanged.
df.sort_values(by="B", ascending=True)

Unnamed: 0,A,B,C,D
2013-01-01,-1.563828,-1.02824,0.205635,2.166733
2013-01-04,1.006373,-0.870766,0.652258,-0.420269
2013-01-02,-1.825643,0.485323,-0.124606,-1.446716
2013-01-03,1.69809,0.560843,-1.272375,0.936776
2013-01-05,-1.349012,1.132967,1.066639,1.852594
2013-01-06,1.136101,1.225073,0.840453,0.026502


---